In [None]:
import requests
from bs4 import BeautifulSoup
import csv

# Fetch the landing page. A timeout keeps the cell from hanging forever on a
# stalled connection, and raise_for_status() stops us from parsing an HTTP
# error page as if it were review content.
url = 'https://www.airlinequality.com/'
response = requests.get(url, timeout=30)
response.raise_for_status()

# Parse the returned HTML.
soup = BeautifulSoup(response.text, 'html.parser')

# Collect one {'review_text', 'rating'} record per review container.
# NOTE(review): the tag/class selectors below are illustrative -- confirm them
# against the site's real markup; if nothing matches, `reviews` stays empty and
# the CSV will contain only the header row.
reviews = []
for review in soup.find_all('div', class_='review'):
    text_tag = review.find('p', class_='review-text')
    rating_tag = review.find('span', class_='rating')
    # find() returns None when the element is absent; skip incomplete entries
    # instead of crashing with AttributeError on `.text`.
    if text_tag is None or rating_tag is None:
        continue
    reviews.append({'review_text': text_tag.text, 'rating': rating_tag.text})

import os

# Create the 'data' directory if it doesn't exist (no separate existence
# check needed, and no race between the check and the creation).
directory = 'data'
os.makedirs(directory, exist_ok=True)

# Write the records to data/reviews.csv exactly once.
# newline='' lets the csv module control line endings itself.
csv_file_path = os.path.join(directory, 'reviews.csv')
with open(csv_file_path, 'w', newline='', encoding='utf-8') as csvfile:
    fieldnames = ['review_text', 'rating']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(reviews)


In [None]:
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Download the VADER lexicon required by SentimentIntensityAnalyzer
nltk.download('vader_lexicon')

# Load the VADER sentiment analyzer
sid = SentimentIntensityAnalyzer()

# Score every review and keep the results. Previously the compound score was
# computed and immediately discarded on each iteration, so the cell produced
# nothing usable.
sentiment_scores = []
# newline='' is what the csv module expects for files it reads, so that
# newlines embedded inside quoted review text are handled correctly.
with open('data/reviews.csv', 'r', newline='', encoding='utf-8') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        review_text = row['review_text']
        # 'compound' is the single aggregate score in the dict returned by
        # polarity_scores().
        sentiment_score = sid.polarity_scores(review_text)['compound']
        sentiment_scores.append({
            'review_text': review_text,
            'rating': row['rating'],
            'sentiment_score': sentiment_score,
        })


[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [None]:
import pandas as pd


In [None]:
# Read the customer booking export into a DataFrame. The file is decoded as
# ISO-8859-1 (Latin-1) rather than pandas' default encoding.
df = pd.read_csv(
    "/content/customer_booking.csv",
    encoding="ISO-8859-1",
)

# Leave the first rows as the cell's last expression so they render as a table.
df.head()

Unnamed: 0,num_passengers,sales_channel,trip_type,purchase_lead,length_of_stay,flight_hour,flight_day,route,booking_origin,wants_extra_baggage,wants_preferred_seat,wants_in_flight_meals,flight_duration,booking_complete
0,2,Internet,RoundTrip,262,19,7,Sat,AKLDEL,New Zealand,1,0,0,5.52,0
1,1,Internet,RoundTrip,112,20,3,Sat,AKLDEL,New Zealand,0,0,0,5.52,0
2,2,Internet,RoundTrip,243,22,17,Wed,AKLDEL,India,1,1,0,5.52,0
3,1,Internet,RoundTrip,96,31,4,Sat,AKLDEL,New Zealand,0,0,1,5.52,0
4,2,Internet,RoundTrip,68,22,15,Wed,AKLDEL,India,1,0,1,5.52,0


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Load the dataset
df = pd.read_csv('/content/customer_booking.csv', encoding='latin-1')

# Explore the dataset
print(df.head())
df.info()  # info() prints its own report and returns None, so don't wrap it in print()
print(df.describe())

# Handle missing values or perform other data cleaning tasks if necessary

# One-hot encode every string-valued column directly.
# - Running LabelEncoder first was redundant: get_dummies then one-hot encoded
#   the integer codes and the resulting columns lost their category names.
# - 'flight_day' holds strings such as 'Sat' / 'Wed' and was never encoded;
#   the classifier cannot be fitted on raw strings.
categorical_columns = ['sales_channel', 'trip_type', 'flight_day', 'route', 'booking_origin']
df = pd.get_dummies(df, columns=categorical_columns, drop_first=True)

# Define features (X) and target variable (y)
X = df.drop('booking_complete', axis=1)
y = df['booking_complete']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the RandomForest classifier.
# scikit-learn estimators have no compile() step and fit() takes no
# epochs / batch_size / validation_split arguments -- those are Keras APIs and
# caused "AttributeError: 'RandomForestClassifier' object has no attribute
# 'compile'". Fitting is a single call.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy}")
print(f"Confusion Matrix:\n{conf_matrix}")
print(f"Classification Report:\n{classification_rep}")

# Feature importance, most influential first so the useful rows are at the top
# of what can be a long list after one-hot encoding.
feature_importance = model.feature_importances_
print("Feature Importance:")
ranked_features = sorted(zip(X.columns, feature_importance), key=lambda pair: pair[1], reverse=True)
for feature, importance in ranked_features:
    print(f"{feature}: {importance}")


   num_passengers sales_channel  trip_type  purchase_lead  length_of_stay  \
0               2      Internet  RoundTrip            262              19   
1               1      Internet  RoundTrip            112              20   
2               2      Internet  RoundTrip            243              22   
3               1      Internet  RoundTrip             96              31   
4               2      Internet  RoundTrip             68              22   

   flight_hour flight_day   route booking_origin  wants_extra_baggage  \
0            7        Sat  AKLDEL    New Zealand                    1   
1            3        Sat  AKLDEL    New Zealand                    0   
2           17        Wed  AKLDEL          India                    1   
3            4        Sat  AKLDEL    New Zealand                    0   
4           15        Wed  AKLDEL          India                    1   

   wants_preferred_seat  wants_in_flight_meals  flight_duration  \
0                     0        

AttributeError: 'RandomForestClassifier' object has no attribute 'compile'

In [None]:
from pptx import Presentation
from pptx.util import Inches

# Start from an empty deck.
presentation = Presentation()

# Layout 0 is used here as the title-slide layout; append one slide built from it.
slide_layout = presentation.slide_layouts[0]
slide = presentation.slides.add_slide(slide_layout)

# Fill in the slide's title shape.
title = slide.shapes.title
title.text = "Model Evaluation Summary"

# Placeholder 1 is treated as the subtitle on this layout.
subtitle = slide.placeholders[1]
subtitle.text = "Cross-Validation and Variable Contribution"

# TODO: add the content slides here.

# Write the deck to disk in the current working directory.
presentation.save("Model_Evaluation_Summary.pptx")



ModuleNotFoundError: No module named 'pptx'