In [2]:
import joblib
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

In [3]:
# Load the raw training table; the path is resolved relative to the
# directory the notebook kernel is running in.
train_csv_path = "train.csv"
df = pd.read_csv(train_csv_path)

In [4]:
# Discard every row that contains at least one missing value.
# The result is rebound to `df` rather than mutating the frame with
# `inplace=True`: the resulting `df` holds the same rows, but the operation is
# an ordinary expression that can be chained or assigned to a new name later.
df = df.dropna()

In [5]:
# Separate the feature matrix from the target column.
target_column = "satisfaction"

# "id" is a row identifier, not a predictor, so it is removed together with
# the target (a missing "id"/"satisfaction" column still raises KeyError).
X = df.drop(columns=["id", target_column])

# NOTE(review): the preview of X_encoded in this notebook lists an
# "Unnamed: 0" header, which is what pandas names a row index that was written
# into the CSV. If that column really exists it is just a row counter and must
# not be used as a feature; errors="ignore" makes this a no-op when it is absent.
X = X.drop(columns=["Unnamed: 0"], errors="ignore")

y = df[target_column]

In [6]:
# Integer-encode the categorical feature columns.
# The encoding is applied to a copy so the original `X` stays untouched.
# (pandas and LabelEncoder come from the import cell at the top of the notebook.)
cat_columns = ['Gender', 'Customer Type', 'Type of Travel', 'Class']

X_encoded = X.copy()

# One fitted LabelEncoder per column, keyed by column name, so the integer
# codes can later be mapped back (inverse_transform) or applied to new data.
# NOTE(review): the encoders are fitted on the full dataset, before the
# train/test split, and no cell shown here saves `label_encoders` next to the
# pickled models; confirm they are persisted wherever the models are reused.
label_encoders = {}
for col in cat_columns:
    encoder = LabelEncoder()
    # Fit on the original column values and store the integer codes in the copy.
    X_encoded[col] = encoder.fit_transform(X[col])
    label_encoders[col] = encoder


In [7]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# shuffle, and therefore the split, repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded,
    y,
    test_size=0.2,
    random_state=42,
)

In [8]:
print("Training Random Forest...")
# A random forest draws bootstrap samples and random feature subsets, so it is
# seeded here (same seed as the train/test split); without a random_state the
# saved model and the reported metrics change from run to run.
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)

# Predictions on the held-out split, used by the classification report cell.
rf_y_pred = rf_model.predict(X_test)

# Persist the fitted model; kept as the cell's last expression so the list of
# written file names is still shown as the cell output.
joblib.dump(rf_model, 'random_forest_model.pkl')

Training Random Forest...


['random_forest_model.pkl']

In [10]:
# Per-class precision / recall / F1 of the random forest on the held-out split.
rf_report = classification_report(y_test, rf_y_pred)
print("\nClassification report for Random Forest:")
print(rf_report)


Classification report for Random Forest:
                         precision    recall  f1-score   support

neutral or dissatisfied       0.95      0.98      0.97     11655
              satisfied       0.97      0.94      0.96      9064

               accuracy                           0.96     20719
              macro avg       0.96      0.96      0.96     20719
           weighted avg       0.96      0.96      0.96     20719



In [11]:
print("\nTraining Naive Bayes...")
# fit() returns the estimator itself, so construction and training can be chained.
nb_model = GaussianNB().fit(X_train, y_train)
nb_y_pred = nb_model.predict(X_test)
# Persist the fitted model to disk.
joblib.dump(nb_model, 'naive_bayes_model.pkl')

# Per-class precision / recall / F1 of Naive Bayes on the same held-out split.
nb_report = classification_report(y_test, nb_y_pred)
print("\nClassification report for Naive Bayes:")
print(nb_report)


Training Naive Bayes...

Classification report for Naive Bayes:
                         precision    recall  f1-score   support

neutral or dissatisfied       0.86      0.90      0.88     11655
              satisfied       0.86      0.82      0.84      9064

               accuracy                           0.86     20719
              macro avg       0.86      0.86      0.86     20719
           weighted avg       0.86      0.86      0.86     20719



In [9]:
# Preview the first five rows of the encoded feature matrix; the categorical
# columns now hold integer codes instead of strings.
X_encoded.head(n=5)

Unnamed: 0,Gender,Customer Type,Age,Type of Travel,Class,Flight Distance,Inflight wifi service,Departure/Arrival time convenient,Ease of Online booking,Gate location,...,Seat comfort,Inflight entertainment,On-board service,Leg room service,Baggage handling,Checkin service,Inflight service,Cleanliness,Departure Delay in Minutes,Arrival Delay in Minutes
0,1,0,13,1,2,460,3,4,3,1,...,5,5,4,3,4,4,5,5,25,18.0
1,1,1,25,0,0,235,3,2,3,3,...,1,1,1,5,3,1,4,1,1,6.0
2,0,0,26,0,0,1142,2,2,2,2,...,5,5,4,3,4,4,4,5,0,0.0
3,0,0,25,0,0,562,2,5,5,5,...,2,2,2,5,3,1,4,2,11,9.0
4,1,0,61,0,0,214,3,3,3,3,...,5,3,3,4,4,3,3,3,0,0.0
