<a href="https://colab.research.google.com/github/PTmytrial/Python_Pediction/blob/main/Copy_of_BAN240_ReportAnalysis_CoefficientEquation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np

In [None]:
!pip install scikit-learn

from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report



In [None]:
# Read the airline passenger satisfaction survey and preview the first rows.
csv_path = '/content/airline_passenger_satisfaction.csv'
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,ID,Gender,Age,Customer Type,Type of Travel,Class,Flight Distance,Departure Delay,Arrival Delay,Departure and Arrival Time Convenience,...,On-board Service,Seat Comfort,Leg Room Service,Cleanliness,Food and Drink,In-flight Service,In-flight Wifi Service,In-flight Entertainment,Baggage Handling,Satisfaction
0,1,Male,48,First-time,Business,Business,821,2,5.0,3,...,3,5,2,5,5,5,3,5,5,Neutral or Dissatisfied
1,2,Female,35,Returning,Business,Business,821,26,39.0,2,...,5,4,5,5,3,5,2,5,5,Satisfied
2,3,Male,41,Returning,Business,Business,853,0,0.0,4,...,3,5,3,5,5,3,4,3,3,Satisfied
3,4,Male,50,Returning,Business,Business,1905,0,0.0,2,...,5,5,5,4,4,5,2,5,5,Satisfied
4,5,Female,49,Returning,Business,Business,3470,0,1.0,3,...,3,4,4,5,4,3,3,3,3,Satisfied


In [None]:
# Prepare the data
# Convert categorical variables to numeric.
# Each column gets its own LabelEncoder, stored in `label_encoders`, so the
# string <-> integer mapping of every column (including the target) stays
# available through `.classes_` / `.inverse_transform` after this cell runs.
# Refitting one shared encoder would discard the mapping on every fit.
categorical_columns = {
    'Satisfaction': 'Satisfaction_encoded',
    'Gender': 'Gender_encoded',
    'Customer Type': 'Customer_Type_encoded',
    'Type of Travel': 'Travel_Type_encoded',
    'Class': 'Class_encoded',
}
label_encoders = {}
for source_column, encoded_column in categorical_columns.items():
    encoder = LabelEncoder()
    df[encoded_column] = encoder.fit_transform(df[source_column])
    label_encoders[source_column] = encoder

# `le` used to end up fitted on 'Class' (the last column encoded); keep that
# binding so any later cell that still refers to `le` behaves as before.
le = label_encoders['Class']


In [None]:
# Model inputs, grouped by kind. The concatenation order matters: it fixes the
# column order of the design matrix and of the fitted coefficients.
numeric_features = [
    'Age',
    'Flight Distance',
    'Departure Delay',
    'Arrival Delay',
]
rating_features = [
    'Departure and Arrival Time Convenience',
    'Ease of Online Booking',
    'Check-in Service',
    'Online Boarding',
    'Gate Location',
    'On-board Service',
    'Seat Comfort',
    'Leg Room Service',
    'Cleanliness',
    'Food and Drink',
    'In-flight Service',
    'In-flight Wifi Service',
    'In-flight Entertainment',
    'Baggage Handling',
]
encoded_features = [
    'Gender_encoded',
    'Customer_Type_encoded',
    'Travel_Type_encoded',
    'Class_encoded',
]
features = numeric_features + rating_features + encoded_features

In [None]:
# Design matrix (selected feature columns) and the encoded satisfaction target.
target_column = 'Satisfaction_encoded'
X, y = df[features], df[target_column]

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# Fill missing values with per-column means learned from the training split
# only, then apply the same fill values to the test split.
imputer = SimpleImputer(strategy='mean')
imputer.fit(X_train)
X_train_imputed = imputer.transform(X_train)
X_test_imputed = imputer.transform(X_test)

# Standardize with statistics computed on the imputed training data.
scaler = StandardScaler()
scaler.fit(X_train_imputed)
X_train_scaled = scaler.transform(X_train_imputed)
X_test_scaled = scaler.transform(X_test_imputed)

# Fit the logistic regression classifier on the standardized training data.
model = LogisticRegression(random_state=42, max_iter=1000)
model.fit(X_train_scaled, y_train)

In [None]:
# Get feature coefficients and create equation
# The model was fit on imputed, standardized features, so each coefficient is
# the change in log-odds per one standard deviation of that feature.
coefficients = pd.DataFrame({
    'Feature': features,
    'Coefficient': model.coef_[0]
}).sort_values(by='Coefficient', ascending=False)

# Rows are ordered by signed value (most positive first), so the strongest
# negative effects sit at the bottom; the header states that ordering rather
# than claiming an importance ranking.
print("\nModel Coefficients (sorted by coefficient value, descending):")
print(coefficients)

# The intercept is required to complete the log-odds equation.
print("\nModel Intercept:", model.intercept_[0])


Model Coefficients (sorted by importance):
                                   Feature  Coefficient
7                          Online Boarding     0.816208
19                   Customer_Type_encoded     0.803822
15                  In-flight Wifi Service     0.525981
6                         Check-in Service     0.419571
9                         On-board Service     0.394830
11                        Leg Room Service     0.323708
12                             Cleanliness     0.299115
17                        Baggage Handling     0.170598
14                       In-flight Service     0.148684
2                          Departure Delay     0.110433
10                            Seat Comfort     0.093554
16                 In-flight Entertainment     0.065488
8                            Gate Location     0.030804
18                          Gender_encoded     0.026429
1                          Flight Distance    -0.000355
13                          Food and Drink    -0.032621
0   

In [None]:
# Score the held-out test split and summarize per-class performance.
y_pred = model.predict(X_test_scaled)
test_accuracy = accuracy_score(y_test, y_pred)
report_text = classification_report(y_test, y_pred)

print("\nModel Accuracy:", test_accuracy)
print("\nClassification Report:")
print(report_text)


Model Accuracy: 0.8743070526639976

Classification Report:
              precision    recall  f1-score   support

           0       0.88      0.90      0.89     14723
           1       0.87      0.84      0.85     11253

    accuracy                           0.87     25976
   macro avg       0.87      0.87      0.87     25976
weighted avg       0.87      0.87      0.87     25976



**Final Satisfaction Prediction Equation:**

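$$
\begin{aligned}
\text{Log-Odds(Satisfaction)} ={}& \text{Intercept} \\
&+ 0.816208 \times \text{Online Boarding} \\
&+ 0.803822 \times \text{Customer Type} \\
&+ 0.525981 \times \text{In-flight Wifi Service} \\
&+ 0.419571 \times \text{Check-in Service} \\
&+ 0.394830 \times \text{On-board Service} \\
&+ 0.323708 \times \text{Leg Room Service} \\
&+ 0.299115 \times \text{Cleanliness} \\
&+ 0.170598 \times \text{Baggage Handling} \\
&+ 0.148684 \times \text{In-flight Service} \\
&+ 0.110433 \times \text{Departure Delay} \\
&+ 0.093554 \times \text{Seat Comfort} \\
&+ 0.065488 \times \text{In-flight Entertainment} \\
&+ 0.030804 \times \text{Gate Location} \\
&+ 0.026429 \times \text{Gender} \\
&- 0.000355 \times \text{Flight Distance} \\
&- 0.032621 \times \text{Food and Drink} \\
&- 0.139837 \times \text{Age} \\
&- 0.199965 \times \text{Departure and Arrival Time Convenience} \\
&- 0.218362 \times \text{Ease of Online Booking} \\
&- 0.301839 \times \text{Arrival Delay} \\
&- 0.321377 \times \text{Class} \\
&- 1.311611 \times \text{Travel Type}
\end{aligned}
$$

Notes on reading the equation:

- The model was trained on mean-imputed, standardized features, so each variable above is the feature's z-score (value minus training mean, divided by training standard deviation), not its raw value. Each coefficient is the change in log-odds per one standard deviation of that feature.
- Gender, Customer Type, Travel Type and Class are the label-encoded versions of the original text columns.
- Intercept is the fitted `model.intercept_[0]`.
- The log-odds refer to class 1 of `Satisfaction_encoded`. `LabelEncoder` assigns codes in sorted label order, so class 1 should be "Satisfied"; confirm against the encoder's `classes_`.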

Model Accuracy: 0.874 (measured on the 20% held-out test set, 25,976 rows)