## Loading Data

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder

In [None]:
# Read the dataset from a CSV file located in the current working directory
# and preview the first rows.
df = pd.read_csv('airline_passenger_satisfaction.csv')
df.head()

In [None]:
# Remove the 'ID' column before analysis (presumably a row identifier with no
# predictive meaning — confirm against the data dictionary).
# Reassigning with errors='ignore' keeps this cell idempotent: the previous
# in-place drop raised KeyError as soon as the cell was executed a second time.
df = df.drop(columns='ID', errors='ignore')
df.head()

In [None]:
# (rows, columns) of the DataFrame.
df.shape

In [None]:
# Summary statistics (count, mean, std, quartiles, min/max), rounded to two decimals.
df.describe().round(2)

In [None]:
# Data type of every column.
df.dtypes

## Data Cleaning

In [None]:
# Per-column non-null counts and dtypes, plus overall memory usage.
df.info()

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
# Mean of 'Arrival Delay' (missing values are skipped by default).
df['Arrival Delay'].mean()

In [None]:
# Impute missing 'Arrival Delay' values with the column mean.
# The result is assigned back to the column instead of calling
# fillna(..., inplace=True) on the selected Series: the in-place form operates
# on an intermediate object, which emits a FutureWarning in pandas 2.x and
# leaves `df` unchanged once Copy-on-Write is enabled.
df['Arrival Delay'] = df['Arrival Delay'].fillna(df['Arrival Delay'].mean())

In [None]:
# After imputation the missing-value count reported for 'Arrival Delay' should be 0.

df.isna().sum()

## Charts

In [None]:
# Share of each satisfaction class.
# Wedge labels come from the value_counts() index, so every wedge is named
# after the category it actually represents. Hard-coded labels depended on the
# frequency-sorted order and would be silently swapped if that order changed.
satisfaction_counts = df['Satisfaction'].value_counts()
plt.pie(satisfaction_counts, labels=satisfaction_counts.index, autopct='%1.1f%%')
plt.title('Passenger Satisfaction')
plt.show()

In [None]:
# Count plot for each categorical column, placed on a 3x2 grid of subplots
# (five panels are drawn; the sixth slot is left unused).
cols = ['Gender', 'Customer Type', 'Type of Travel', 'Class', 'Satisfaction']
plt.figure(figsize=(15, 15))

for position, column_name in enumerate(cols, start=1):
    plt.subplot(3, 2, position)
    sns.countplot(data=df, x=column_name)

In [None]:
# One 20-bin histogram per numeric column, drawn on a single 20x20 figure.
df.hist(bins=20, figsize=(20, 20), color='green')
plt.show()

## Column Data Encoding

In [None]:
# Print the distinct values found in every string-typed (object) column.
items = df.select_dtypes(include='object').columns

for column_name in items:
    distinct_values = df[column_name].unique()
    print(distinct_values)

In [None]:
# OPTION 1
#
# Replace every string-typed feature column with integer codes; the target
# 'Satisfaction' is excluded and keeps its original labels.
# A separate LabelEncoder is fitted per column and kept in `label_encoders`,
# so each mapping stays available afterwards (label_encoders['Class'].classes_,
# .inverse_transform, ...). Re-fitting one shared encoder retained only the
# mapping of the last column processed.
# LabelEncoder assigns codes 0..n-1 following the sorted order of the labels.

label_encoders = {}

columns = df.select_dtypes(include='object').drop(columns='Satisfaction').columns

for column in columns:
    label_encoder = LabelEncoder()
    df[column] = label_encoder.fit_transform(df[column])
    label_encoders[column] = label_encoder

# Only the last expression of a cell is rendered automatically, so the preview
# is shown with an explicit display() call (IPython built-in).
display(df.head())
df['Class'].unique()

In [17]:
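# OPTION 2 (not executed; kept for reference): explicit value mapping with
# df.replace instead of LabelEncoder. These codes start at 1, so they differ
# from the 0-based codes produced by OPTION 1.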

# df.replace({
#     'Gender': {
#         'Male': 1,
#         'Female': 2
#     },
#     'Customer Type': {
#         'First-time': 1,
#         'Returning': 2
#     },
#     'Type of Travel': {
#         'Business': 1,
#         'Personal': 2
#     },
#     'Class': {
#         'Business': 1,
#         'Economy': 2,
#         'Economy Plus': 3
#     }
# }, inplace=True)

# df.head()

In [None]:
# Column dtypes, re-checked after the encoding step.
df.dtypes

## Additional Charts

In [None]:
# Annotated correlation heatmap of all columns except the target.
plt.figure(figsize=(16, 8))
feature_corr = df.drop(columns='Satisfaction').corr()
sns.heatmap(feature_corr, cmap='Greens', annot=True, fmt='.2f')

In [None]:
# Passenger counts per age, split by satisfaction.
# `order` lists the ages 7 through 72, so only those ages get a bar.
# NOTE(review): confirm the bounds against df['Age'].min() / df['Age'].max().
sns.catplot(
    data=df,
    x='Age',
    hue='Satisfaction',
    kind='count',
    order=range(7, 73),
    height=4,
    aspect=4,
)

In [None]:
# Passenger counts per 'On-board Service' value, split by satisfaction.
sns.catplot(
    data=df,
    x='On-board Service',
    hue='Satisfaction',
    kind='count',
    height=4,
    aspect=4,
)

In [None]:
# Passenger counts per gender, split by satisfaction.
sns.catplot(
    data=df,
    x='Gender',
    hue='Satisfaction',
    kind='count',
    height=4,
    aspect=4,
)

## Filtering Data

In [None]:
# Select a subset of columns by name and preview the first rows.
df[['Gender', 'Age', 'Type of Travel']].head()

In [None]:
# .loc slices by index label and includes both endpoints: labels 2 through 5.
df.loc[2:5, ['Gender', 'Age', 'Flight Distance']]

In [None]:
# Boolean mask: rows where Age is greater than 50 (first matches only).
df.loc[df['Age'] > 50, ['Gender', 'Age', 'Flight Distance']].head()

In [None]:
# Boolean mask: rows where Age is exactly 50 (first matches only).
df.loc[df['Age'] == 50, ['Gender', 'Age', 'Flight Distance']].head()

In [None]:
# .iloc slices by position and excludes the end: rows 10-14, columns 1-6.
# The slice holds at most five rows, so .head() does not shorten it further.
df.iloc[10:15, 1:7].head()

In [None]:
# All column labels.
df.columns

## Models

In [None]:
# Feature matrix: every column except the target 'Satisfaction'.
X = df.drop(columns='Satisfaction')
X.head(3)

In [None]:
# Target vector: the 'Satisfaction' column.
y = df['Satisfaction']
y.head(3)

### Decision Tree

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Fixed seeds make both the split and the fitted tree reproducible across
# Restart & Run All; without them every run reports a different accuracy.
model = DecisionTreeClassifier(random_state=42)

# Hold out 20% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X.shape

In [None]:
# Shape of the training portion of the split.
X_train.shape

In [None]:
# Shape of the held-out test portion of the split.
X_test.shape

In [None]:
# Fit the classifier on the training split.
model.fit(X_train, y_train)

In [None]:
# Predicted labels for the held-out rows.
predictions = model.predict(X_test)
predictions

In [None]:
# Fraction of held-out rows whose label was predicted correctly.
model_score = accuracy_score(y_test, predictions)
model_score

### Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Seeded so the split and the forest are reproducible across Restart & Run All.
model = RandomForestClassifier(random_state=42)

# Hold out 20% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model.fit(X_train, y_train)

# The bare `predictions` expression that used to follow was removed: a
# mid-cell expression is never rendered, only the cell's last one is.
predictions = model.predict(X_test)

# Accuracy on the held-out rows; shown as the cell output.
model_score = accuracy_score(y_test, predictions)
model_score

### KNeighborsClassifier

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# k-NN classifies by distance between rows, so a feature on a large numeric
# scale (such as 'Flight Distance') would dominate small-valued ones (such as
# the 0/1 category codes). Standardizing inside a pipeline puts all features on
# a comparable scale and fits the scaler on the training rows only.
model = make_pipeline(StandardScaler(), KNeighborsClassifier())

# Hold out 20% of the rows; seeded so the reported accuracy is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model.fit(X_train, y_train)

# The bare `predictions` expression that used to follow was removed: a
# mid-cell expression is never rendered, only the cell's last one is.
predictions = model.predict(X_test)

# Accuracy on the held-out rows; shown as the cell output.
model_score = accuracy_score(y_test, predictions)
model_score

### Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# NOTE(review): max_iter=10000 is a very generous iteration cap; standardizing
# the features would likely let the solver converge much sooner — consider it.
model = LogisticRegression(max_iter=10000)

# Hold out 20% of the rows; seeded so the reported accuracy is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model.fit(X_train, y_train)

# The bare `predictions` expression that used to follow was removed: a
# mid-cell expression is never rendered, only the cell's last one is.
predictions = model.predict(X_test)

# Accuracy on the held-out rows; shown as the cell output.
model_score = accuracy_score(y_test, predictions)
model_score

## Prediction Without Voting Columns

In [None]:
# Restrict the features to the eight columns listed here, leaving out the
# remaining ("voting") columns.
X = df[['Gender', 'Age', 'Customer Type', 'Type of Travel', 'Class', 'Flight Distance', 'Departure Delay', 'Arrival Delay']]
X.head()

In [None]:
# Target vector: the 'Satisfaction' column.
y = df['Satisfaction']
y.head()

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Seeded so the split and the forest are reproducible across Restart & Run All.
model = RandomForestClassifier(random_state=42)

# Hold out 20% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model.fit(X_train, y_train)

# The bare `predictions` expression that used to follow was removed: a
# mid-cell expression is never rendered, only the cell's last one is.
predictions = model.predict(X_test)

# Accuracy on the held-out rows; shown as the cell output.
model_score = accuracy_score(y_test, predictions)
model_score

In [None]:
# Zero-row slice converted to a dict: each feature name maps to an empty dict,
# which serves as a template of the column names the model expects.
X.iloc[:0].to_dict()

In [None]:
# Two hand-made passengers used to sanity-check the model.
# Categorical fields must carry the integer codes produced by the encoding step.
# NOTE(review): if OPTION 1 (LabelEncoder) was used, codes follow the sorted
# label order, e.g. Gender: Female=0, Male=1; Class: Business=0, Economy=1,
# Economy Plus=2 — confirm against the fitted encoders.
test_inputs = {
    'Gender': [1, 0],
    'Age': [35, 25],
    'Customer Type': [0, 1],
    'Type of Travel': [0, 0],
    'Class': [1, 1],
    'Flight Distance': [1200, 600],
    'Departure Delay': [0, 55],
    'Arrival Delay': [0, 0]
}

test_df = pd.DataFrame(test_inputs)
test_df

In [None]:
# Predict the satisfaction label for each hand-made sample row.
model.predict(test_df)

## Saving Prediction Model

In [None]:
import joblib

# Persist the fitted model to a file in the current working directory.
joblib.dump(model, 'airline_passenger_satisfaction.joblib')

In [None]:
# Sample passengers for checking the model reloaded from disk.
# Categorical fields must carry the same integer codes the model was trained on.
# NOTE(review): confirm the code-to-label mapping against the encoding step.
test_inputs = {
    'Gender': [1, 0],
    'Age': [35, 25],
    'Customer Type': [0, 1],
    'Type of Travel': [0, 0],
    'Class': [1, 1],
    'Flight Distance': [1200, 600],
    'Departure Delay': [0, 55],
    'Arrival Delay': [0, 0]
}

test_df = pd.DataFrame(test_inputs)
test_df

In [None]:
# Reload the persisted model from disk and predict on the sample rows.
# NOTE: joblib.load unpickles arbitrary Python objects — only load files that
# come from a trusted source.
trained_model = joblib.load('airline_passenger_satisfaction.joblib')
trained_model.predict(test_df)