In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer



In [2]:
data= pd.read_csv('Customer-survey-data.csv')

In [3]:
data.head()

Unnamed: 0,Customer,Delivery_Experiance,Food_Quality,Delivery_Speed,Order_Accurate
0,1,5.0,3.0,4.0,Yes
1,2,3.0,4.0,3.0,Yes
2,3,4.0,5.0,2.0,Yes
3,4,5.0,3.0,4.0,Yes
4,5,2.0,5.0,1.0,Yes


In [4]:
data.shape

(10616, 5)

In [5]:
data.isnull().sum()

Customer                 0
Delivery_Experiance    418
Food_Quality           252
Delivery_Speed         239
Order_Accurate         660
dtype: int64

In [6]:
data.dropna()

Unnamed: 0,Customer,Delivery_Experiance,Food_Quality,Delivery_Speed,Order_Accurate
0,1,5.0,3.0,4.0,Yes
1,2,3.0,4.0,3.0,Yes
2,3,4.0,5.0,2.0,Yes
3,4,5.0,3.0,4.0,Yes
4,5,2.0,5.0,1.0,Yes
...,...,...,...,...,...
10611,10612,4.0,2.0,3.0,Yes
10612,10613,3.0,3.0,3.0,Yes
10613,10614,5.0,3.0,3.0,Yes
10614,10615,3.0,3.0,3.0,Yes


In [7]:
numerical_feature = ['Delivery_Experiance', 'Food_Quality', 'Delivery_Speed']

categorical = ['Order_Accurate']

In [8]:
numerical_transform = Pipeline(steps=[('imputer', SimpleImputer(strategy ='median')),
                                     ('scaler',StandardScaler())])

In [9]:
from sklearn.preprocessing import OneHotEncoder

In [10]:
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

In [11]:
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transform, numerical_feature),
        ('cat', categorical_transformer, categorical)
    ])

In [12]:
preprocessor

In [13]:
data.head()

Unnamed: 0,Customer,Delivery_Experiance,Food_Quality,Delivery_Speed,Order_Accurate
0,1,5.0,3.0,4.0,Yes
1,2,3.0,4.0,3.0,Yes
2,3,4.0,5.0,2.0,Yes
3,4,5.0,3.0,4.0,Yes
4,5,2.0,5.0,1.0,Yes


In [14]:
data.describe()

Unnamed: 0,Customer,Delivery_Experiance,Food_Quality,Delivery_Speed
count,10616.0,10198.0,10364.0,10377.0
mean,5308.5,3.32526,3.332015,3.322926
std,3064.719563,1.419754,1.414709,1.408918
min,1.0,1.0,1.0,1.0
25%,2654.75,2.0,2.0,2.0
50%,5308.5,3.0,3.0,3.0
75%,7962.25,5.0,5.0,5.0
max,10616.0,5.0,5.0,5.0


In [15]:
data['Satisfaction'] = data['Delivery_Experiance'].apply(lambda x: 1 if x >= 4 else 0 if pd.notnull(x) else np.nan)

In [16]:
data.dropna(subset=['Satisfaction'], inplace=True)

In [17]:
data.head()

Unnamed: 0,Customer,Delivery_Experiance,Food_Quality,Delivery_Speed,Order_Accurate,Satisfaction
0,1,5.0,3.0,4.0,Yes,1.0
1,2,3.0,4.0,3.0,Yes,0.0
2,3,4.0,5.0,2.0,Yes,1.0
3,4,5.0,3.0,4.0,Yes,1.0
4,5,2.0,5.0,1.0,Yes,0.0


In [18]:
X = data.drop(columns=['Satisfaction', 'Customer','Order_Accurate'])
y = data['Satisfaction']

In [20]:
X.fillna(X.mean(), inplace=True)

In [21]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [22]:
from sklearn.ensemble import GradientBoostingClassifier

In [23]:
gbc = GradientBoostingClassifier(n_estimators=100,learning_rate=0.1,max_depth=3,random_state=42)

In [25]:
gbc.fit(X_train,y_train)

In [26]:
y_pred = gbc.predict(X_test)

In [27]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Calculate metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

In [28]:
print(f'Accuracy: {accuracy}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1 Score: {f1}')

Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0


In [42]:
conf_matrix = confusion_matrix(y_test, y_pred)
print(conf_matrix)

[[1150    0]
 [   0  890]]
