# Tips Dataset Model 

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split,cross_val_score

# 1.Import Dataset

In [None]:
df = sns.load_dataset('tips')
df.head()

# 2.Preprocessing

In [None]:
df.isnull().sum()
df.duplicated().sum()
df = pd.get_dummies(df, columns=['sex', 'day', 'time'], drop_first=True)
df['smoker'] = df['smoker'].map({'Yes': 1, 'No': 0})

# 3.Create X and Y

In [None]:
X = df.drop(columns=['smoker'])
y = df['smoker']

In [None]:
df.head()

# 5.Train Test Split

In [None]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)

# 6.Create and Train Model

In [None]:
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score
model = DecisionTreeClassifier()
model.fit(X_train,y_train)
y_pred = model.predict(X_test)

In [None]:
# Step 9: Evaluation
acc = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred)
rec = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# Step 10: Display results
print("🔍 Model Evaluation Metrics")
print(f"Accuracy Score  : {acc:.2f}")
print(f"Precision Score : {prec:.2f}")
print(f"Recall Score    : {rec:.2f}")
print(f"F1 Score        : {f1:.2f}")


# Decision Tree Plot 

In [None]:
from sklearn import tree 
plt.figure(figsize=(20,10))
tree.plot_tree(model.fit(X,y),filled=True)
plt.show()

In [None]:
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test,y_pred)

In [None]:
sns.heatmap(cm,annot=True,fmt='d',cmap='Greens')
plt.title('Confusion Matrix Heatmap')
plt.xticks([0.5,1.5],['Not Smoker',' Smoker'])
plt.yticks([0.5,1.5],['Not Smoker',' Smoker'], rotation=0)
plt.xlabel('Predicted Label')
plt.ylabel('Actual Label')

# Define Models to Evaluate

In [None]:
models = [LogisticRegression(),RandomForestClassifier(),KNeighborsClassifier(),SVC()]

names = ['LogisticRegression','DecisionTree','KNN','SVC']

# Define K-Fold CV 

In [None]:
k = 10
for name , model in zip(names,models):
    score = cross_val_score(model,X_train,y_train,cv=k)
    print(f'{name} CV Accuracy : {np.mean(score):.2f} +/- {np.std(score):.2f}')

# Apply Hyper Parameter Tuning on SVC

In [None]:
from sklearn.model_selection import GridSearchCV
# define parameters
param_grid = {
    'C':[0.1,1,10],
    'kernel':['linear','rbf'],
    'gamma':[0.1,1,10]
}
grid_search = GridSearchCV(SVC(),param_grid,cv=5)
grid_search.fit(X_train,y_train)

In [None]:
print(f'Best Parameters : {grid_search.best_params_}')
print(f'Best Score : {grid_search.best_score_}')

# Accuracy Score of All Models

In [245]:
model_score = []
for name , model in zip(names,models):
    model.fit(X_train,y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test,y_pred)
    model_score.append([name,acc])

In [252]:
sorted_model = sorted(model_score,key=lambda x:x[1],reverse=True)

In [255]:
for model in sorted_model:    
    print(f'{model[0]} : {model[1]:.2f}')

LogisticRegression : 0.73
DecisionTree : 0.67
KNN : 0.65
SVC : 0.59
