# Tip Prediction Using Polynomial Regression

In [9]:
# Import libraries

import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

In [10]:
# Load seaborn's "tips" example dataset into a DataFrame and preview the first rows
df = sns.load_dataset('tips')
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [11]:
# Missing-value check: count the NaN entries in each column
df.isna().sum()

total_bill    0
tip           0
sex           0
smoker        0
day           0
time          0
size          0
dtype: int64

There are no missing values in any column.

In [12]:
# Data-type check: show the dtype of every column
df.dtypes

total_bill     float64
tip            float64
sex           category
smoker        category
day           category
time          category
size             int64
dtype: object

Several features (`sex`, `smoker`, `day`, `time`) are categorical. Because this example models only the numeric predictors, we drop these columns instead of encoding them.

In [13]:
# Keep only the non-categorical columns (this removes sex, smoker, day and time).
# Selecting by dtype instead of dropping a hard-coded list makes the cell
# idempotent: re-running it no longer raises KeyError for already-removed columns.
df = df.select_dtypes(exclude='category')

In [14]:
# Target: the tip amount. Predictors: every other remaining column.
y = df.loc[:, 'tip']
X = df.drop(columns=['tip'])


In [15]:
# Use 80% of the rows for training; the fixed random_state keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.80,
    random_state=1,
)

In [16]:
# Build the expanded feature set. The transformer is fitted on the training
# split only, and that same fitted transformer is then applied to the test split.
# NOTE(review): interaction_only=True keeps only products of distinct input
# features (no x**2 / x**3 terms), so with the two predictors left in X the
# output is total_bill, size and total_bill*size — confirm this is intended.
Poli = PolynomialFeatures(degree=3, include_bias=False, interaction_only=True)
X_trainPoli = Poli.fit_transform(X_train)
X_testPoli = Poli.transform(X_test)

In [17]:
# Wrap the transformed train/test features in DataFrames
df_XtrainPoli = pd.DataFrame(data=X_trainPoli)
df_XtestPoli = pd.DataFrame(data=X_testPoli)

### Creating the model and the evaluation metrics

In [31]:
def Eva_Matrix_Base_Class(model, X_train, X_test, y_train, y_test, Nama):
    """Fit ``model`` on the training split and tabulate its regression metrics.

    Parameters
    ----------
    model : estimator
        Object whose ``fit(X, y)`` returns the fitted estimator and which
        offers ``predict(X)``. ``fit`` is called on the object passed in.
    X_train, X_test : array-like
        Feature matrices for the training and testing splits.
    y_train, y_test : array-like
        Target values matching ``X_train`` and ``X_test``.
    Nama : str
        Model label used as the prefix of the two result columns
        (``"<Nama> Training"`` and ``"<Nama> Testing"``).

    Returns
    -------
    pandas.DataFrame
        Rows ``R2_Score``, ``MAE``, ``MSE`` and ``RMSE``; one column per
        split, every value rounded to two decimals.
    """
    fitted = model.fit(X_train, y_train)

    def _rounded_scores(features, target):
        # Score a single split; RMSE is the square root of the MSE computed just above it.
        predicted = fitted.predict(features)
        r2 = r2_score(target, predicted)
        mae = mean_absolute_error(target, predicted)
        mse = mean_squared_error(target, predicted)
        rmse = np.sqrt(mse)
        return [round(score, 2) for score in (r2, mae, mse, rmse)]

    # Training split is scored before the testing split.
    train_scores = _rounded_scores(X_train, y_train)
    test_scores = _rounded_scores(X_test, y_test)

    columns = {
        Nama + " Training": train_scores,
        Nama + " Testing": test_scores,
    }
    return pd.DataFrame(columns, index=["R2_Score", "MAE", "MSE", "RMSE"])

### Evaluation metrics before adding polynomial features

In [32]:
# Baseline: plain linear regression on the original (untransformed) predictors
df_eva_base_model = Eva_Matrix_Base_Class(
    model=LinearRegression(),
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
    Nama='Linear Regression Base Model',
)
df_eva_base_model

Unnamed: 0,Linear Regression Base Model Training,Linear Regression Base Model Testing
R2_Score,0.44,0.51
MAE,0.74,0.79
MSE,0.96,1.3
RMSE,0.98,1.14


### Evaluation metrics after adding polynomial features

In [33]:
# Same estimator type, trained on the polynomial-feature version of the predictors
df_eva_PF = Eva_Matrix_Base_Class(
    model=LinearRegression(),
    X_train=df_XtrainPoli,
    X_test=df_XtestPoli,
    y_train=y_train,
    y_test=y_test,
    Nama='Linear Regression Polynomial Features',
)
df_eva_PF

Unnamed: 0,Linear Regression Polynomial Features Training,Linear Regression Polynomial Features Testing
R2_Score,0.44,0.5
MAE,0.74,0.8
MSE,0.96,1.31
RMSE,0.98,1.14


In [34]:
# Put both reports side by side, then transpose so each model/split becomes a row
pd.concat([df_eva_base_model, df_eva_PF], axis="columns").transpose()

Unnamed: 0,R2_Score,MAE,MSE,RMSE
Linear Regression Base Model Training,0.44,0.74,0.96,0.98
Linear Regression Base Model Testing,0.51,0.79,1.3,1.14
Linear Regression Polynomial Features Training,0.44,0.74,0.96,0.98
Linear Regression Polynomial Features Testing,0.5,0.8,1.31,1.14


Conclusion:

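From the evaluation table above, the two models perform almost identically (test R² of 0.51 vs. 0.50, and a test RMSE of 1.14 for both), so on this dataset either the original features or the polynomial features can be used for modeling. Note that with `interaction_only=True` and only two input features (`total_bill` and `size`), the transform adds just a single `total_bill × size` interaction term and no squared or cubic terms, which helps explain why the results are so close.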