In [50]:
import numpy as np
import pandas as pd
import seaborn as sns

In [51]:
# Load seaborn's bundled "tips" example dataset into a DataFrame.
df = sns.load_dataset(name='tips')

In [52]:
# Summarise column dtypes, non-null counts and memory usage of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244 entries, 0 to 243
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   total_bill  244 non-null    float64 
 1   tip         244 non-null    float64 
 2   sex         244 non-null    category
 3   smoker      244 non-null    category
 4   day         244 non-null    category
 5   time        244 non-null    category
 6   size        244 non-null    int64   
dtypes: category(4), float64(2), int64(1)
memory usage: 7.4 KB


In [53]:
# Number of rows recorded for each day of the week.
day_counts = df['day'].value_counts()
day_counts

day
Sat     87
Sun     76
Thur    62
Fri     19
Name: count, dtype: int64

In [54]:
# Number of rows for each value of the smoker flag.
smoker_counts = df['smoker'].value_counts()
smoker_counts

smoker
No     151
Yes     93
Name: count, dtype: int64

In [55]:
# Inspect the column names before choosing the dependent (target) and
# independent (feature) variables.

df.columns

Index(['total_bill', 'tip', 'sex', 'smoker', 'day', 'time', 'size'], dtype='object')

In [56]:
# Features: every column except the bill amount; target: the bill amount itself.
# .copy() makes X an independent frame, so assigning encoded columns to it
# later does not raise pandas' SettingWithCopyWarning.
X = df[['tip', 'sex', 'smoker', 'day', 'time', 'size']].copy()
y = df['total_bill']

In [57]:
from sklearn.preprocessing import LabelEncoder

# One independent encoder per binary categorical column, so each keeps its own
# fitted classes_ mapping.
l1, l2, l3 = (LabelEncoder() for _ in range(3))

In [58]:
import warnings  # left in place: other cells may rely on the name; no filter is installed here

# Replace the two-valued categorical columns with integer codes.
# DataFrame.assign returns a new frame instead of writing into a slice of df,
# so there is no SettingWithCopyWarning to hide and no need to silence every
# warning for the rest of the session.
X = X.assign(
    sex=l1.fit_transform(X['sex']),
    smoker=l2.fit_transform(X['smoker']),
    time=l3.fit_transform(X['time']),
)

In [59]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the multi-valued 'day' column and pass every other column
# through unchanged. The encoded columns come first in the output, followed by
# the remaining columns in their original order.
# - The column is selected by name rather than by position, so a change in
#   column order (or re-running this cell on the already-transformed array)
#   fails loudly instead of silently encoding the wrong column.
# - sparse_threshold=0 forces a dense result; np.array() on a sparse matrix
#   would otherwise produce a 0-d object array.
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), ['day'])],
    remainder='passthrough',
    sparse_threshold=0,
)
X = np.array(ct.fit_transform(X))

In [60]:
import sys

# Print the full encoded feature matrix without NumPy's "..." summarisation.
# The context manager restores the previous print options on exit, so the
# unlimited threshold does not leak into every later cell of the kernel.
with np.printoptions(threshold=sys.maxsize):
    print(X)

[[ 0.    0.    1.    0.    1.01  0.    0.    0.    2.  ]
 [ 0.    0.    1.    0.    1.66  1.    0.    0.    3.  ]
 [ 0.    0.    1.    0.    3.5   1.    0.    0.    3.  ]
 [ 0.    0.    1.    0.    3.31  1.    0.    0.    2.  ]
 [ 0.    0.    1.    0.    3.61  0.    0.    0.    4.  ]
 [ 0.    0.    1.    0.    4.71  1.    0.    0.    4.  ]
 [ 0.    0.    1.    0.    2.    1.    0.    0.    2.  ]
 [ 0.    0.    1.    0.    3.12  1.    0.    0.    4.  ]
 [ 0.    0.    1.    0.    1.96  1.    0.    0.    2.  ]
 [ 0.    0.    1.    0.    3.23  1.    0.    0.    2.  ]
 [ 0.    0.    1.    0.    1.71  1.    0.    0.    2.  ]
 [ 0.    0.    1.    0.    5.    0.    0.    0.    4.  ]
 [ 0.    0.    1.    0.    1.57  1.    0.    0.    2.  ]
 [ 0.    0.    1.    0.    3.    1.    0.    0.    4.  ]
 [ 0.    0.    1.    0.    3.02  0.    0.    0.    2.  ]
 [ 0.    0.    1.    0.    3.92  1.    0.    0.    2.  ]
 [ 0.    0.    1.    0.    1.67  0.    0.    0.    3.  ]
 [ 0.    0.    1.    0.    3.71

In [61]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
from sklearn.model_selection import train_test_split

split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [62]:
# Feature scaling: learn mean/variance on the training rows only, then apply
# the same transformation to both splits.
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [63]:
from sklearn.svm import SVR

# Baseline model: linear-kernel support vector regressor with default settings.
regressor = SVR(kernel='linear').fit(X_train, y_train)
y_pred = regressor.predict(X_test)


In [64]:
from sklearn.metrics import r2_score, mean_squared_error

# Score the baseline predictions against the held-out targets.
baseline_r2 = r2_score(y_test, y_pred)
baseline_mse = mean_squared_error(y_test, y_pred)
print('R2 Score:', baseline_r2)
print('MSE:', baseline_mse)

R2 Score: 0.666418187261878
MSE: 28.28391618693945


In [67]:
# Hyperparameter tuning
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVR

# Candidate values for each SVR hyperparameter (4 x 3 x 2 = 24 combinations).
parameters = {
    'C': [0.1, 1, 10, 100],
    'kernel': ['linear', 'rbf', 'poly'],
    'gamma': ['scale', 'auto'],
}

# Exhaustive 5-fold cross-validated search scored by R^2, using all CPU cores.
grid_search = GridSearchCV(
    estimator=SVR(),
    param_grid=parameters,
    scoring='r2',
    cv=5,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

0,1,2
,estimator,SVR()
,param_grid,"{'C': [0.1, 1, ...], 'gamma': ['scale', 'auto'], 'kernel': ['linear', 'rbf', ...]}"
,scoring,'r2'
,n_jobs,-1
,refit,True
,cv,5
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,kernel,'linear'
,degree,3
,gamma,'scale'
,coef0,0.0
,tol,0.001
,C,10
,epsilon,0.1
,shrinking,True
,cache_size,200
,verbose,False


In [68]:
# Hyperparameter combination selected by the grid search.
grid_search.best_params_

{'C': 10, 'gamma': 'scale', 'kernel': 'linear'}

In [71]:
from sklearn.metrics import r2_score, mean_squared_error

# Predict with the refitted best estimator and score it on the same held-out
# test set used for the baseline.
y_pred_gs = grid_search.predict(X_test)
tuned_r2 = r2_score(y_test, y_pred_gs)
tuned_mse = mean_squared_error(y_test, y_pred_gs)
print('R2 Score after Hyperparameter Tuning:', tuned_r2)
print('MSE after Hyperparameter Tuning:', tuned_mse)

R2 Score after Hyperparameter Tuning: 0.661013322573406
MSE after Hyperparameter Tuning: 28.742186794067845
