### Support Vector Regression

In [125]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt


In [126]:
## Load seaborn's "tips" example dataset into a DataFrame
df=sns.load_dataset('tips')

In [127]:
df.head() ## We have to predict total_bill based on the remaining features

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [128]:
df.info() ## sex, smoker, day and time are categorical features; total_bill, tip and size are numeric

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244 entries, 0 to 243
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   total_bill  244 non-null    float64 
 1   tip         244 non-null    float64 
 2   sex         244 non-null    category
 3   smoker      244 non-null    category
 4   day         244 non-null    category
 5   time        244 non-null    category
 6   size        244 non-null    int64   
dtypes: category(4), float64(2), int64(1)
memory usage: 7.4 KB


In [129]:
## Category counts for sex (two categories)
df['sex'].value_counts()

sex
Male      157
Female     87
Name: count, dtype: int64

In [130]:
## Category counts for smoker (two categories)
df['smoker'].value_counts()

smoker
No     151
Yes     93
Name: count, dtype: int64

In [131]:
## Category counts for day (four categories; Fri is the rarest)
df['day'].value_counts()

day
Sat     87
Sun     76
Thur    62
Fri     19
Name: count, dtype: int64

In [132]:
## Category counts for time (two categories)
df['time'].value_counts()

time
Dinner    176
Lunch      68
Name: count, dtype: int64

In [133]:
df.isnull().sum() ## Missing values per column: none, so no imputation is needed

total_bill    0
tip           0
sex           0
smoker        0
day           0
time          0
size          0
dtype: int64

In [134]:
## List the column names (used below to select the features and the target)
df.columns

Index(['total_bill', 'tip', 'sex', 'smoker', 'day', 'time', 'size'], dtype='object')

In [135]:
## Separate the predictors (x) from the regression target (y)
target_col='total_bill'
feature_cols=['tip', 'sex', 'smoker', 'day', 'time', 'size']
x=df[feature_cols]
y=df[target_col]

In [136]:
## Hold out 25% of the rows as a test set; the fixed random_state keeps the split reproducible
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=10,
)

In [137]:
## Show the training rows for Friday, the rarest day (19 rows in the full data), to check it is present in the training split
x_train[x_train['day']=='Fri']

Unnamed: 0,tip,sex,smoker,day,time,size
99,1.5,Male,No,Fri,Dinner,2
95,4.73,Male,Yes,Fri,Dinner,4
101,3.0,Female,Yes,Fri,Dinner,2
90,3.0,Male,Yes,Fri,Dinner,2
225,2.5,Female,Yes,Fri,Lunch,2
97,1.5,Male,Yes,Fri,Dinner,2
96,4.0,Male,Yes,Fri,Dinner,2
223,3.0,Female,No,Fri,Lunch,3
94,3.25,Female,No,Fri,Dinner,2
93,4.3,Female,Yes,Fri,Dinner,2


Next, label-encode `sex`, `smoker`, and `time`, since each has only two categories, and one-hot encode (OHE) `day`, since it has four categories.

In [138]:
## One LabelEncoder per binary feature (sex, smoker, time) so that each keeps its own fitted classes
from sklearn.preprocessing import LabelEncoder
le1,le2,le3=(LabelEncoder() for _ in range(3))

In [139]:
## Label-encode the binary columns, fitting each encoder on the training rows only.
## The raw labels are read from df (selected by x_train's index) rather than from x_train itself,
## so re-running this cell never refits the encoders on already-encoded integers.
## NOTE(review): scikit-learn documents LabelEncoder as an encoder for target values (y);
## OrdinalEncoder is the feature-side equivalent if this is ever refactored.
x_train=x_train.copy() ## explicit copy so the column assignments below act on an independent frame
x_train['sex']=le1.fit_transform(df.loc[x_train.index,'sex'])
x_train['smoker']=le2.fit_transform(df.loc[x_train.index,'smoker'])
x_train['time']=le3.fit_transform(df.loc[x_train.index,'time'])

In [140]:
## Check the result: sex, smoker and time now hold integer codes; day is still text
x_train.head()

Unnamed: 0,tip,sex,smoker,day,time,size
58,1.76,1,1,Sat,0,2
1,1.66,1,0,Sun,0,3
2,3.5,1,0,Sun,0,3
68,2.01,1,0,Sat,0,2
184,3.0,1,1,Sun,0,2


In [141]:
### Now encode the test data with the encoders fitted on the training data (transform only, no refit).
### The raw labels are read from df (selected by x_test's index) rather than from x_test itself,
### so re-running this cell does not try to transform already-encoded integers.
x_test=x_test.copy() ## explicit copy so the column assignments below act on an independent frame
x_test['sex']=le1.transform(df.loc[x_test.index,'sex'])
x_test['smoker']=le2.transform(df.loc[x_test.index,'smoker'])
x_test['time']=le3.transform(df.loc[x_test.index,'time'])

In [142]:
## Check the result: sex, smoker and time now hold integer codes; day is still text
x_test.head()

Unnamed: 0,tip,sex,smoker,day,time,size
162,2.0,0,0,Sun,0,3
60,3.21,1,1,Sat,0,2
61,2.0,1,1,Sat,0,2
63,3.76,1,1,Sat,0,4
69,2.09,1,1,Sat,0,2


In [143]:
## One-hot encoding (OHE) of `day` using ColumnTransformer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

In [144]:
## One-hot encode `day` (drop='first' removes the first category's column) and pass all other columns through unchanged.
## The column is selected by name instead of by position, so the encoder keeps targeting `day`
## even if the order of the feature columns changes. This requires DataFrame input at fit/transform time.
ct=ColumnTransformer(transformers=[('onehot',OneHotEncoder(drop='first'),['day'])],remainder='passthrough')

In [None]:
## Fit the column transformer on the training split and transform it.
## NOTE(review): this rebinds x_train from the DataFrame to the transformer's output, so the cell
## cannot simply be run twice; re-run the split and label-encoding cells first.
x_train=ct.fit_transform(x_train)

In [147]:
## Apply the transformer fitted on the training split to the test split (transform only, no refit).
## NOTE(review): this rebinds x_test to the transformer's output, so the cell is not re-runnable on its own.
x_test=ct.transform(x_test)

In [148]:
## Support Vector Regressor with scikit-learn's default hyperparameters
from sklearn.svm import SVR
svr=SVR()

In [149]:
## Train the regressor on the encoded training split.
## NOTE(review): no feature scaling is applied anywhere before this fit; SVR is sensitive to
## feature scale, so standardizing the features is worth trying.
svr.fit(x_train,y_train)

In [152]:
## Evaluate on the held-out test split.
from sklearn.metrics import r2_score
y_pred=svr.predict(x_test)
## r2_score is the coefficient of determination (R^2) of a regression, not a classification accuracy,
## so the printed label names it as such.
score=r2_score(y_test,y_pred)
print("R2 Score: ",score)

Accuracy Score:  0.46028114561159283
