SVM - Support Vector Machine supports both regression and classification

#### Loading Dataset

In [1]:
# To Avoid Warnings
import warnings
warnings.filterwarnings("ignore")

#importing the Libraies
import numpy as np
import pandas as pd

dataset = pd.read_csv("50_Startups.csv")
dataset.head()

Unnamed: 0,R&D Spend,Administration,Marketing Spend,State,Profit
0,165349.2,136897.8,471784.1,New York,192261.83
1,162597.7,151377.59,443898.53,California,191792.06
2,153441.51,101145.55,407934.54,Florida,191050.39
3,144372.41,118671.85,383199.62,New York,182901.99
4,142107.34,91391.77,366168.42,Florida,166187.94


In [2]:
print("Columns:",[i for i in dataset.columns])

Columns: ['R&D Spend', 'Administration', 'Marketing Spend', 'State', 'Profit']


#### Converting Categorical to Numerical Column

In [3]:
dataset = (pd.get_dummies(dataset,drop_first=True)).astype(int)
dataset.head()

Unnamed: 0,R&D Spend,Administration,Marketing Spend,Profit,State_Florida,State_New York
0,165349,136897,471784,192261,0,1
1,162597,151377,443898,191792,0,0
2,153441,101145,407934,191050,1,0
3,144372,118671,383199,182901,0,1
4,142107,91391,366168,166187,1,0


#### Assigning Input and Output Variables (5 i/p & 1 o/p)

In [4]:
independent=dataset[['R&D Spend', 'Administration', 'Marketing Spend','State_Florida', 'State_New York']]
independent.head()

Unnamed: 0,R&D Spend,Administration,Marketing Spend,State_Florida,State_New York
0,165349,136897,471784,0,1
1,162597,151377,443898,0,0
2,153441,101145,407934,1,0
3,144372,118671,383199,0,1
4,142107,91391,366168,1,0


In [5]:
dependent=dataset[["Profit"]]
dependent.head()

Unnamed: 0,Profit
0,192261
1,191792
2,191050
3,182901
4,166187


#### Split of Data into Train and Test

In [6]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(independent, dependent, test_size=0.30,random_state=44)

In [7]:
X_train.head(2)

Unnamed: 0,R&D Spend,Administration,Marketing Spend,State_Florida,State_New York
2,153441,101145,407934,1,0
12,93863,127320,249839,1,0


In [8]:
y_train.head(2)

Unnamed: 0,Profit
2,191050
12,141585


#### Normalizing input data using Standard Scaler

In [9]:
from sklearn.preprocessing import StandardScaler
sc=StandardScaler()
X_train=sc.fit_transform(X_train) # fit - the mean and standard deviation values are calculated
X_test=sc.transform(X_test) # transform - the same calculated mean and standard deviation are applied and transformed

In [10]:
pd.DataFrame(X_train).head(2)

Unnamed: 0,0,1,2,3,4
0,1.643894,-0.683088,1.490407,1.300887,-0.722315
1,0.393038,0.250558,0.25086,1.300887,-0.722315


#### Training Model using Train Data (X_train, y_train)


##### SVM is mostly used for non-linear data. So to make SVM, to consider the data as linear or non-linear, we use the parameter 'kernel'.

In [11]:
from sklearn.svm import SVR
regressor=SVR(kernel="linear",C=1000) # c is the parameter which determines margin width of decision boundary
regressor.fit(X_train,y_train)

In [12]:
print("Intercept: ",regressor.intercept_)
print("Number of Support Vectors:",regressor.n_support_)
print("Support Vector Points:", regressor.support_)

Intercept:  [112934.97766728]
Number of Support Vectors: [35]
Support Vector Points: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34]


#### Example Predicting using X_test

In [13]:
y_pred=regressor.predict(X_test)
(pd.DataFrame(y_pred)).head()

Unnamed: 0,0
0,89938.561679
1,135998.797888
2,64118.162564
3,89989.407936
4,148814.114349


In [14]:
(pd.DataFrame(y_test)).head()

Unnamed: 0,Profit
37,89949
6,156122
47,42559
36,90708
5,156991


In [15]:
from sklearn.metrics import r2_score
r_score=r2_score(y_test,y_pred)

In [16]:
print("R2-Score:",r_score)

R2-Score: 0.8583491079326067


#### Saving Trained Model

In [17]:
import pickle
filename="Support_Vector_Machine_Regression.sav"
pickle.dump(regressor,open(filename,"wb"))