# SVR Notebook

#### *Author: Kunyu He*
#### *University of Chicago, CAPP'20*

In [101]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR

%matplotlib notebook

### Load Data

In [102]:
# Read the position/salary table from a CSV path relative to the working
# directory, then preview the first five rows as the cell output.
salary = pd.read_csv(filepath_or_buffer="Position_Salaries.csv")
salary.head(n=5)

Unnamed: 0,Position,Level,Salary
0,Business Analyst,1,45000
1,Junior Consultant,2,50000
2,Senior Consultant,3,60000
3,Manager,4,80000
4,Country Manager,5,110000


### Data Cleaning

In [103]:
# Count missing entries per column (`isna` is the pandas alias of `isnull`).
salary.isna().sum()

Position    0
Level       0
Salary      0
dtype: int64

No values are missing in any column.

### Feature Selection

In [104]:
# Select the predictor by column label instead of by position, so a change in
# the CSV column order cannot silently pick a different feature (the target is
# likewise chosen by name). The double brackets keep the result two-dimensional,
# i.e. one row per observation and a single feature column.
X = salary[["Level"]].values
X.shape

(10, 1)

In [105]:
# Target: the Salary column as a single-column 2-D array (one row per
# observation), the layout that is later handed to the scaler.
y = salary["Salary"].values[:, np.newaxis]
y.shape

(10, 1)

### Feature Scaling

In [106]:
# Two independent scalers: one for the feature, one for the target, so each
# keeps its own fitted statistics.
sc_X, sc_y = StandardScaler(), StandardScaler()

In [116]:
# Fit each scaler on its own array, then apply it to that same array.
X_scaled = sc_X.fit(X).transform(X)
y_scaled = sc_y.fit(y).transform(y)



### Model Training

As we only have ten observations, we are using the whole data set to train our model.

In [110]:
# `fit` returns the estimator itself, so fitting one SVR object twice would
# leave both names pointing at the same model (trained on whichever data was
# fitted last). Each data set therefore gets its own estimator instance.

# Model trained on the raw (unscaled) level and salary values.
SVR_raw = SVR(kernel='rbf', gamma='auto').fit(X, np.ravel(y))

# Model trained on the standardized values. `svr` stays bound to this
# estimator, and the existing (misspelled) name `SVR_sclaed` is kept because
# later cells refer to it.
svr = SVR(kernel='rbf', gamma='auto')
SVR_sclaed = svr.fit(X_scaled, np.ravel(y_scaled))

### Model Evaluation

#### Without Scaling

In [112]:
# Draw on an explicitly created figure/axes instead of pyplot's implicit
# "current figure": with the interactive notebook backend a still-active
# figure would otherwise receive these artists (e.g. when the cell is re-run).
fig_raw, ax_raw = plt.subplots()

# Observed salaries in red, predictions of SVR_raw at the same levels in blue.
ax_raw.scatter(X, y, color="red")
ax_raw.plot(X, SVR_raw.predict(X), color="blue")

ax_raw.set_title("Salary against Position Level (SVR without scaling)")
ax_raw.set_xlabel("Position Level")
ax_raw.set_ylabel("Salary ($)")
plt.show()

<IPython.core.display.Javascript object>

#### Scaled

In [115]:
# Use a dedicated figure so this plot is not drawn on top of an earlier,
# still-active interactive figure.
fig_scaled, ax_scaled = plt.subplots()

# The model predicts in standardized salary units; map the predictions back to
# dollars so the axes really show what their labels say. `inverse_transform`
# expects a 2-D array, hence the reshape to a single column.
salary_pred = sc_y.inverse_transform(
    SVR_sclaed.predict(X_scaled).reshape(-1, 1)
).ravel()

# Observed salaries in red, back-transformed predictions in blue, both plotted
# against the original position levels.
ax_scaled.scatter(X, y, color="red")
ax_scaled.plot(X, salary_pred, color="blue")

ax_scaled.set_title("Salary against Position Level (SVR scaled)")
ax_scaled.set_xlabel("Position Level")
ax_scaled.set_ylabel("Salary ($)")
plt.show()

<IPython.core.display.Javascript object>