## Data Preprocessing

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Load the salary dataset; the relative path is resolved against the
# kernel's current working directory.
salary_csv = "Salary_Data.csv"
dataset = pd.read_csv(salary_csv)

In [None]:
# Features: every column except the last. Target: the last column.
# Selecting the target with -1 (instead of a hard-coded position) mirrors the
# feature slice, so the target can never also appear among the features.
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out one third of the rows for evaluation; the fixed seed keeps the
# partition identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=1 / 3,
    random_state=0,
)

## 1. Fitting a Simple Linear Regression Model

$$ \Large y = h_0+h_1x_1$$

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
# Fit a linear model on the training split. The fit call stays as the last
# expression so the cell still echoes the fitted estimator.
regressor = LinearRegression()
regressor.fit(X=X_train, y=y_train)

In [None]:
# Predict on the held-out split, then echo the actual and predicted targets
# together as the cell output for a quick side-by-side look.
y_predict = regressor.predict(X=X_test)
(y_test, y_predict)

In [None]:
# Build the whole figure in one cell. With Jupyter's default inline backend
# each cell renders (and then closes) its own figure, so a title or axis label
# set in a later cell would end up on a new, empty figure instead of this one.
plt.scatter(X_train, y_train, color="red")  # observed training points
plt.plot(X_train, regressor.predict(X_train), color="cyan")  # fitted line
plt.title("Experience vs Salary (Training Data)")
plt.xlabel("Experience")
plt.ylabel("Salary")
plt.show()

In [None]:
# Build the whole figure in one cell. With Jupyter's default inline backend
# each cell renders (and then closes) its own figure, so labels set in a
# separate cell would not be attached to the scatter plot.
plt.scatter(X_test, y_test, color="red")  # held-out observations
# The line comes from the same fitted model as in the training plot; drawing
# it over X_train only determines the x-range the line spans.
plt.plot(X_train, regressor.predict(X_train), color="cyan")
plt.title("Experience vs Salary (Testing Data)")
plt.xlabel("Experience")
plt.ylabel("Salary")
plt.show()

## 2. Fitting a Multiple Linear Regression Model

$$ \Large y = h_0+h_1x_1+h_2x_2+h_3x_3+\ldots+h_nx_n $$

In [None]:
# Load the startups dataset; the relative path is resolved against the
# kernel's current working directory. Note this rebinds `dataset`.
startups_csv = "50_Startups.csv"
dataset = pd.read_csv(startups_csv)

In [None]:
# Features: every column except the last. Target: the last column.
# Using -1 instead of the hard-coded position 4 keeps the target selection in
# step with the feature slice if the column count ever changes.
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Index of the categorical column in X that gets dummy-encoded.
CATEGORICAL_COLUMN = 3

# Integer-code the category labels in place (kept from the original flow so
# that `le_X` remains available for mapping codes back to labels).
le_X = LabelEncoder()
X[:, CATEGORICAL_COLUMN] = le_X.fit_transform(X[:, CATEGORICAL_COLUMN])

# `OneHotEncoder(categorical_features=[...])` was removed in scikit-learn 0.22;
# selecting which column to encode is now ColumnTransformer's job. The output
# layout matches the old behaviour: dummy columns first, then the untouched
# columns in their original order.
onehotenc = ColumnTransformer(
    transformers=[("onehot", OneHotEncoder(), [CATEGORICAL_COLUMN])],
    remainder="passthrough",  # keep the non-categorical columns
    sparse_threshold=0,  # always return a dense array
)
# The stacked result is object-typed because X is; cast to float for modelling.
X = np.asarray(onehotenc.fit_transform(X), dtype=float)

# Drop the first dummy column: with an intercept in the model, the full set of
# dummies is perfectly collinear (the dummy-variable trap).
X = X[:, 1:]

In [None]:
# 80/20 train/test partition; the fixed seed keeps it identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [None]:
# Fresh estimator for the multi-feature data (rebinds `regressor`). The fit
# call stays as the last expression so the fitted estimator is echoed.
regressor = LinearRegression()
regressor.fit(X=X_train, y=y_train)

In [None]:
# Predict on the held-out split, then echo predictions next to the true
# targets as the cell output.
y_pred = regressor.predict(X=X_test)
(y_pred, y_test)

## 3. Fitting a Stepwise Regression (Backward Elimination) Model

In [None]:
# `OLS` lives in statsmodels.api; statsmodels.formula.api only exposes the
# lowercase formula interface (`ols`) in current releases.
import statsmodels.api as sm

# statsmodels' OLS does not add an intercept on its own, so prepend a column
# of ones. add_constant sizes the column from X itself (no hard-coded row
# count) and, by default, leaves X untouched if a constant column is already
# present, so re-running this cell does not stack extra intercept columns.
X = sm.add_constant(X, prepend=True)

# Candidate feature subset for this elimination step: the intercept (column 0)
# plus column 3 of the augmented matrix.
# NOTE(review): which original feature column 3 is depends on how many dummy
# columns precede it; confirm against the encoding step.
X_opt = X[:, [0, 3]]

regressor_OLS = sm.OLS(y, X_opt).fit()
regressor_OLS.summary()

## 4. Fitting a Polynomial Regression Model

$$ \Large y = h_0+h_1x_1+h_2x_1^2+h_3x_1^3+\ldots+h_nx_1^n $$

In [None]:
# Load the position/salary dataset; the relative path is resolved against the
# kernel's current working directory. Note this rebinds `dataset`.
positions_csv = "Position_Salaries.csv"
dataset = pd.read_csv(positions_csv)

In [None]:
# The 1:2 slice keeps column 1 as a 2-D feature matrix; column 2 is taken as
# a 1-D target. The shapes are echoed as a sanity check.
X = dataset.iloc[:, 1:2].to_numpy()
y = dataset.iloc[:, 2].to_numpy()
(X.shape, y.shape)

In [None]:
# Raw data overview: markers for each observation plus a line joining them in
# row order, to show the curvature before any model is fitted.
feature_column = X[:, -1]
plt.scatter(X, y, color="blue")
plt.plot(feature_column, y, color="cyan")
plt.title("Polynomial type relationship")
plt.xlabel("Level")
plt.ylabel("Salary")
plt.show()

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

In [None]:
# Degree-4 feature expander (degree was raised to 4 for higher accuracy).
reg_poly = PolynomialFeatures(degree=4)

# Straight-line baseline fitted on the raw feature; the fit call is the last
# expression so the fitted estimator is echoed.
reg_lin = LinearRegression()
reg_lin.fit(X=X, y=y)

In [None]:
# Expand the feature into polynomial terms, then fit a second linear model on
# the expanded matrix. The fit call is last so the estimator is echoed.
X_poly = reg_poly.fit_transform(X)
reg_2 = LinearRegression()
reg_2.fit(X=X_poly, y=y)

In [None]:
# Dense grid (step 0.1) across the observed feature range so the fitted
# curves render smoothly. X.min()/X.max() return scalars; the builtin
# min()/max() on a 2-D array return 1-element arrays, which np.arange only
# accepts through an array-to-scalar conversion deprecated since NumPy 1.25.
X_grid = np.arange(X.min(), X.max(), 0.1).reshape(-1, 1)

plt.plot(X_grid, reg_lin.predict(X_grid), color="pink")  # linear baseline
# Use transform(), not fit_transform(): the expander was already fitted on
# the training feature and must not be re-fitted on the prediction grid.
plt.plot(X_grid, reg_2.predict(reg_poly.transform(X_grid)), color="red")
plt.plot(X, y, color="black")  # original observations
plt.xlabel("Level")
plt.ylabel("Salary")
plt.title("Comparison between Linear vs. Polynomial \nPrediction against Original Values")
plt.show()

## 5. Fitting a Non-Linear Model with Support Vector Regression (RBF Kernel)

In [None]:
# Reload the position/salary dataset for the SVR section; the relative path
# is resolved against the kernel's current working directory.
positions_csv = 'Position_Salaries.csv'
dataset = pd.read_csv(positions_csv)

In [None]:
# Both selections use slices so X and y stay 2-D, which is the layout the
# scalers applied in the next step operate on.
X = dataset.iloc[:, 1:2].to_numpy()
y = dataset.iloc[:, 2:].to_numpy()

In [None]:
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler

# Standardize features and target with separate scalers so that each can be
# inverted independently when predictions are mapped back later.
sc_X, sc_y = StandardScaler(), StandardScaler()
X = sc_X.fit(X).transform(X)
y = sc_y.fit(y).transform(y)

In [None]:
# RBF-kernel support vector regressor (rebinds `regressor`). The target is
# flattened from a column to 1-D for fitting; the fit call is last so the
# fitted estimator is echoed.
regressor = SVR(kernel="rbf")
regressor.fit(X, np.reshape(y, (len(y),)))

In [None]:
# Predict for a single level of 6.5 and map the result back to the original
# target scale. scikit-learn transformers require 2-D input of shape
# (n_samples, n_features), so the query is wrapped as a 1x1 array and the 1-D
# SVR output is reshaped into a column before the target scaling is undone.
level_scaled = sc_X.transform(np.array([[6.5]]))
sc_y.inverse_transform(regressor.predict(level_scaled).reshape(-1, 1))

In [None]:
# X and y were standardized before fitting, so undo the scaling before
# plotting; otherwise the "Level" and "Salary" axes would show z-scores
# rather than the quantities they are labelled with.
levels = sc_X.inverse_transform(X)
salary_actual = sc_y.inverse_transform(y)
# predict() returns a 1-D array; inverse_transform needs a 2-D column.
salary_fitted = sc_y.inverse_transform(regressor.predict(X).reshape(-1, 1))

plt.plot(levels, salary_actual, color="black")
plt.plot(levels, salary_fitted, color="red")
plt.xlabel("Level")
plt.ylabel("Salary")
plt.title("Actual Salary (Black) vs. Prediction (Red)")
plt.show()