In [None]:
%matplotlib inline

# Polynomial Regression Example

This time we'll work with a dataset of position levels and salaries, and fit it using polynomial regression.


## Installation

First, let's install the required libraries then import them:

In [None]:
%pip install matplotlib
%pip install numpy
%pip install sklearn
%pip install pandas

In [None]:
# Import the libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error, r2_score

## Viewing the data

Next, let's see the data we're working with

In [None]:
# Load the position/salary dataset (a CSV hosted on GitHub)
POSITION_SALARIES_URL = "https://raw.githubusercontent.com/2blam/ML/master/decision_tree_regression/Position_Salaries.csv"
pos_salaries = pd.read_csv(POSITION_SALARIES_URL)

In [None]:
# Preview the first five rows to check the columns loaded as expected
pos_salaries.head()

## Choosing our input

Let's use the level as our input feature and the salary as the output feature.

In [None]:
# Input feature: the Level column, pulled out as a NumPy array
level = pos_salaries["Level"].to_numpy()
# Output (target): the Salary column, pulled out as a NumPy array
salary = pos_salaries["Salary"].to_numpy()

<details><summary>Click to cheat</summary>

```python
# Create a variable called level and assign it the Level column
level = pos_salaries['Level'].to_numpy()
# Create a variable called salary and assign it the Salary column
salary = pos_salaries["Salary"].to_numpy()
```
</details>

## Creating the model

In [None]:
# Highest power of `level` to include as a feature
degree = 3

# Feature expander: maps x to [x, x^2, ..., x^degree]. The constant column is
# left out (include_bias=False) because LinearRegression fits its own intercept.
poly = PolynomialFeatures(degree=degree, include_bias=False)

# scikit-learn expects a 2-D (n_samples, n_features) input, hence the reshape
level_poly = poly.fit_transform(level.reshape(-1, 1))

# A linear model on the expanded features is a polynomial model in `level`;
# fit() returns the fitted estimator, so construction and training can be chained
model = LinearRegression().fit(level_poly, salary)

# Predictions on the same points the model was fitted on
salary_pred = model.predict(level_poly)

<details><summary>Click to cheat</summary>

```python
# choose a degree
degree = 3

# Create polynomial regression object
poly = PolynomialFeatures(degree=degree, include_bias=False)

# Transform the model into a linear one
level_poly = poly.fit_transform(level.reshape(-1, 1))

# Create the linear model
model = LinearRegression()

# Train the model using the training sets
model.fit(level_poly, salary)

# Get the predictions
salary_pred = model.predict(level_poly)
```
</details>

## Evaluating our model

We can print the raw numbers and plot!

In [None]:
# The weights: one learned coefficient per polynomial feature
print("Weight:", model.coef_)
# The bias: the learned intercept
print("Bias:", model.intercept_)

# Compute both fit-quality metrics from the true and predicted salaries
mse = mean_squared_error(salary, salary_pred)
r2 = r2_score(salary, salary_pred)

# The mean squared error
print("Mean squared error: %.2f" % mse)
# The coefficient of determination: 1 is perfect prediction
print("Coefficient of determination: %.2f" % r2)

In [None]:
# Plot the observed salaries against the model's predictions
fig, ax = plt.subplots()

ax.scatter(level, salary, color="black")  # observed data points
ax.scatter(level, salary_pred, color="green")  # predicted value at each level
ax.plot(level, salary_pred, color="blue", linewidth=1)  # line joining the predictions

ax.set_xlabel("Level")
ax.set_ylabel("Salary ($)")

plt.show()

## Choosing our degree

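Now let's be smarter and choose a degree using the Bayesian information criterion (BIC): among the candidate models, the one with the lowest BIC is preferred.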

In [None]:
# Our BIC function
def bic(n, k, ssr):
    """Return a BIC-style score: k * ln(n) + n * ln(ssr).

    Parameters
    ----------
    n : number of observations.
    k : number of fitted parameters (the complexity being penalised).
    ssr : sum of squared residuals of the fit.

    Notes
    -----
    The textbook Gaussian form uses n * ln(ssr / n); this version differs from
    it by the constant n * ln(n), so for a fixed n the ranking of models is the
    same. Lower scores are better.
    """
    complexity_penalty = k * np.log(n)
    goodness_of_fit = n * np.log(ssr)
    return complexity_penalty + goodness_of_fit

# Our squared error function
def squared_error(trueY, predY):
    """Return the total squared error between trueY and predY.

    The total is recovered from the mean squared error by multiplying it by
    the number of samples, len(trueY).
    """
    n_samples = len(trueY)
    mean_sq = mean_squared_error(trueY, predY)
    return mean_sq * n_samples

# Let's create our array of BIC values
bics = []
for i in range(2, 20):
    # Create the model of degree i-1
    # (loop-local names keep the `model` / `salary_pred` fitted earlier intact)
    poly_i = PolynomialFeatures(degree=i - 1, include_bias=False)
    level_poly_i = poly_i.fit_transform(level.reshape(-1, 1))
    model_i = LinearRegression()

    # Fit the data
    model_i.fit(level_poly_i, salary)

    # Predict the model
    salary_pred_i = model_i.predict(level_poly_i)

    # Calculate the BIC
    # k = i parameters: i-1 polynomial coefficients plus the intercept
    ssr_i = squared_error(salary, salary_pred_i)
    bics.append(bic(len(salary), i, ssr_i))

# One BIC value per candidate model; k holds the matching parameter counts
bics = np.array(bics)
k = np.arange(2, 20, 1)

<details><summary>Click to cheat</summary>

```python
# Let's create our array of BIC values
bics = []
for i in range(2, 20):
    # Create the model of degree i-1
    poly = PolynomialFeatures(degree=i - 1, include_bias=False)
    level_poly = poly.fit_transform(level.reshape(-1, 1))
    model = LinearRegression()

    # Fit the data
    model.fit(level_poly, salary)

    # Predict the model
    salary_pred = model.predict(level_poly)

    # Calculate the BIC
    ssr = squared_error(salary, salary_pred)
    bics.append(bic(len(salary), i, ssr))

bics = np.array(bics)
k = np.arange(2, 20, 1)

```
</details>

In [None]:
# Drop any figure left open, then draw BIC against the number of parameters k
plt.close()
fig, ax = plt.subplots()

ax.plot(k, bics, color="blue", linewidth=1)

ax.set_xlabel("k")
ax.set_ylabel("BIC")

plt.show()