<a href="https://colab.research.google.com/github/KevinHern/AI-Crash-Course/blob/main/AI_Crash_Course_01.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Regressions

[Presentation: AI Crash Course 01](https://view.genial.ly/61944323572a5e0d861065bb/presentation-ai-crashcourse01)

## 0) Preparations

In [None]:
# ----- Libraries ----- #

# For graph plotting
import matplotlib.pyplot as plt

# For dataset manipulation
import pandas as pd

# For visualizing more complex maps
import seaborn as sns

# For statistical analysis and Models
import statsmodels.api as sm
import statsmodels.formula.api as smapi
import numpy as np

In [None]:
'''
All the information regarding the dataset used for this demo can be found in the following link:
https://archive.ics.uci.edu/ml/datasets/auto+mpg
'''

# Getting Dataset
!wget http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data

In [None]:
# Column labels assigned to the parsed file, in file order
column_names = [
    'MPG', 'Cylinders', 'Displacement', 'Horsepower',
    'Weight', 'Acceleration', 'Model Year', 'Origin',
]

# Parse the space-separated file: "?" is read as NaN, and anything after a
# tab character on a line is treated as a comment and ignored
raw_dataset = pd.read_csv(
    "auto-mpg.data",
    names=column_names,
    sep=" ",
    skipinitialspace=True,
    na_values="?",
    comment='\t',
)

# Brief statistical summary of the dataset
raw_dataset.describe()

In [None]:
# Let's check the column labels of the loaded dataset
raw_dataset.columns

In [None]:
# Preview the first rows of the dataset
raw_dataset.head()

In [None]:
# shape is a tuple of the form (# rows, # columns)
raw_dataset.shape

In [None]:
# Let's check for null values in each column.
# Printed explicitly: a notebook only auto-displays the LAST expression of a
# cell, so a bare `isna().sum()` in the middle would be silently discarded.
print(raw_dataset.isna().sum())

# Dropping rows that contain null values.
# dropna() returns a new DataFrame, so raw_dataset itself is left untouched
# and no explicit copy is needed beforehand.
new_dataset = raw_dataset.dropna()

# Checking new dataset
new_dataset.head()

In [None]:
# Pairwise scatter plots of the selected features against each other, with a
# kernel density estimate of each feature on the diagonal
pairplot_columns = ["Cylinders", "Displacement", "Weight", "Acceleration", "Horsepower", "MPG"]
sns.pairplot(new_dataset[pairplot_columns], diag_kind="kde")

## 1) Linear Model (Simplest)

A single independent variable tries to explain the target (dependent) variable.

In [None]:
# Let's try to predict MPG based on Horsepower
model = smapi.ols(formula="MPG ~ Horsepower", data=new_dataset)
# fit() returns the results object; `model` is rebound to it
model = model.fit()
print(model.summary())

In [None]:
# Simple linear regression (OLS): MPG explained by Acceleration alone.
# Build and fit in one step; `model` holds the fitted results.
model = smapi.ols(formula="MPG ~ Acceleration", data=new_dataset).fit()
print(model.summary())

In [None]:
# Simple linear regression (OLS): MPG explained by Weight alone.
# Build and fit in one step; `model` holds the fitted results.
model = smapi.ols(formula="MPG ~ Weight", data=new_dataset).fit()
print(model.summary())

In [None]:
# Simple linear regression (OLS): MPG explained by Displacement alone.
# Build and fit in one step; `model` holds the fitted results.
model = smapi.ols(formula="MPG ~ Displacement", data=new_dataset).fit()
print(model.summary())

In [None]:
# Simple linear regression (OLS): MPG explained by Cylinders alone.
# Build and fit in one step; `model` holds the fitted results.
model = smapi.ols(formula="MPG ~ Cylinders", data=new_dataset).fit()
print(model.summary())

## 2) Multilinear Model

In [None]:
# Multilinear regression (OLS): MPG explained jointly by three predictors
model = smapi.ols(
    formula="MPG ~ Acceleration + Weight + Displacement",
    data=new_dataset,
).fit()
print(model.summary())

In [None]:
# Multilinear regression (OLS): MPG explained by Acceleration and Weight
model = smapi.ols(
    formula="MPG ~ Acceleration + Weight",
    data=new_dataset,
).fit()
print(model.summary())

In [None]:
# Multilinear regression (OLS): MPG explained by Horsepower and Cylinders
model = smapi.ols(
    formula="MPG ~ Horsepower + Cylinders",
    data=new_dataset,
).fit()
print(model.summary())

In [None]:
# Multilinear regression (OLS): MPG explained jointly by four predictors
model = smapi.ols(
    formula="MPG ~ Acceleration + Weight + Displacement + Cylinders",
    data=new_dataset,
).fit()
print(model.summary())

In [None]:
# Multilinear regression (OLS): MPG explained jointly by five predictors
model = smapi.ols(
    formula="MPG ~ Acceleration + Weight + Displacement + Horsepower + Cylinders",
    data=new_dataset,
).fit()
print(model.summary())

## 3) Polynomial Regression

In [None]:
# Polynomial regression (OLS): MPG explained by the square of Weight only.
# I(...) makes the formula evaluate Weight**2 as plain arithmetic.
model = smapi.ols(
    formula="MPG ~ I(Weight**2)",
    data=new_dataset,
).fit()
print(model.summary())

In [None]:
# Polynomial regression (OLS): MPG explained by the square of Horsepower only.
# I(...) makes the formula evaluate Horsepower**2 as plain arithmetic.
model = smapi.ols(
    formula="MPG ~ I(Horsepower**2)",
    data=new_dataset,
).fit()
print(model.summary())

In [None]:
# Polynomial regression (OLS): MPG explained by squared Horsepower and
# squared Weight together
model = smapi.ols(
    formula="MPG ~ I(Horsepower**2) + I(Weight**2)",
    data=new_dataset,
).fit()
print(model.summary())

In [None]:
# Mixed model (OLS): squared Horsepower and squared Weight plus a linear
# Acceleration term
model = smapi.ols(
    formula="MPG ~ I(Horsepower**2) + I(Weight**2) + Acceleration",
    data=new_dataset,
).fit()
print(model.summary())

In [None]:
# Mixed model (OLS): squared Horsepower plus linear Weight and Acceleration terms
model = smapi.ols(
    formula="MPG ~ I(Horsepower**2) + Weight + Acceleration",
    data=new_dataset,
).fit()
print(model.summary())

## 4) Visualizing

In [None]:
# Fit a simple linear regression of MPG on Acceleration
model = smapi.ols(formula='MPG ~ Acceleration', data=new_dataset).fit()

# Evaluate the fitted line across the observed Acceleration range.
# model.params is a label-indexed Series, so positional access goes through
# .iloc (position 0 = intercept, position 1 = slope, as the original indexing
# assumed); plain params[0] / params[1] is deprecated on a labelled Series.
X_plot = np.linspace(new_dataset['Acceleration'].min(), new_dataset['Acceleration'].max(), new_dataset.shape[0])
Y_plot = model.params.iloc[1] * X_plot + model.params.iloc[0]

# Scatter the observations, then overlay the fitted line in red.
# `height` is the current name of the FacetGrid argument formerly called `size`.
g = sns.FacetGrid(new_dataset, height=6)
g = g.map(plt.scatter, "Acceleration", "MPG", edgecolor="w")
plt.plot(X_plot, Y_plot, color='r')
plt.show()

In [None]:
# Fit a simple linear regression of MPG on Cylinders
model = smapi.ols(formula='MPG ~ Cylinders', data=new_dataset).fit()

# Evaluate the fitted line across the observed Cylinders range.
# model.params is a label-indexed Series, so positional access goes through
# .iloc (position 0 = intercept, position 1 = slope, as the original indexing
# assumed); plain params[0] / params[1] is deprecated on a labelled Series.
X_plot = np.linspace(new_dataset['Cylinders'].min(), new_dataset['Cylinders'].max(), new_dataset.shape[0])
Y_plot = model.params.iloc[1] * X_plot + model.params.iloc[0]

# Scatter the observations, then overlay the fitted line in red.
# `height` is the current name of the FacetGrid argument formerly called `size`.
g = sns.FacetGrid(new_dataset, height=6)
g = g.map(plt.scatter, "Cylinders", "MPG", edgecolor="w")
plt.plot(X_plot, Y_plot, color='r')
plt.show()

In [None]:
# Fit a simple linear regression of MPG on Weight
model = smapi.ols(formula='MPG ~ Weight', data=new_dataset).fit()

# Evaluate the fitted line across the observed Weight range.
# model.params is a label-indexed Series, so positional access goes through
# .iloc (position 0 = intercept, position 1 = slope, as the original indexing
# assumed); plain params[0] / params[1] is deprecated on a labelled Series.
X_plot = np.linspace(new_dataset['Weight'].min(), new_dataset['Weight'].max(), new_dataset.shape[0])
Y_plot = model.params.iloc[1] * X_plot + model.params.iloc[0]

# Scatter the observations, then overlay the fitted line in red.
# `height` is the current name of the FacetGrid argument formerly called `size`.
g = sns.FacetGrid(new_dataset, height=6)
g = g.map(plt.scatter, "Weight", "MPG", edgecolor="w")
plt.plot(X_plot, Y_plot, color='r')
plt.show()

In [None]:
# Fit MPG against the square of Horsepower (no linear Horsepower term)
model = smapi.ols(formula='MPG ~ I(Horsepower**2)', data=new_dataset).fit()

# Evaluate the fitted curve across the observed Horsepower range.
# model.params is a label-indexed Series, so positional access goes through
# .iloc (position 0 = intercept, position 1 = coefficient of Horsepower**2,
# as the original indexing assumed); plain params[0] / params[1] is
# deprecated on a labelled Series.
X_plot = np.linspace(new_dataset['Horsepower'].min(), new_dataset['Horsepower'].max(), new_dataset.shape[0])
Y_plot = model.params.iloc[1] * X_plot * X_plot + model.params.iloc[0]

# Scatter the observations, then overlay the fitted curve in red.
# `height` is the current name of the FacetGrid argument formerly called `size`.
g = sns.FacetGrid(new_dataset, height=6)
g = g.map(plt.scatter, "Horsepower", "MPG", edgecolor="w")
plt.plot(X_plot, Y_plot, color='r')
plt.show()