#### **Linear Regression**

---
Linear Regression predicts the value of a dependent variable from the value of an independent variable.

In other words, it is a way to predict how one value evolves (regresses) in response to the variation of another value.

In [59]:
# General imports
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

##### Base Health Plan

###### Get base and set variables

In [None]:
base_health_plan = pd.read_csv('../examples/health_plan.csv')
base_health_plan

In [None]:
# get ages
x_health_plan = base_health_plan.iloc[:, 0].values
x_health_plan

In [None]:
# reshape variable for apply like param of the learning model function
x_health_plan_matrix = x_health_plan.reshape(-1, 1)
x_health_plan_matrix

In [63]:
# get costs
y_health_plan = base_health_plan.iloc[:, 1].values
y_health_plan

array([ 871, 1132, 1242, 1356, 1488, 1638, 2130, 2454, 3066, 4090],
      dtype=int64)

In [64]:
# correlation coefficient (Pearson's r) between ages and costs
np.corrcoef(x_health_plan, y_health_plan)
# r = 0.93 indicates a strong positive linear relationship between age and cost;
# the share of cost variance explained by age is r**2 (about 0.87), not r itself

array([[1.        , 0.93091958],
       [0.93091958, 1.        ]])

###### Learning

In [None]:
from sklearn.linear_model import LinearRegression

regressor_health_plan = LinearRegression()
regressor_health_plan.fit(x_health_plan_matrix, y_health_plan)

In [None]:
# b0 - constant
regressor_health_plan.intercept_

In [None]:
# b1 - coeficient
regressor_health_plan.coef_

In [None]:
# predict evaluates the equation y = b0 + b1 * x1 for each sample
# x1 is the age (the input); y is the predicted cost
predict = regressor_health_plan.predict(x_health_plan_matrix)
predict

###### Graphics

In [None]:
# Generate graphic
graphic = px.scatter(x=x_health_plan, y=y_health_plan)
graphic.add_scatter(x=x_health_plan, y=predict, name='Regression')
graphic.show()

In [None]:
regressor_health_plan.score(x_health_plan_matrix, y_health_plan)

##### Base House's Price

###### Get base and set variables

In [None]:
# Load the house prices and discard the 'date' column in the same expression,
# so the frame is ready for the correlation heatmap
base_house = (
    pd.read_csv('../examples/house_prices.csv')
    .drop('date', axis='columns')
)
base_house

In [None]:
base_house.describe()

In [None]:
# verify how much null values was fouded
base_house.isnull().sum()

In [None]:
# Large canvas so the annotated correlation values stay readable
fig = plt.figure(figsize=(20, 20))
# The heatmap shows the pairwise correlation between columns
# (1 means 100% correlation)
sns.heatmap(base_house.corr(), annot=True, ax=fig.gca())

In [None]:
x_houses = base_house.iloc[:, 4:5].values
x_houses

In [None]:
y_houses = base_house.iloc[:, 1].values
y_houses

###### Learning

In [29]:
# Hold out 30% of the samples for testing; the fixed seed keeps the split
# identical on every run
from sklearn.model_selection import train_test_split

(x_house_trainning,
 x_house_test,
 y_house_trainning,
 y_house_test) = train_test_split(x_houses, y_houses, test_size=0.3, random_state=0)

In [None]:
x_house_trainning.shape, y_house_trainning.shape

In [None]:
x_house_test.shape, y_house_test.shape

In [None]:
from sklearn.linear_model import LinearRegression
regressor_simple_houses = LinearRegression()
regressor_simple_houses.fit(x_house_trainning, y_house_trainning)

In [None]:
# b0
regressor_simple_houses.intercept_

In [None]:
# b1
regressor_simple_houses.coef_

In [None]:
regressor_simple_houses.score(x_house_trainning, y_house_trainning)

In [None]:
regressor_simple_houses.score(x_house_test, y_house_test)

In [None]:
predict = regressor_simple_houses.predict(x_house_trainning)
predict

In [None]:
predict_test = regressor_simple_houses.predict(x_house_test)
predict_test

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
# mean absolute error: average absolute difference between the actual and the
# predicted house prices on the test split
mean_absolute_error(y_house_test, predict_test)
# alternative metric: mean_squared_error(y_house_test, predict_test)

###### Graphic

In [None]:
graphic = px.scatter(x = x_house_trainning.ravel(), y = predict)
graphic.show()

In [None]:
# Training split: observed prices as points, fitted regression as a red line
graphic1 = px.scatter(x=x_house_trainning.ravel(), y=y_house_trainning)
graphic2 = px.line(x=x_house_trainning.ravel(), y=predict)
graphic2.update_traces(line_color='red')

# Overlay both traces in a single figure
graphic3 = go.Figure(data=graphic1.data + graphic2.data)
graphic3

In [None]:
# Test split: observed prices as points, model predictions as a red line
graphic1 = px.scatter(x=x_house_test.ravel(), y=y_house_test)
graphic2 = px.line(x=x_house_test.ravel(), y=predict_test)
graphic2.update_traces(line_color='red')

# Overlay both traces in a single figure
graphic3 = go.Figure(data=graphic1.data + graphic2.data)
graphic3

#### **Multiple Linear Regression**

---
Unlike simple Linear Regression, Multiple Linear Regression uses more than one feature to train the model and predict the result.

In [None]:
# General imports
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

##### Base House's Price

###### Get base and set variables

In [None]:
base_house = pd.read_csv('../examples/house_prices.csv')
base_house

In [None]:
x_houses = base_house.iloc[:, 3:19].values
x_houses

In [None]:
y_houses = base_house.iloc[:, 2].values
y_houses

In [106]:
# split base in test and trainning
from sklearn.model_selection import train_test_split
x_house_trainning, x_house_test, y_house_trainning, y_house_test = train_test_split(x_houses, y_houses, test_size = 0.3, random_state = 0)

###### Learning

In [None]:
from sklearn.linear_model import LinearRegression
regressor_multiple_houses = LinearRegression()
regressor_multiple_houses.fit(x_house_trainning, y_house_trainning)

In [None]:
# b0
regressor_multiple_houses.intercept_

In [None]:
# b1
regressor_multiple_houses.coef_
#len(regressor_multiple_houses.coef_)

In [None]:
regressor_multiple_houses.score(x_house_trainning, y_house_trainning)

In [None]:
regressor_multiple_houses.score(x_house_test, y_house_test)

In [None]:
predict = regressor_multiple_houses.predict(x_house_test)
predict

#### **Polynomial Regression**

---
In Polynomial Regression, unlike Linear Regression, we fit a curve instead of a straight line.

In [11]:
# General imports
import plotly.express as px
import pandas as pd

##### Base Health Plan

In [None]:
base_health_plan2 = pd.read_csv('../examples/health_plan2.csv')
base_health_plan2

In [4]:
x_health_plan2 = base_health_plan2.iloc[:, 0:1].values
y_health_plan2 = base_health_plan2.iloc[:, 1].values

In [14]:
# PolynomialFeatures is imported here because this is the first cell that uses
# it; without the import a fresh kernel raises NameError at this point
from sklearn.preprocessing import PolynomialFeatures

# expand the single input column into polynomial terms up to degree 4
# (1, x, x^2, x^3, x^4) so that a linear model can fit a curve
poly = PolynomialFeatures(degree = 4)
x_health_plan2_poly = poly.fit_transform(x_health_plan2)

In [None]:
# imported locally so this section does not rely on an import made in an
# earlier section of the notebook
from sklearn.linear_model import LinearRegression
# training: fit an ordinary linear model on the polynomial-expanded features
regressor_health_poly = LinearRegression()
regressor_health_poly.fit(x_health_plan2_poly, y_health_plan2)

In [None]:
# b0
regressor_health_poly.intercept_

In [None]:
# b1 (n)
regressor_health_poly.coef_

In [None]:
predict = regressor_health_poly.predict(x_health_plan2_poly)
predict

In [None]:
graphic = px.scatter(x = x_health_plan2[:, 0], y = y_health_plan2)
graphic.add_scatter(x = x_health_plan2[:, 0], y = predict, name = 'Regression')
graphic.show()

##### Base House's Prices

In [None]:
base_house = pd.read_csv('../examples/house_prices.csv')
base_house

In [None]:
x_houses = base_house.iloc[:, 3:19].values
x_houses

In [None]:
y_houses = base_house.iloc[:, 2].values
y_houses

In [51]:
# split base in test and trainning
from sklearn.model_selection import train_test_split
x_house_trainning, x_house_test, y_house_trainning, y_house_test = train_test_split(x_houses, y_houses, test_size = 0.3, random_state = 0)

In [52]:
from sklearn.preprocessing import PolynomialFeatures
# normalize
poly = PolynomialFeatures(degree = 2)
x_house_trainning_poly = poly.fit_transform(x_house_trainning)
x_house_test_poly = poly.transform(x_house_test)

In [None]:
x_house_trainning_poly.shape, x_house_test_poly.shape

In [None]:
from sklearn.linear_model import LinearRegression
# trainning
regressor_houses_poly = LinearRegression()
regressor_houses_poly.fit(x_house_trainning_poly, y_house_trainning)

In [None]:
regressor_houses_poly.score(x_house_trainning_poly, y_house_trainning)

In [None]:
regressor_houses_poly.score(x_house_test_poly, y_house_test)

In [None]:
predict = regressor_houses_poly.predict(x_house_test_poly)
predict

In [None]:
from sklearn.metrics import mean_absolute_error
mean_absolute_error(y_house_test, predict)

#### **Tree Regression**

---

In [14]:
# General imports
import plotly.express as px
import pandas as pd
import numpy as np

##### Base Health Plan

In [None]:
base_health_plan2 = pd.read_csv('../examples/health_plan2.csv')
base_health_plan2

In [4]:
x_health_plan2 = base_health_plan2.iloc[:, 0:1].values
y_health_plan2 = base_health_plan2.iloc[:, 1].values

In [None]:
from sklearn.tree import DecisionTreeRegressor
# trainning
regressor_tree_health = DecisionTreeRegressor()
regressor_tree_health.fit(x_health_plan2, y_health_plan2)

In [None]:
predict = regressor_tree_health.predict(x_health_plan2)
predict

In [None]:
regressor_tree_health.score(x_health_plan2, y_health_plan2)

In [None]:
# dense grid of x values (step 0.1) from the smallest to the largest observed x,
# shaped as a single column so it can be passed to predict()
# ndarray.min()/max() return scalars; the builtin min()/max() iterate over the
# rows of the 2-D array and hand one-element arrays to np.arange
x_test_tree = np.arange(x_health_plan2.min(), x_health_plan2.max(), 0.1).reshape(-1, 1)
x_test_tree.shape

In [None]:
graphic = px.scatter(x = x_health_plan2.ravel(), y = y_health_plan2)
graphic.add_scatter(x = x_test_tree.ravel(), y = regressor_tree_health.predict(x_test_tree), name = 'Regression')
graphic.show()

##### Base House's Prices

In [None]:
base_house = pd.read_csv('../examples/house_prices.csv')
base_house

In [None]:
x_houses = base_house.iloc[:, 3:19].values
x_houses

In [None]:
y_houses = base_house.iloc[:, 2].values
y_houses

In [25]:
# split base in test and trainning
from sklearn.model_selection import train_test_split
x_house_trainning, x_house_test, y_house_trainning, y_house_test = train_test_split(x_houses, y_houses, test_size = 0.3, random_state = 0)

In [None]:
from sklearn.tree import DecisionTreeRegressor

regressor_tree_houses = DecisionTreeRegressor()
regressor_tree_houses.fit(x_house_trainning, y_house_trainning)

In [None]:
regressor_tree_houses.score(x_house_trainning, y_house_trainning)

In [None]:
regressor_tree_houses.score(x_house_test, y_house_test)

In [None]:
predict = regressor_tree_houses.predict(x_house_test)
predict

### **Random Forest Regression**

---

In [2]:
# General imports
import plotly.express as px
import pandas as pd
import numpy as np

##### Base Health Plan

In [3]:
# import base
base_health_plan2 = pd.read_csv('../examples/health_plan2.csv')
x_health_plan2 = base_health_plan2.iloc[:, 0:1].values
y_health_plan2 = base_health_plan2.iloc[:, 1].values

In [None]:
# training
from sklearn.ensemble import RandomForestRegressor
# random_state fixes the forest's internal randomness so the fitted model and
# its score are the same on every run
regressor_random_forest_health = RandomForestRegressor(n_estimators = 10, random_state = 0)
regressor_random_forest_health.fit(x_health_plan2, y_health_plan2)

In [None]:
regressor_random_forest_health.score(x_health_plan2, y_health_plan2)

In [None]:
# create a dense range of real numbers (step 0.1) so the prediction is drawn
# like a line in the graphic
# ndarray.min()/max() return scalars; the builtin min()/max() iterate over the
# rows of the 2-D array and hand one-element arrays to np.arange
x_test_tree = np.arange(x_health_plan2.min(), x_health_plan2.max(), 0.1).reshape(-1, 1)
x_test_tree.shape

In [None]:
graphic = px.scatter(x = x_health_plan2.ravel(), y = y_health_plan2)
graphic.add_scatter(x = x_test_tree.ravel(), y = regressor_random_forest_health.predict(x_test_tree), name = 'Regression')
graphic.show()

##### Base House's Price

In [9]:
# import base
base_house = pd.read_csv('../examples/house_prices.csv')
# get columns
x_houses = base_house.iloc[:, 3:19].values
y_houses = base_house.iloc[:, 2].values
# split regs
from sklearn.model_selection import train_test_split
x_house_trainning, x_house_test, y_house_trainning, y_house_test = train_test_split(x_houses, y_houses, test_size = 0.3, random_state = 0)

In [None]:
# training
# random_state fixes the forest's internal randomness so the fitted model and
# its scores are the same on every run
regressor_random_forest_houses = RandomForestRegressor(n_estimators=100, random_state=0)
regressor_random_forest_houses.fit(x_house_trainning, y_house_trainning)

In [None]:
# scores
[regressor_random_forest_houses.score(x_house_trainning, y_house_trainning),
regressor_random_forest_houses.score(x_house_test, y_house_test)]

In [None]:
predict = regressor_random_forest_houses.predict(x_house_test)
predict

### **SVM Regression**

---

In [None]:
# General imports
import plotly.express as px
import pandas as pd
import numpy as np

from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler

##### Base Health Plan

In [25]:
# import base
base_health_plan2 = pd.read_csv('../examples/health_plan2.csv')
x_health_plan2 = base_health_plan2.iloc[:, 0:1].values
y_health_plan2 = base_health_plan2.iloc[:, 1].values

In [35]:
from sklearn.preprocessing import StandardScaler
# standardize inputs and targets, each with its own scaler
scaler_x = StandardScaler()
x_health_plan2_scaled = scaler_x.fit_transform(x_health_plan2)
scaler_y = StandardScaler()
# the 1-D target is reshaped into a single column before scaling
y_health_plan2_scaled = scaler_y.fit_transform(y_health_plan2.reshape(-1,1))

In [None]:
# Linear Kernel
regressor_svr_health_linear = SVR(kernel = 'linear')
regressor_svr_health_linear.fit(x_health_plan2, y_health_plan2)

graphic = px.scatter(x = x_health_plan2.ravel(), y = y_health_plan2)
graphic.add_scatter(x = x_health_plan2.ravel(), y = regressor_svr_health_linear.predict(x_health_plan2), name = 'Regression')

In [None]:
# Polinomial Kernel
regressor_svr_health_poly = SVR(kernel = 'poly', degree = 4)
regressor_svr_health_poly.fit(x_health_plan2, y_health_plan2)

graphic = px.scatter(x = x_health_plan2.ravel(), y = y_health_plan2)
graphic.add_scatter(x = x_health_plan2.ravel(), y = regressor_svr_health_poly.predict(x_health_plan2), name = 'Regression')

In [None]:
# RBF Kernel
regressor_svr_health_rbf = SVR(kernel = 'rbf')
regressor_svr_health_rbf.fit(x_health_plan2_scaled, y_health_plan2_scaled.ravel())

graphic = px.scatter(x = x_health_plan2_scaled.ravel(), y = y_health_plan2_scaled.ravel())
graphic.add_scatter(x = x_health_plan2_scaled.ravel(), y = regressor_svr_health_rbf.predict(x_health_plan2_scaled), name = 'Regression')

##### Base House's Price

In [42]:
# import base
base_house = pd.read_csv('../examples/house_prices.csv')
# get columns
x_houses = base_house.iloc[:, 3:19].values
y_houses = base_house.iloc[:, 2].values
# split regs
from sklearn.model_selection import train_test_split
x_house_trainning, x_house_test, y_house_trainning, y_house_test = train_test_split(x_houses, y_houses, test_size = 0.3, random_state = 0)

In [49]:
# standardize features and target
# the scalers are fitted on the training split only
scaler_x_trainning = StandardScaler()
x_house_trainning_scaled = scaler_x_trainning.fit_transform(x_house_trainning)
scaler_y_trainning = StandardScaler()
y_house_trainning_scaled = scaler_y_trainning.fit_transform(y_house_trainning.reshape(-1,1))

# the test split is transformed with the statistics learned from the training
# split; fitting separate scalers on the test data would put it on a different
# scale than the one the model was trained on
# scaler_x_test / scaler_y_test are kept as aliases of the training scalers so
# any existing reference to those names keeps working
scaler_x_test = scaler_x_trainning
x_house_test_scaled = scaler_x_test.transform(x_house_test)
scaler_y_test = scaler_y_trainning
y_house_test_scaled = scaler_y_test.transform(y_house_test.reshape(-1,1))

In [None]:
# trainning
regressor_svr_houses = SVR(kernel='rbf')
regressor_svr_houses.fit(x_house_trainning_scaled, y_house_trainning_scaled.ravel())

In [None]:
# scores
[regressor_svr_houses.score(x_house_trainning_scaled, y_house_trainning_scaled),
regressor_svr_houses.score(x_house_test_scaled, y_house_test_scaled)]

In [None]:
predict = regressor_svr_houses.predict(x_house_test_scaled)
predict