#### **Linear Regression**

---
Linear regression predicts the value of a dependent variable from the value of an independent variable.

In other words, it is a way to estimate how one value evolves as another value varies.

In [59]:
# General imports
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

##### Base Health Plan

###### Get base and set variables

In [60]:
# Load the health-plan dataset (the path is relative to the notebook's directory)
base_health_plan = pd.read_csv('../examples/health_plan.csv')
# Bare last expression so the notebook renders the frame
base_health_plan

Unnamed: 0,idade,custo
0,18,871
1,23,1132
2,28,1242
3,33,1356
4,38,1488
5,43,1638
6,48,2130
7,53,2454
8,58,3066
9,63,4090


In [61]:
# Ages are the independent variable (x). The column is selected by name so
# the cell keeps working if the CSV column order changes.
x_health_plan = base_health_plan['idade'].values
x_health_plan

array([18, 23, 28, 33, 38, 43, 48, 53, 58, 63], dtype=int64)

In [62]:
# Turn the 1-D age vector into a single-column 2-D matrix, the layout in
# which it is handed to the regressor's fit/predict calls
x_health_plan_matrix = x_health_plan[:, np.newaxis]
x_health_plan_matrix

array([[18],
       [23],
       [28],
       [33],
       [38],
       [43],
       [48],
       [53],
       [58],
       [63]], dtype=int64)

In [63]:
# Costs are the dependent variable (y). The column is selected by name
# rather than by position so a reordered CSV cannot silently swap x and y.
y_health_plan = base_health_plan['custo'].values
y_health_plan

array([ 871, 1132, 1242, 1356, 1488, 1638, 2130, 2454, 3066, 4090],
      dtype=int64)

In [64]:
# Pearson correlation coefficient between age and cost (a correlation, not a covariance)
np.corrcoef(x_health_plan, y_health_plan)
# r ~ 0.93 indicates a strong positive linear relationship. The share of the
# cost variance explained by age is r**2 ~ 0.87 (the regression score), not 93%.

array([[1.        , 0.93091958],
       [0.93091958, 1.        ]])

###### Learning

In [65]:
from sklearn.linear_model import LinearRegression

# Fit cost as a linear function of age; x is passed in its 2-D matrix form
regressor_health_plan = LinearRegression()
regressor_health_plan.fit(x_health_plan_matrix, y_health_plan)

In [66]:
# b0 - intercept (constant term) of the fitted line y = b0 + b1 * x1
regressor_health_plan.intercept_

-558.9490909090912

In [67]:
# b1 - coefficient (slope): change in predicted cost per additional year of age
regressor_health_plan.coef_

array([61.86787879])

In [68]:
# predict evaluates y = b0 + b1 * x1 for every row of the input matrix
# x1 is the age (independent variable); y is the predicted cost
predict = regressor_health_plan.predict(x_health_plan_matrix)
predict

array([ 554.67272727,  864.01212121, 1173.35151515, 1482.69090909,
       1792.03030303, 2101.36969697, 2410.70909091, 2720.04848485,
       3029.38787879, 3338.72727273])

###### Graphics

In [69]:
# Observed costs as points, with the fitted regression values overlaid.
# Axis labels and a title are set explicitly so the figure can be read on
# its own (array inputs would otherwise be labelled just "x" and "y").
graphic = px.scatter(
    x=x_health_plan,
    y=y_health_plan,
    labels={'x': 'Age (idade)', 'y': 'Cost (custo)'},
    title='Health plan cost by age',
)
graphic.add_scatter(x=x_health_plan, y=predict, name='Regression')
graphic.show()

In [70]:
# R^2 (coefficient of determination), evaluated on the same data the model was fitted on
regressor_health_plan.score(x_health_plan_matrix, y_health_plan)

0.8666112727879174

##### Base House's Price

###### Get base and set variables

In [None]:
# Load the house-price data and drop 'date' right away (it is removed so the
# correlation heatmap can be generated). Chaining the drop instead of using
# inplace=True builds the frame in a single expression with no in-place mutation.
base_house = pd.read_csv('../examples/house_prices.csv').drop(columns='date')
base_house

In [None]:
# Summary statistics per column, as a quick sanity check of ranges and scales
base_house.describe()

In [None]:
# Count the missing values found in each column
base_house.isna().sum()

In [None]:
# Correlation heatmap between the columns of the frame.
# Values near 1 or -1 mean a strong positive or negative linear correlation;
# values near 0 mean little or none.
fig, ax = plt.subplots(figsize=(20, 20))
sns.heatmap(base_house.corr(), annot=True, ax=ax)

In [None]:
# Single feature for the simple regression: the 4:5 slice selects only the
# column at position 4 but keeps the result 2-D (one column per row)
# NOTE(review): which feature sits at position 4 depends on the CSV layout - confirm
x_houses = base_house.iloc[:, 4:5].values
x_houses

In [None]:
# Target vector: the column at position 1, as a 1-D array
# NOTE(review): presumably the house price - confirm against the CSV header
y_houses = base_house.iloc[:, 1].values
y_houses

###### Learning

In [77]:
# Split the data into training and test sets: 30% of the rows are held out
# for testing, and the fixed random_state keeps the split reproducible
from sklearn.model_selection import train_test_split
(
    x_house_trainning,
    x_house_test,
    y_house_trainning,
    y_house_test,
) = train_test_split(
    x_houses,
    y_houses,
    test_size=0.3,
    random_state=0,
)

In [None]:
# Shapes of the training features and target, shown side by side
x_house_trainning.shape, y_house_trainning.shape

In [None]:
# Shapes of the held-out test features and target, shown side by side
x_house_test.shape, y_house_test.shape

In [None]:
# LinearRegression is also imported in the health-plan section; repeated here
from sklearn.linear_model import LinearRegression
# Fit the simple (single-feature) regression on the training split only
regressor_simple_houses = LinearRegression()
regressor_simple_houses.fit(x_house_trainning, y_house_trainning)

In [None]:
# b0 - intercept (constant term) of the fitted line
regressor_simple_houses.intercept_

In [None]:
# b1 - coefficient (slope) for the single feature
regressor_simple_houses.coef_

In [None]:
# R^2 on the training split
regressor_simple_houses.score(x_house_trainning, y_house_trainning)

In [None]:
# R^2 on the held-out test split (compare with the training score)
regressor_simple_houses.score(x_house_test, y_house_test)

In [None]:
# Predictions for the training rows
# NOTE: rebinds `predict`, which held the health-plan predictions until here
predict = regressor_simple_houses.predict(x_house_trainning)
predict

In [None]:
# Predictions for the held-out test rows
predict_test = regressor_simple_houses.predict(x_house_test)
predict_test

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
# Mean absolute error on the test split: the average absolute difference
# between the actual and the predicted target values
mean_absolute_error(y_house_test, predict_test)
# Alternative metric, left disabled:
#mean_squared_error(y_house_test, predict_test)

###### Graphic

In [None]:
# Fitted values for the training rows plotted against the feature
# (ravel flattens the single-column matrix to 1-D for plotting)
graphic = px.scatter(x = x_house_trainning.ravel(), y = predict)
graphic.show()

In [None]:
# Training split: observed values as points, fitted regression line in red
graphic1 = px.scatter(x=x_house_trainning.ravel(), y=y_house_trainning)

graphic2 = px.line(x=x_house_trainning.ravel(), y=predict)
graphic2.update_traces(line_color='red')

# Merge the traces of both figures into a single figure and display it
graphic3 = go.Figure(data=[*graphic1.data, *graphic2.data])
graphic3

In [None]:
# Test split: observed values as points, model predictions as a red line
graphic1 = px.scatter(x=x_house_test.ravel(), y=y_house_test)

graphic2 = px.line(x=x_house_test.ravel(), y=predict_test)
graphic2.update_traces(line_color='red')

# Merge the traces of both figures into a single figure and display it
graphic3 = go.Figure(data=[*graphic1.data, *graphic2.data])
graphic3

#### **Multiple Linear Regressions**

---

In [None]:
# General imports
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

##### Base House's Price

###### Get base and set variables

In [None]:
# Reload the full dataset. Unlike the simple-regression section, 'date' is
# not dropped here, so column positions may differ from that section.
base_house = pd.read_csv('../examples/house_prices.csv')
base_house

In [None]:
# Feature matrix for the multiple regression: the columns at positions 3 to 18
# NOTE(review): positional selection - confirm it matches the intended features
x_houses = base_house.iloc[:, 3:19].values
x_houses

In [None]:
# Target vector: the column at position 2, as a 1-D array
# NOTE(review): presumably the house price - confirm against the CSV header
y_houses = base_house.iloc[:, 2].values
y_houses

In [106]:
# Split the data into training and test sets: 30% of the rows are held out
# for testing, and the fixed random_state keeps the split reproducible
from sklearn.model_selection import train_test_split
(
    x_house_trainning,
    x_house_test,
    y_house_trainning,
    y_house_test,
) = train_test_split(
    x_houses,
    y_houses,
    test_size=0.3,
    random_state=0,
)

###### Learning

In [None]:
from sklearn.linear_model import LinearRegression
# Fit the multiple (multi-feature) regression on the training split only
regressor_multiple_houses = LinearRegression()
regressor_multiple_houses.fit(x_house_trainning, y_house_trainning)

In [None]:
# b0 - intercept (constant term) of the fitted model
regressor_multiple_houses.intercept_

In [None]:
# b1..bn - one coefficient per feature column of the training matrix
regressor_multiple_houses.coef_
# Optional check of how many coefficients were fitted, left disabled:
#len(regressor_multiple_houses.coef_)

In [None]:
# R^2 on the training split
regressor_multiple_houses.score(x_house_trainning, y_house_trainning)

In [None]:
# R^2 on the held-out test split (compare with the training score)
regressor_multiple_houses.score(x_house_test, y_house_test)

In [None]:
# Predictions for the held-out test rows
# NOTE: stored in `predict`, whereas the simple-regression section used
# `predict` for training-set predictions and `predict_test` for the test set
predict = regressor_multiple_houses.predict(x_house_test)
predict