**Importando módulos necessários**

In [1]:
import numpy as np
import pandas as pd
from sklearn import preprocessing, tree, linear_model
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.metrics import mean_squared_error, r2_score, median_absolute_error, explained_variance_score
import seaborn as sns
import matplotlib.pyplot as plt

**Carrega-se um dataframe a partir do .csv gerado anteriormente, com os dados de texto tratados corretamente.**

In [2]:
# Load the pre-vectorized dataset. The file has no header row, so pandas
# assigns integer column labels (0..n-1).
df = pd.read_csv(
    filepath_or_buffer="Vectorized.csv",
    header=None,
)

**Separando o target das features**

In [3]:
# The last column is the regression target; every other column is a feature.
# The `-1:` slice keeps `targets` as a single-column DataFrame (2-D), not a Series.
features, targets = df.iloc[:, :-1], df.iloc[:, -1:]

**Separando os dados em conjuntos de treino e de teste**

In [4]:
# Hold out 40% of the rows for evaluation; the fixed seed keeps the split
# identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    targets,
    test_size=0.4,
    random_state=42,
)

**Criando os regressores**

In [5]:
# Two decision-tree regressors that differ only in maximum depth.
# scikit-learn permutes the candidate features at every split, so ties between
# equally good splits can be broken differently from run to run; a fixed
# random_state makes the fitted trees (and the metrics reported below)
# reproducible. The seed matches the one used for the train/test split.
decis_tree_regr_1 = tree.DecisionTreeRegressor(max_depth=5, random_state=42)
decis_tree_regr_2 = tree.DecisionTreeRegressor(max_depth=10, random_state=42)

# Two Lasso (L1-regularized linear) regressors that differ only in the
# regularization strength `alpha`, passed by keyword so the meaning of the
# number is explicit.
lasso_regr_1 = linear_model.Lasso(alpha=0.1)
lasso_regr_2 = linear_model.Lasso(alpha=1)

**Treinando Modelos**

In [None]:
# Fit every regressor on the same training split.
for regressor in (decis_tree_regr_1, decis_tree_regr_2, lasso_regr_1, lasso_regr_2):
    regressor.fit(X_train, y_train)

**Aplicando os modelos no conjunto de teste**

In [22]:
# Predict on the held-out split with each fitted model; the order of the names
# on the left matches the order of the models on the right.
y_decis_tree_1, y_decis_tree_2, y_lasso_regr_1, y_lasso_regr_2 = (
    fitted.predict(X_test)
    for fitted in (decis_tree_regr_1, decis_tree_regr_2, lasso_regr_1, lasso_regr_2)
)

**Resultados do DecisionTreeRegressor com profundidade 5**

In [29]:
# Held-out metrics for the depth-5 decision tree.
# The first three metrics share the same two-decimal format.
for template, metric in (
    ("Median absolute error: %.2f", median_absolute_error),
    ("Mean squared error: %.2f", mean_squared_error),
    ('R² score: %.2f', r2_score),
):
    print(template % metric(y_test, y_decis_tree_1))
# Explained variance is printed at full precision.
print('Explained variance score:', explained_variance_score(y_test, y_decis_tree_1))

Median absolute error: 8936.13
Mean squared error: 243629731.06
R² score: 0.20
Explained variance score: 0.20288124982904043


**Resultados do DecisionTreeRegressor com profundidade 10**

In [31]:
# Held-out metrics for the depth-10 decision tree.
# The first three metrics share the same two-decimal format.
for template, metric in (
    ("Median absolute error: %.2f", median_absolute_error),
    ("Mean squared error: %.2f", mean_squared_error),
    ('R² score: %.2f', r2_score),
):
    print(template % metric(y_test, y_decis_tree_2))
# Explained variance is printed at full precision.
print('Explained variance score:', explained_variance_score(y_test, y_decis_tree_2))

Median absolute error: 7698.90
Mean squared error: 227881512.79
R² score: 0.25
Explained variance score: 0.25440958157618354


**Resultados do Lasso com alpha 0.1**

In [35]:
# Held-out metrics for the Lasso model with alpha = 0.1.
# The first three metrics share the same two-decimal format.
for template, metric in (
    ("Median absolute error: %.2f", median_absolute_error),
    ("Mean squared error: %.2f", mean_squared_error),
    ('R² score: %.2f', r2_score),
):
    print(template % metric(y_test, y_lasso_regr_1))
# Explained variance is printed at full precision.
print('Explained variance score:', explained_variance_score(y_test, y_lasso_regr_1))

Median absolute error: 7308.92
Mean squared error: 191991001.82
R² score: 0.37
Explained variance score: 0.37183225145953824


**Resultados do Lasso com alpha 1**

In [36]:
# Held-out metrics for the Lasso model with alpha = 1.
# The first three metrics share the same two-decimal format.
for template, metric in (
    ("Median absolute error: %.2f", median_absolute_error),
    ("Mean squared error: %.2f", mean_squared_error),
    ('R² score: %.2f', r2_score),
):
    print(template % metric(y_test, y_lasso_regr_2))
# Explained variance is printed at full precision.
print('Explained variance score:', explained_variance_score(y_test, y_lasso_regr_2))

Median absolute error: 7308.99
Mean squared error: 191993698.01
R² score: 0.37
Explained variance score: 0.37182355665342737
