# Simple Linear Regression

In [1]:
import pandas as pd
import numpy as np
from sklearn import preprocessing

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm

# Global matplotlib defaults: line width, figure size and font sizes.
plt.rcParams.update({
    'lines.linewidth': 3,
    'figure.figsize': [8, 5],
    'font.size': 12,
    'legend.fontsize': 12,
    'figure.titlesize': 20,
    'axes.labelsize': 20,
})

# Silence FutureWarning messages so they do not clutter the cell outputs.
import warnings
warnings.simplefilter('ignore', category=FutureWarning)

### Import dataset

In [14]:
# Load the raw dataset for this group.
df = pd.read_csv("../../datasets/group_14.csv")

# focus_factor is written with a decimal comma: swap it for a decimal point
# and parse the column as numeric. Values that still cannot be parsed become
# NaN because of errors="coerce".
df["focus_factor"] = pd.to_numeric(
    df["focus_factor"].astype(str).str.replace(",", ".", regex=False),
    errors="coerce",
)

# One-hot encode target_class. drop_first=True drops the first class so it
# acts as the baseline and the dummy columns are not perfectly collinear.
df_encoded = pd.get_dummies(
    data=df,
    columns=["target_class"],
    drop_first=True,
)

df_encoded


Unnamed: 0,duration_1,duration_2,duration_3,duration_4,duration_5,loudness_level,popularity_level,tempo_class,time_signature,key_mode,...,is_dance_hit,temp_zscore,resonance_factor,timbre_index,echo_constant,distorted_movement,signal_power,target_regression,target_class_class_65,target_class_class_73
0,0.0,0.0,1.0,0.0,0.0,4.0,4.0,1.0,0.221824,0.767388,...,0.0,-0.564732,0.612561,0.473008,1,0.951927,0.201,1.917123,False,False
1,0.0,0.0,1.0,0.0,0.0,0.0,4.0,1.0,0.221824,-0.667708,...,0.0,-0.567234,1.391972,0.738998,1,1.107509,0.864,2.320623,False,False
2,0.0,0.0,0.0,1.0,0.0,3.0,4.0,1.0,0.221824,1.048779,...,0.0,-0.404616,0.341971,0.387795,1,-1.975317,0.863,2.230956,False,False
3,0.0,0.0,1.0,0.0,0.0,3.0,3.0,1.0,0.221824,-0.358178,...,0.0,1.596399,-0.020531,0.546116,1,1.257328,0.961,0.392792,False,False
4,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.221824,1.048779,...,0.0,0.928482,0.725521,0.388133,1,-0.955392,0.671,1.603291,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2995,0.0,0.0,1.0,0.0,0.0,3.0,1.0,1.0,0.221824,0.767388,...,0.0,0.473886,-0.731518,0.354013,1,-0.281204,0.919,-1.490205,False,True
2996,0.0,1.0,0.0,0.0,0.0,2.0,1.0,1.0,-2.089669,0.485996,...,0.0,-0.410720,-1.965594,0.021253,1,1.718312,0.938,-1.490205,False,True
2997,0.0,0.0,0.0,1.0,0.0,0.0,3.0,1.0,0.221824,0.485996,...,0.0,0.660255,0.564745,0.320107,1,0.433321,0.805,0.303126,False,True
2998,0.0,1.0,0.0,0.0,0.0,4.0,3.0,1.0,0.221824,-0.920961,...,0.0,0.844588,1.336067,0.484141,1,0.756009,0.669,0.213459,False,True
