In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Read the songs table from disk and show its first five rows as the cell output.
songs = pd.read_csv(filepath_or_buffer="data/songsClean.csv")
songs.head(n=5)

Unnamed: 0,track_id,artists,album_name,track_name,popularity,duration_ms,explicit,danceability,energy,key,mode,speechiness,instrumentalness,liveness,valence,tempo,time_signature,track_genre
0,5SuOikwiRyPMVoIQDJUgSV,10357,8100,11741,73,0.024557,0,0.629239,-0.717147,1,0,0.551843,-0.504111,0.758735,0.929315,-1.141854,3,0
1,4qPNDBW1i3p13qLCt0Ki3A,3287,14796,22528,55,-0.730889,0,-0.845908,-1.889974,1,1,-0.078995,-0.504097,-0.591216,-0.798681,-1.489708,3,0
2,1iJBSr7s7jYXzM8EGcbK5b,12397,39162,60774,57,-0.160353,0,-0.742187,-1.122667,0,1,-0.273827,-0.504115,-0.507172,-1.365679,-1.528303,3,0
3,6lfxq3CG4xtTiEg7opyCyx,14839,8580,9580,71,-0.243236,0,-1.733301,-2.312987,0,1,-0.457309,-0.503886,-0.428381,-1.276965,1.987857,2,0
4,5vjLSffimiIP26QG5WcN2K,5255,16899,25689,82,-0.271942,0,0.295026,-0.788709,2,1,-0.303146,-0.504115,-0.68629,-1.184394,-0.073343,3,0


## Preparando o modelo

In [31]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [4]:
# The column being predicted.
target = "popularity"

# Select model inputs by name rather than by position. Besides the target,
# two columns must not reach the estimators:
#   - "Unnamed: 0": the row index that was written into the CSV;
#   - "track_id":   an alphanumeric string identifier (see the head() preview),
#                   which scikit-learn estimators cannot convert to float.
features = [
    col
    for col in songs.columns
    if col not in ("Unnamed: 0", "track_id", target)
]

In [5]:
# Hold out 25% of the rows for evaluation; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    songs[features],
    songs[target],
    test_size=0.25,
    random_state=23,
)

## Construindo os modelos

In [32]:
def modelAvaliator(mod, train_x=None, train_y=None, test_x=None, test_y=None):
    """Fit ``mod``, predict on held-out data and print R2, MAE and MSE.

    Parameters
    ----------
    mod : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``. It is fitted in
        place, so the caller's object is trained after this call.
    train_x, train_y : optional
        Training inputs and targets. When left as ``None`` the notebook-level
        ``x_train`` / ``y_train`` are used.
    test_x, test_y : optional
        Evaluation inputs and targets. When left as ``None`` the notebook-level
        ``x_test`` / ``y_test`` are used.

    Returns
    -------
    None
        The three metrics are printed, not returned.
    """
    # Fall back to the notebook's global split only for arguments that were not
    # supplied, so existing one-argument calls keep working unchanged.
    train_x = x_train if train_x is None else train_x
    train_y = y_train if train_y is None else train_y
    test_x = x_test if test_x is None else test_x
    test_y = y_test if test_y is None else test_y

    mod.fit(train_x, train_y)
    predictions = mod.predict(test_x)

    r2 = r2_score(test_y, predictions)
    mae = mean_absolute_error(test_y, predictions)
    mse = mean_squared_error(test_y, predictions)

    print("R2: {}\nMean Absolute Error: {}\nMean Squared Error: {}".format(r2, mae, mse))

### Linear Models (Linear Regression, Lasso, Ridge)

In [7]:
from sklearn.linear_model import LinearRegression, Lasso, Ridge

In [33]:
# Ordinary least squares baseline; fit_intercept=True is the library default, spelled out.
lr = LinearRegression(fit_intercept=True)
modelAvaliator(lr)

R2: 0.023986154791984293
Mean Absolute Error: 18.417467845710746
Mean Squared Error: 487.90892799735747


In [34]:
# L1-regularised linear model; alpha=1.0 is the library default, spelled out.
ls = Lasso(alpha=1.0)
modelAvaliator(ls)

R2: 0.011222802754318528
Mean Absolute Error: 18.742659870064603
Mean Squared Error: 494.28932253881317


In [35]:
# L2-regularised linear model; alpha=1.0 is the library default, spelled out.
rd = Ridge(alpha=1.0)
modelAvaliator(rd)

R2: 0.02398628159327454
Mean Absolute Error: 18.41747201596411
Mean Squared Error: 487.9088646094436


### Decision Tree Regressor

In [23]:
from sklearn.tree import DecisionTreeRegressor

In [36]:
# Seed the tree so the reported metrics are reproducible: scikit-learn permutes
# the candidate features at every split, so an unseeded tree can pick a
# different split whenever several are equally good. 23 is the seed already
# used for the train/test split.
dt = DecisionTreeRegressor(random_state=23)
modelAvaliator(dt)

R2: 0.1114193924468555
Mean Absolute Error: 13.294736842105262
Mean Squared Error: 444.20108771929824


### Ensembles

In [37]:
from sklearn.ensemble import RandomForestRegressor

In [38]:
# Seed the forest so the reported metrics are reproducible: bootstrap sampling
# and per-tree randomness otherwise change the scores on every run. 23 is the
# seed already used for the train/test split.
rf = RandomForestRegressor(random_state=23)
modelAvaliator(rf)

R2: 0.5925180967433561
Mean Absolute Error: 10.607572060985797
Mean Squared Error: 203.70003926931926
