In [3]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 

# ML library imports 
import torch
#import tensorflow as tf
import sklearn, scipy
from tqdm import tqdm

# Environment sanity check: show the installed PyTorch build and whether
# a CUDA device is visible to it.
torch_version = torch.__version__
gpu_available = torch.cuda.is_available()
print(f"Torch Version: {torch_version}")
print(f"Using GPU: {gpu_available}")

Torch Version: 2.0.0
Using GPU: True


In [12]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.svm import SVC, SVR
from sklearn.linear_model import LogisticRegression,LinearRegression
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, f1_score, confusion_matrix, mean_squared_error, mean_absolute_error
from sklearn.utils import resample
from sklearn.ensemble import RandomForestClassifier

# Selects which target(s) the later cells train on:
#   'REG'  -> the 'popularity' column
#   'CLAS' -> the 'hit' column
#   any other value -> both target columns together
TASK = 'REG'

# Single source of truth for the model inputs, so the train and test
# matrices can never end up with different columns or column order.
FEATURE_COLUMNS = [
    'acousticness', 'danceability', 'duration_ms',
    'energy', 'instrumentalness', 'key', 'liveness',
    'loudness', 'speechiness', 'tempo', 'valence',
    'artist_popularity', 'followers',
]
TARGET_COLUMNS = ['popularity', 'hit']

# NOTE(review): unpickling can execute arbitrary code -- only load this file
# if it comes from a trusted source.
data = pd.read_pickle("../final_dataset_splits.pkl")

# The 'train' column marks split membership; it is dropped once the rows
# have been separated so it cannot leak into features or targets.
train = data[data.train].drop(['train'], axis=1)
test = data[~data.train].drop(['train'], axis=1)

X_train = train[FEATURE_COLUMNS]
X_test = test[FEATURE_COLUMNS]

y_train = train[TARGET_COLUMNS]
y_test = test[TARGET_COLUMNS]

# Fit the scaler on the training rows only and reuse those statistics for
# the test rows. From here on X_train / X_test are NumPy arrays.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train.values)
X_test = scaler.transform(X_test.values)

In [13]:
# Map the configured TASK to the single target column it trains on.
# A TASK that is not listed here keeps every target column.
target_column = {'REG': 'popularity', 'CLAS': 'hit'}.get(TASK)

if target_column is None:
    Y_train, Y_test = y_train.values, y_test.values
else:
    Y_train = y_train[target_column].values
    Y_test = y_test[target_column].values

# Classification

In [14]:
if TASK == 'CLAS':
    # Random forest baseline: 100 trees, seeded with random_state=0.
    rfc = RandomForestClassifier(n_estimators=100, random_state=0).fit(X_train, Y_train)
    y_pred = rfc.predict(X_test)
    print("RandomForest Classification Report:")
    print(classification_report(Y_test, y_pred))

    # Support-vector classifier with an RBF kernel.
    svm = SVC(kernel='rbf', random_state=0).fit(X_train, Y_train)
    svm_preds = svm.predict(X_test)
    print("SVM Classification Report:")
    print(classification_report(Y_test, svm_preds))

    # Logistic regression baseline.
    lr = LogisticRegression(random_state=0).fit(X_train, Y_train)
    lr_preds = lr.predict(X_test)
    print("Logistic Regression Classification Report:")
    print(classification_report(Y_test, lr_preds))

# Regression

In [15]:
def _print_regression_report(title, y_true, y_hat, blank_line_after=False):
    """Print MSE, RMSE and MAE for one set of predictions.

    Parameters
    ----------
    title : str
        Header line printed before the metrics.
    y_true, y_hat : array-like
        Ground-truth targets and the model's predictions for them.
    blank_line_after : bool, default False
        When True, emit one empty line after the report so that
        consecutive reports are visually separated.
    """
    # Compute each metric once; RMSE is derived from the MSE already in hand
    # instead of calling mean_squared_error a second time.
    mse = mean_squared_error(y_true, y_hat)
    mae = mean_absolute_error(y_true, y_hat)

    print(title)
    print(f"mean_squared_error: {mse:.2f}")
    print(f"root_mean_squared_error: {mse ** 0.5:.2f}")
    print(f"mean_absolute_error: {mae:.2f}")
    if blank_line_after:
        print()


if TASK == 'REG':
    # Support-vector regression with a linear kernel.
    svm = SVR(kernel='linear')
    svm.fit(X_train, Y_train)
    svm_preds = svm.predict(X_test)
    _print_regression_report("SVM (Linear) Report:", Y_test, svm_preds, blank_line_after=True)

    # Support-vector regression with an RBF kernel.
    # `svm` / `svm_preds` are rebound here, so after this cell they refer to
    # the RBF model and its predictions.
    svm = SVR(kernel='rbf')
    svm.fit(X_train, Y_train)
    svm_preds = svm.predict(X_test)
    _print_regression_report("SVM (RBF) Report:", Y_test, svm_preds, blank_line_after=True)

    # Ordinary least-squares baseline.
    lr = LinearRegression()
    lr.fit(X_train, Y_train)
    lr_preds = lr.predict(X_test)
    _print_regression_report("LR Report:", Y_test, lr_preds)

SVM (Linear) Report:
mean_squared_error: 464.31
root_mean_squared_error: 21.55
mean_absolute_error: 14.71

SVM (RBF) Report:
mean_squared_error: 350.51
root_mean_squared_error: 18.72
mean_absolute_error: 13.18

LR Report:
mean_squared_error: 405.83
root_mean_squared_error: 20.15
mean_absolute_error: 17.64
