In [1]:
import numpy as np
import skfuzzy as fuzz
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler


In [2]:
#Acesso ao dataset
music = pd.read_csv('dataset.csv', index_col=0) #chama o ficheiro e remove coluna desnecessária (index_col)

#Garantir que o acesso foi bem sucedido (faz print das primeiras 5 linhas do dataset)
print(music.head())
music.shape

                 track_id                 artists  \
0  5SuOikwiRyPMVoIQDJUgSV             Gen Hoshino   
1  4qPNDBW1i3p13qLCt0Ki3A            Ben Woodward   
2  1iJBSr7s7jYXzM8EGcbK5b  Ingrid Michaelson;ZAYN   
3  6lfxq3CG4xtTiEg7opyCyx            Kina Grannis   
4  5vjLSffimiIP26QG5WcN2K        Chord Overstreet   

                                          album_name  \
0                                             Comedy   
1                                   Ghost (Acoustic)   
2                                     To Begin Again   
3  Crazy Rich Asians (Original Motion Picture Sou...   
4                                            Hold On   

                   track_name  popularity  duration_ms  explicit  \
0                      Comedy          73       230666     False   
1            Ghost - Acoustic          55       149610     False   
2              To Begin Again          57       210826     False   
3  Can't Help Falling In Love          71       201933     False   
4   

(114000, 20)

In [3]:
##limpeza do dataset
# Contar quantas linhas duplicadas existem (mesmo nome + mesmo artista)
duplicadas_antes = music.duplicated(subset=["track_name", "artists"]).sum()
print(f"Antes da limpeza: {duplicadas_antes} músicas duplicadas")

# Remover duplicados (mesmo nome e mesmo artista)
music = music.drop_duplicates(subset=["track_name", "artists"], keep="first")

print(music.shape)

#remover tracks que não são musica (podcasts, audio books)
music = music[music['speechiness'] <= 0.66]

print(f'Depois de filtrar para incluir apenas as músicas: {music.shape}')


#valores em falta por coluna
print(music.isnull().sum()) 
music = music.dropna()  #por serem poucos


Antes da limpeza: 32656 músicas duplicadas
(81344, 20)
Depois de filtrar para incluir apenas as músicas: (80483, 20)
track_id            0
artists             1
album_name          1
track_name          1
popularity          0
duration_ms         0
explicit            0
danceability        0
energy              0
key                 0
loudness            0
mode                0
speechiness         0
acousticness        0
instrumentalness    0
liveness            0
valence             0
tempo               0
time_signature      0
track_genre         0
dtype: int64


In [4]:
#defenir as features
f0 = music['danceability'].values      #[0,1]
f1 = music['energy'].values            #[0,1]
f2 = music['key'].values               #[-1,11]
f3 = music['loudness'].values          #dB
f4 = music['mode'].values              #binary
f5 = music['acousticness'].values      #[0,1]
f6 = music['instrumentalness'].values  #[0,1]
f7 = music['valence'].values           #valence
f8 = music['tempo'].values             #bpm
f9 = music['time_signature'].values    #[0,5]


y = music['track_genre'].values       #classes

'---------------------------------------'
# matriz features
#track genre não incluida, só mais á frente
X = music[['danceability', 'energy', 'key', 'loudness', 'mode', 'acousticness', 'instrumentalness', 'valence', 'tempo', 'time_signature']].values    

#x = np.column_stack((f0, f1, f2, f3, f4, f5, f6, f7, f8, f9))
#x = np.c_[f0, f1, f2, f3, f4, f5, f6, f7, f8, f9]

'-------------------------------------melhor metodo?'

print(y)
print(f'matriz x \n {X}')

music.shape


['acoustic' 'acoustic' 'acoustic' ... 'world-music' 'world-music'
 'world-music']
matriz x 
 [[6.76000e-01 4.61000e-01 1.00000e+00 ... 7.15000e-01 8.79170e+01
  4.00000e+00]
 [4.20000e-01 1.66000e-01 1.00000e+00 ... 2.67000e-01 7.74890e+01
  4.00000e+00]
 [4.38000e-01 3.59000e-01 0.00000e+00 ... 1.20000e-01 7.63320e+01
  4.00000e+00]
 ...
 [6.29000e-01 3.29000e-01 0.00000e+00 ... 7.43000e-01 1.32378e+02
  4.00000e+00]
 [5.87000e-01 5.06000e-01 7.00000e+00 ... 4.13000e-01 1.35960e+02
  4.00000e+00]
 [5.26000e-01 4.87000e-01 1.00000e+00 ... 7.08000e-01 7.91980e+01
  4.00000e+00]]


(80482, 20)

In [5]:
#verificaçaõ da limpeza
duplicadas_depois = music.duplicated(subset=["track_name", "artists"]).sum()
print(f"Depois da limpeza: {duplicadas_depois} músicas duplicadas")


Depois da limpeza: 0 músicas duplicadas


In [6]:
#train, validation e test split
test_size = 0.3
val_size = 0.5   #será metade dos dados de teste
#Seprarar o conjunto de treino (70%) e o temporário (30%)
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size = test_size, random_state = 42)
#Separar o conjunto temporário em teste (15%) e validação (15%)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size = val_size, random_state = 42)

'test será o conjunto nunca visto pelo algoritmo até ao fim'

'test será o conjunto nunca visto pelo algoritmo até ao fim'

In [7]:
#Normalizar varivaeis Key, loudness, tempo, time_signature
scaler = MinMaxScaler()
X_train[ :, [ 2, 3, 6, 7]] = scaler.fit_transform(X_train[ :, [ 2, 3, 6, 7]])
X_val[ :, [ 2, 3, 6, 7]] = scaler.transform(X_val[ :, [2, 3, 6, 7]])

print(X_train)
print(f' matriz x-val: \n {X_val}')

[[4.97000000e-01 2.94000000e-01 4.54545455e-01 ... 3.09547739e-01
  1.73993000e+02 4.00000000e+00]
 [5.86000000e-01 6.95000000e-01 9.09090909e-02 ... 9.07537688e-01
  1.57920000e+02 4.00000000e+00]
 [6.67000000e-01 5.03000000e-01 0.00000000e+00 ... 5.86934673e-01
  1.44015000e+02 4.00000000e+00]
 ...
 [7.88000000e-01 7.45000000e-01 1.81818182e-01 ... 2.00000000e-01
  1.22001000e+02 4.00000000e+00]
 [5.64000000e-01 7.31000000e-01 8.18181818e-01 ... 2.58291457e-01
  9.50360000e+01 4.00000000e+00]
 [7.71000000e-01 7.39000000e-01 8.18181818e-01 ... 6.90452261e-01
  1.30024000e+02 4.00000000e+00]]
 matriz x-val: 
 [[8.86000000e-01 6.26000000e-01 6.36363636e-01 ... 9.48743719e-01
  1.24980000e+02 4.00000000e+00]
 [4.81000000e-01 9.12000000e-01 1.00000000e+00 ... 6.93467337e-01
  7.80060000e+01 4.00000000e+00]
 [5.54000000e-01 8.27000000e-01 1.00000000e+00 ... 1.57788945e-01
  1.50104000e+02 4.00000000e+00]
 ...
 [8.38000000e-01 8.03000000e-01 5.45454545e-01 ... 5.79899497e-01
  1.26026000e+0

In [8]:
#Modelo 1: Encoder do género (y -> z)
class GenreEncoder(nn.Module):
    def __init__(self, num_genres, embedding_dim):
        super(GenreEncoder, self).__init__()
        self.genre_embedding = nn.Embedding(num_genres, embedding_dim)

    def forward(self, genre_idx):
        # genre_idx: [batch_size, 1] ou [batch_size]
        z = self.embedding(genre_idx).squeeze(1)
        return z  # z ∈ R^(batch_size × embedding_dim)

#Modelo 2: Encoder das features (x -> ẑ)
class FeatureEncoder(nn.Module):
    def __init__(self, input_dim, embedding_dim, dropout_prob = 0.3):
        super(FeatureEncoder, self).__init__()
        self.input = nn.Linear (input_dim, 64)
        self.hidden1 = nn.Linear (64, 64)
        self.hidden2 = nn.Linear (64, 64)
        self.out = nn.Linear (64, embedding_dim)
        self.dropout = nn.Droout (p = dropout_prob)


    def forward(self, x):
        x = F.relu (self.input(x))
        x = self.dropout(x)

        x = F.relu (self.hidden1(x))
        x = self.dropout(x)

        x = F.relu (self.hidden2(x))
        x = self.dropout(x)

        z_hat = self.out(x)
        return z_hat  # ẑ ∈ R^(batch_size × embedding_dim)



In [9]:
#setup de parâmetros
embedding_dim = 30
num_genres = 114
num_epochs = 200
learning_rate = 0.001
dropout = 0.2     #nunca acima de 0.5
batch_size = 64


In [10]:
#conversão dos dados para tensores pythorch
X_train = torch.tensor (X_train, dtype = torch.float32)
y_train = torch.tensor (y_train, dtype = torch.float32)
X_val = torch.tensor (X_val, dtype = torch.float32)
y_val = torch.tensor (y_val, dtype = torch.float32)

#unir tensores X_train e y_train num dataset
train_dataset = TensorDataset (X_train, y_train)

# Create DataLoader
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)


TypeError: can't convert np.ndarray of type numpy.object_. The only supported types are: float64, float32, float16, complex64, complex128, int64, int32, int16, int8, uint64, uint32, uint16, uint8, and bool.

In [None]:
#Model
genre_embedding = GenreEncoder(num_genres, embedding_dim)
model = FeatureEncoder(input_dim = X_train.shape[1], embedding_dim = embedding_dim, dropout_prob = dropout)
#Loss
criterion = nn.CosineSimilarity(dim=1)
#Otimizer
optimizer = optim.Adam( list(model.parameters()) + list(genre_embedding.parameters()), lr = learning_rate)


SyntaxError: positional argument follows keyword argument (3417583946.py, line 3)

In [None]:
#Training Loop
for epoch in range(num_epochs):
    #ativa modo de treino nos dois modelos
    model.train()
    genre_embedding.train()

    total_loss = 0.0

    for x_feat, y_genre in train_dataloader:
        optimizer.zero_grad()
        # z do género
        z = genre_embedding ( y_genre )       # [batch, embedding_dim]
        # ẑ das features
        z_hat = model ( x_feat )  # [batch, embedding_dim]

        # Loss = 1 - cos_sim
        cos_sim = criterion( z_hat, z)
        loss = 1 - cos_sim.mean()

        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    avg_loss = total_loss / len(train_dataloader)
    print(f"Epoch {epoch+1}/{num_epochs} | Loss: {avg_loss:.4f}")

