In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.utils.data as data
from sklearn.preprocessing import MinMaxScaler

In [2]:
device = torch.device('cpu') if not torch.cuda.is_available() else torch.device('cuda')
device

device(type='cuda')

## Data

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [91]:
artists = pd.read_csv("/content/drive/MyDrive/Yoga/studia/semestr6/artists.csv")
tracks = pd.read_json("/content/drive/MyDrive/Yoga/studia/semestr6/tracks.json")
users = pd.read_json("/content/drive/MyDrive/Yoga/studia/semestr6/users.json")
sessions = pd.read_json("/content/drive/MyDrive/Yoga/studia/semestr6/sessions.json")

In [92]:
select = sessions['track_id'].value_counts().to_frame()
weights = [select.loc[i].to_numpy()[0] if i in select.index else 0 for i in tracks['id']]
len(weights)

22412

In [93]:
tracks = tracks.sample(n = 250, weights=weights).reset_index(drop=True)

In [102]:
track_ids = tracks.id
VALID_COLUMN_NAMES = ['id', 'duration_ms', 'popularity', 'explicit', 'release_date','danceability', 'energy', 'key', 'loudness', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo']
tracks = tracks[VALID_COLUMN_NAMES]
rd = tracks.release_date
rd = pd.to_datetime(rd, errors='coerce')
tracks['release_date'] = rd.dt.year.fillna(0).astype(int)

In [95]:
users = users.sample(n = 500).reset_index(drop=True)

In [96]:
users = users.drop(columns=['name', 'city', 'street'])
GENRES = np.unique(np.concatenate(users['favourite_genres'].to_numpy()))

In [97]:
to_drop = sessions['event_type'] == 'skip'
to_drop[len(to_drop)] = False
to_drop = [ (to_drop[i+1] or to_drop[i]) for i in range(len(to_drop) - 1) ]
to_drop = np.array(to_drop)
to_drop += sessions['event_type'] == 'advertisment'
to_drop = ~to_drop
sessions = sessions[to_drop]

In [98]:
sessions.head()

Unnamed: 0,session_id,timestamp,user_id,track_id,event_type
0,124,2021-11-18 10:40:38.000,101,7G67ZJRQT9nn2Fa9vA6B32,play
2,124,2021-11-18 10:41:48.495,101,7G67ZJRQT9nn2Fa9vA6B32,like
4,124,2021-11-18 10:46:00.240,101,6xagjcywpcyNFghafZPQJv,play
8,124,2021-11-18 10:47:54.348,101,70CYAL35X3T73qVStJNpZ2,play
9,124,2021-11-18 10:50:13.137,101,70CYAL35X3T73qVStJNpZ2,like


In [99]:
# users = users[:10]
users.head()


Unnamed: 0,user_id,favourite_genres,premium_user
0,288,"[dance pop, french hip hop, pop rap]",False
1,963,"[post-grunge, pop, uk pop]",True
2,543,"[rock, corrido, adult standards]",False
3,974,"[contemporary country, reggaeton, rock]",False
4,1023,"[hip hop, mexican pop, pop punk]",False


In [100]:
tracks.head()

Unnamed: 0,id,duration_ms,popularity,explicit,release_date,danceability,energy,key,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,6S91KdGvAQdUU8vNpqIte7,258000,61,0,2015,0.379,0.513,1,-7.724,0.0492,0.643,3.2e-05,0.069,0.108,142.067
1,4pxHBdRmwqhiv2B5zy3KG3,267667,58,0,2007,0.623,0.587,0,-7.241,0.0275,0.525,0.000345,0.104,0.538,121.983
2,6i1uWZYWabNHq2wQnoca58,298573,54,0,1989,0.666,0.884,9,-5.243,0.0429,0.506,0.00612,0.0408,0.754,92.005
3,0K8tOSa1LE1Ue784z6qwWZ,375507,52,0,1982,0.591,0.345,5,-14.058,0.0309,0.337,0.0612,0.0862,0.0845,96.474
4,7xP1PzRNG24qpTWdtin1Gb,153373,52,0,2012,0.638,0.648,2,-6.436,0.0351,0.0896,1.6e-05,0.141,0.27,118.908


In [101]:
track_keys = tracks['key']
tracks_scaler = MinMaxScaler()
tracks_scaler.fit(tracks.drop(columns=['id', 'key']))
tracks = tracks_scaler.transform(tracks.drop(columns=['id', 'key']))
tracks = pd.DataFrame(tracks, columns=['duration_ms', 'popularity', 'explicit', 'release_date','danceability', 'energy', 'loudness', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo'])
tracks['id'] = track_ids
tracks['key'] = track_keys
tracks.head()

Unnamed: 0,duration_ms,popularity,explicit,release_date,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,id,key
0,0.228599,0.217391,0.0,0.894737,0.223841,0.506515,0.751629,0.060516,0.659486,4e-05,0.0469,0.064334,0.49997,6S91KdGvAQdUU8vNpqIte7,1
1,0.238318,0.152174,0.0,0.754386,0.54702,0.582436,0.768024,0.006696,0.53846,0.000427,0.084636,0.531624,0.350478,4pxHBdRmwqhiv2B5zy3KG3,0
2,0.26939,0.065217,0.0,0.438596,0.603974,0.887145,0.835845,0.044891,0.518973,0.007574,0.016496,0.766355,0.127341,6i1uWZYWabNHq2wQnoca58,9
3,0.346737,0.021739,0.0,0.315789,0.504636,0.334154,0.536626,0.015129,0.345639,0.075743,0.065445,0.038796,0.160605,0K8tOSa1LE1Ue784z6qwWZ,5
4,0.12341,0.021739,0.0,0.842105,0.566887,0.645019,0.79535,0.025546,0.091895,2e-05,0.124528,0.240383,0.32759,7xP1PzRNG24qpTWdtin1Gb,2


## Model

Classifier input: <br>
    1. Track Desccription (12 numerical + 2 categorical giving 16 one-hots + 1 binary) <br>
    2. User Description (2 categorical giving 50 one-hots + 1 binary) <br>
    3. Music Fingerprint (5 categorical giving 5x50 one-hots) <br>
Classifier output: <br>
    1. P(User will listen whole track) <br>
    2. P(User will like the track) <br>
    3. P(Track is similar to fingerprint) <br>

In [14]:
# u = users.loc[1]
# t = tracks.loc[640] # 640 play // 8428 like
# select = sessions[u['user_id'] == sessions['user_id']]
# select = select[t['id'] == select['track_id']]
# play = bool(sum(select['event_type'] == 'play'))
# like = bool(sum(select['event_type'] == 'like'))
# ug = u['favourite_genres']
# ug = torch.Tensor(np.array([i in ug for i in GENRES]) * 1)
# ur = torch.Tensor([u['premium_user'] * 1])
# tk = torch.Tensor([int(i == t['key']) for i in range(16)])
# tr = torch.Tensor(t.drop(labels=['id', 'key']).to_numpy().astype(np.float64))
# [[ug, ur], [tk, tr]], [play, like]

KeyError: ignored

In [15]:
tracks[tracks['id'] == '0Mn3amMRMoabaoTf1Publ4']

Unnamed: 0,duration_ms,popularity,explicit,release_date,danceability,...,liveness,valence,tempo,id,key


In [31]:
class MusicDataset(data.Dataset):
    def __init__(self, users, tracks, sessions):
        self.users = users
        self.lusers = len(users)
        self.tracks = tracks
        self.ltracks = len(tracks)
        self.sessions = sessions
        self.z = min(self.lusers, self.ltracks)

    def __len__(self):
        return self.ltracks * self.lusers

    def __getitem__(self, idx):
        u = users.loc[idx // self.z]
        t = tracks.loc[idx % self.z]
        select = sessions[u['user_id'] == sessions['user_id']]
        select = select[t['id'] == select['track_id']]
        play = bool(sum(select['event_type'] == 'play'))
        like = bool(sum(select['event_type'] == 'like'))
        ug = u['favourite_genres']
        ug = torch.Tensor(np.array([i in ug for i in GENRES]) * 1)
        ur = torch.Tensor([u['premium_user'] * 1])
        tk = torch.Tensor([int(i == t['key']) for i in range(16)])
        tr = torch.Tensor(t.drop(labels=['id', 'key']).to_numpy().astype(np.float64))

        return [[ug, ur], [tk, tr]], [play, like]

In [17]:
class Music_classifier(nn.Module):
    def __init__(self, genres):
        self.genres = genres
        #    |
        # 50 | Genres | 10    }
        #    |                } USER [11]
        # czy_premium | 1     }
    
        #    |
        # 16 | key    | 3     }
        #    |                } TRACK [16]
        # numeric     | 12    }
        # binary      | 1     }
    
        # TRACK [16] |        }
        # TRACK [16] |   | 10 } FINGERPRINT [10]
        # TRACK [16] |        }
        track_key_code = 3
        user_genre_code = 10
        fingerprint_params = 15
        super(Music_classifier, self).__init__()
        ##### USER PREP ######
        self.emb_user_genre = nn.Linear(len(genres), user_genre_code)
        self.emb_user_genre_act = nn.LeakyReLU()

        ##### TRACK PREP #####
        self.emb_track_key = nn.Linear(16, track_key_code)
        self.emb_track_key_act = nn.LeakyReLU()

        ##### FINGERPRINT PREP #####
        # self.emb_fingerprint = nn.Linear(3 * (track_key_code + 12 + 1), fingerprint_params)
        # self.emb_fingerprint_act = nn.LeakyReLU()
        # self.emb_fingerprint_track1 = nn.Linear(16, track_key_code)
        # self.emb_fingerprint_track1_act = nn.LeakyReLU()
        # self.emb_fingerprint_track2 = nn.Linear(16, track_key_code)
        # self.emb_fingerprint_track2_act = nn.LeakyReLU()
        # self.emb_fingerprint_track3 = nn.Linear(16, track_key_code)
        # self.emb_fingerprint_track3_act = nn.LeakyReLU()

        #### MAIN CLASSIFIER ####
        user_params = user_genre_code + 1
        track_params = track_key_code + 12 + 1

        self.layers = nn.Sequential(
            # nn.Linear(user_params + track_params + fingerprint_params, 256),
            nn.Linear(user_params + track_params, 128),
            nn.LeakyReLU(),

            nn.BatchNorm1d(128),
            nn.Linear(128, 64),
            nn.Dropout(0.1),
            nn.LeakyReLU(),

            nn.Linear(64, 32),
            nn.ReLU(),
            #nn.Linear(32, 3),
            nn.Linear(32, 2),

            nn.Sigmoid()
        )
    #def forward(self, user, track, finger):
    def forward(self, x):
        # user -> [Tensor(50, bs), Tensor(1, bs)]
        # track -> [Tensor(16, bs), Tensor(13, bs)]
        # finger -> [ [Tensor(16, bs), Tensor(13, bs)], [Tensor(16, bs), Tensor(13, bs)], [Tensor(16, bs), Tensor(13, bs)] ]
        user, track = x

        comp_emb_user_genre = self.emb_user_genre(user[0])
        comp_emb_user_genre = self.emb_user_genre_act(comp_emb_user_genre)

        comp_emb_track_key = self.emb_track_key(track[0])
        comp_emb_track_key = self.emb_track_key_act(comp_emb_track_key)

        # comp_emb_fingerprint_track1 = self.emb_fingerprint_track1(finger[0][0])
        # comp_emb_fingerprint_track1 = self.emb_fingerprint_track1_act(comp_emb_fingerprint_track1)
        # comp_emb_fingerprint_track2 = self.emb_fingerprint_track2(finger[1][0])
        # comp_emb_fingerprint_track2 = self.emb_fingerprint_track2_act(comp_emb_fingerprint_track2)
        # comp_emb_fingerprint_track3 = self.emb_fingerprint_track3(finger[2][0])
        # comp_emb_fingerprint_track3 = self.emb_fingerprint_track3_act(comp_emb_fingerprint_track3)
        # emb_finger_x = torch.cat([comp_emb_fingerprint_track1, finger[0][1], 
        #                           comp_emb_fingerprint_track2, finger[1][1], 
        #                           comp_emb_fingerprint_track3, finger[2][1]], dim=1)
        # comp_emb_fingerprint = self.emb_fingerprint(emb_finger_x)
        # comp_emb_fingerprint = self.emb_fingerprint_act(comp_emb_fingerprint)

        # x = torch.cat([comp_emb_user_genre, user[1], comp_emb_track_key, track[1], comp_emb_fingerprint])
        # print(comp_emb_user_genre.shape, user[1].shape, comp_emb_track_key.shape, track[1].shape)
        c = torch.cat([comp_emb_user_genre, user[1], comp_emb_track_key, track[1]], 1)
        
        return self.layers(c)


## Training

In [51]:
def accuracy(loader, model, device):
    with torch.no_grad():
        good_like = 0
        good_play = 0
        all = 0
        for x, y in loader:
            x = [ [ x[0][0].to(device), x[0][1].to(device) ], [ x[1][0].to(device), x[1][1].to(device) ] ]
            y = [ y[0].to(device), y[1].to(device) ]  
            preds = model(x)
            preds = preds.to(device)
            good_play = sum([a == b for a, b in zip(np.round(preds.cpu().detach().numpy(), 0).T[0], y[0])])
            good_like = sum([a == b for a, b in zip(np.round(preds.cpu().detach().numpy(), 0).T[1], y[1])])
            all = len(preds)
        print(f"Accuracy of playing: {good_play * 100/all:.3}%\nAccuracy of liking: {good_like * 100/all:.3}%\n")
           

In [103]:
dataset = MusicDataset(users, tracks, sessions)
train, test = data.random_split(dataset, [round(len(dataset) * 0.7), round(len(dataset) * 0.3)])

In [104]:
train_dataloader = data.DataLoader(train, batch_size=64, shuffle=True, drop_last=True)
test_dataloader = data.DataLoader(test, batch_size=64, shuffle=False, drop_last=False)

In [44]:
model = Music_classifier(GENRES)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
loss_module = nn.CrossEntropyLoss()

In [45]:
model.to(device)

Music_classifier(
  (emb_user_genre): Linear(in_features=50, out_features=10, bias=True)
  (emb_user_genre_act): LeakyReLU(negative_slope=0.01)
  (emb_track_key): Linear(in_features=16, out_features=3, bias=True)
  (emb_track_key_act): LeakyReLU(negative_slope=0.01)
  (layers): Sequential(
    (0): Linear(in_features=27, out_features=128, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
    (2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Linear(in_features=128, out_features=64, bias=True)
    (4): Dropout(p=0.1, inplace=False)
    (5): LeakyReLU(negative_slope=0.01)
    (6): Linear(in_features=64, out_features=32, bias=True)
    (7): ReLU()
    (8): Linear(in_features=32, out_features=2, bias=True)
    (9): Sigmoid()
  )
)

In [None]:
EPOCHS = 10
model.train()
for epoch in range(EPOCHS):
    for x, y in train_dataloader:
        x = [ [ x[0][0].to(device), x[0][1].to(device) ], [ x[1][0].to(device), x[1][1].to(device) ] ]
        y = [ y[0].to(device), y[1].to(device) ]  
        preds = model(x)
        loss = loss_module(preds, torch.cat([y[0].unsqueeze(1), y[1].unsqueeze(1)], 1).float())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f'Epoch: {epoch}, loss: {loss.sum().item():.3}')
    if(epoch % 5):
        accuracy(train_dataloader, model, device)


Epoch: 0, loss: 0.0418
Epoch: 1, loss: 0.027
Accuracy of playing: 12.5%
Accuracy of liking: 92.2%



In [52]:
accuracy(train_dataloader, model, device)

Accuracy of playing: 1.56%
Accuracy of liking: 54.7%



In [58]:
accuracy(train_dataloader, lambda x: torch.zeros(64, 2), device)

Accuracy of playing: 1e+02%
Accuracy of liking: 1e+02%



## Saving

In [106]:
torch.save(model.state_dict(), '/content/drive/MyDrive/Yoga/studia/semestr6/classifier.model')

In [None]:
import joblib
joblib.dump(tracks_scaler, "classifier_track.scaler")