In [270]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler

In [271]:
class MusicDataset(Dataset):
    """Training split of the music CSV, served as ``(features, label)`` pairs.

    The ``id`` column is discarded, the first ``n_train_rows`` rows are kept,
    and the remaining columns are put into a fixed order with ``genre`` (the
    label) first and the 25 feature columns after it.

    Args:
        csv_path: Path of the labelled CSV file.
        n_train_rows: Number of leading rows that form the training split.

    Attributes are set in ``__init__``: ``x_data`` (features), ``y_data``
    (labels) and ``n_samples`` (row count after cleaning).
    """

    def __init__(self, csv_path='datasets/music_train.csv', n_train_rows=13700):
        df_train = pd.read_csv(csv_path).drop(columns=['id']).iloc[:n_train_rows, :]
        columnNames = ["genre","release_date", "dating","violence","world/life","night/time","shake the audience","family/gospel","romantic","communication","obscene","music","movement/places","light/visual perceptions","family/spiritual","like/girls","sadness","feelings","danceability","loudness","acousticness","instrumentalness","valence","energy","age","topic"]
        df_train = df_train.reindex(columns=columnNames)

        # Rows without a topic are dropped, as before. Rows without a genre
        # are dropped too: the label is a class id, so imputing it with the
        # column mean would invent a fractional, meaningless class.
        df_train = df_train.dropna(axis=0, how='any', subset=['genre', 'topic'])

        # Fill the remaining gaps with each column's mean. This is done on the
        # whole frame (not ``df[col].fillna(..., inplace=True)``) because the
        # chained in-place form does not update the frame under pandas
        # Copy-on-Write, and ``DataFrame.iteritems`` no longer exists in
        # pandas >= 2.0.
        df_train = df_train.fillna(df_train.mean())

        values = df_train.values
        self.x_data = torch.from_numpy(values[:, 1:])  # every column but the label
        self.y_data = torch.from_numpy(values[:, 0])   # the ``genre`` column
        self.n_samples = values.shape[0]

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        """Return the number of rows kept after cleaning."""
        return self.n_samples
        
        

        
        
        

In [272]:
class MusicValidationDataset(Dataset):
    """Validation split of the music CSV, served as ``(features, label)`` pairs.

    The ``id`` column is discarded, rows from ``start_row`` onwards are kept,
    and the remaining columns are put into a fixed order with ``genre`` (the
    label) first and the 25 feature columns after it.

    Args:
        csv_path: Path of the labelled CSV file.
        start_row: Positional index of the first row of the validation split.

    Attributes are set in ``__init__``: ``x_test_data`` (features),
    ``y_test_data`` (labels) and ``n_test_samples`` (row count after cleaning).
    """

    def __init__(self, csv_path='datasets/music_train.csv', start_row=13700):
        df_test = pd.read_csv(csv_path).drop(columns=['id']).iloc[start_row:, :]
        columnNames = ["genre","release_date", "dating","violence","world/life","night/time","shake the audience","family/gospel","romantic","communication","obscene","music","movement/places","light/visual perceptions","family/spiritual","like/girls","sadness","feelings","danceability","loudness","acousticness","instrumentalness","valence","energy","age","topic"]
        df_test = df_test.reindex(columns=columnNames)

        # Rows without a topic are dropped, as before. Rows without a genre
        # are dropped too: the label is a class id, so imputing it with the
        # column mean would invent a fractional, meaningless class.
        df_test = df_test.dropna(axis=0, how='any', subset=['genre', 'topic'])

        # Fill the remaining gaps with each column's mean. This is done on the
        # whole frame (not ``df[col].fillna(..., inplace=True)``) because the
        # chained in-place form does not update the frame under pandas
        # Copy-on-Write, and ``DataFrame.iteritems`` no longer exists in
        # pandas >= 2.0.
        df_test = df_test.fillna(df_test.mean())

        values = df_test.values
        self.x_test_data = torch.from_numpy(values[:, 1:])  # every column but the label
        self.y_test_data = torch.from_numpy(values[:, 0])   # the ``genre`` column
        self.n_test_samples = values.shape[0]

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.x_test_data[index], self.y_test_data[index]

    def __len__(self):
        """Return the number of rows kept after cleaning."""
        return self.n_test_samples
        
        

        
        
   

In [273]:
# Build the training split; constructing it reads and cleans the CSV.
dataset = MusicDataset()

In [274]:
# Mini-batches of 200 training samples, reshuffled on every pass.
train_loader = DataLoader(
    dataset=dataset,
    batch_size=200,
    shuffle=True,
)


In [275]:
# Held-out split, served in file order (no shuffling).
validation_dataset = MusicValidationDataset()
validation_loader = DataLoader(
    dataset=validation_dataset,
    batch_size=200,
    shuffle=False,
)

In [276]:
# Network shape.
input_size = 25       # width of the first linear layer's input
hidden_size1 = 50     # units in the first hidden layer
hidden_size2 = 100    # units in the second hidden layer
num_classes = 7       # output units of the network

# Optimisation settings.
learning_rate = 0.01
num_epochs = 2

In [277]:
class NeuralNet(nn.Module):
    """Fully connected network: two ReLU hidden layers, then a linear output.

    Args:
        input_size: Number of input features per sample.
        hidden_size1: Units in the first hidden layer.
        hidden_size2: Units in the second hidden layer.
        num_classes: Number of output units.
    """

    def __init__(self, input_size, hidden_size1, hidden_size2, num_classes):
        super().__init__()
        self.input_size = input_size
        self.l1 = nn.Linear(input_size, hidden_size1)
        # One stateless ReLU instance is shared by both hidden layers.
        self.relu = nn.ReLU()
        self.l2 = nn.Linear(hidden_size1, hidden_size2)
        self.l3 = nn.Linear(hidden_size2, num_classes)

    def forward(self, x):
        """Return the raw output of the last linear layer for batch ``x``."""
        hidden = self.relu(self.l1(x))
        hidden = self.relu(self.l2(hidden))
        return self.l3(hidden)

    
# Instantiate the classifier together with its loss and optimiser.
model = NeuralNet(
    input_size=input_size,
    hidden_size1=hidden_size1,
    hidden_size2=hidden_size2,
    num_classes=num_classes,
)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

In [278]:
# Train the model: one optimiser step per mini-batch, for `num_epochs` passes.
n_total_steps = len(train_loader)

# Make sure the module is in training mode before optimising.
model.train()

for epoch in range(num_epochs):
    for i, (inputs, targets) in enumerate(train_loader):

        # Call the module itself rather than `.forward()`, so that
        # `nn.Module.__call__` runs any registered hooks.
        outputs = model(inputs.float())
        # The loss is given the targets as int64 values.
        targets = targets.long()

        loss = criterion(outputs, targets)

        # Clear gradients left from the previous step, then backpropagate
        # and update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report the loss of every 10th batch.
        if i % 10 == 0:
            print (f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')



Epoch [1/2], Step [1/66], Loss: 47.3143
Epoch [1/2], Step [11/66], Loss: 35.5753
Epoch [1/2], Step [21/66], Loss: 3.9869
Epoch [1/2], Step [31/66], Loss: 1.9199
Epoch [1/2], Step [41/66], Loss: 1.9086
Epoch [1/2], Step [51/66], Loss: 1.8947
Epoch [1/2], Step [61/66], Loss: 1.8471
Epoch [2/2], Step [1/66], Loss: 1.8963
Epoch [2/2], Step [11/66], Loss: 1.8554
Epoch [2/2], Step [21/66], Loss: 1.8890
Epoch [2/2], Step [31/66], Loss: 1.8917
Epoch [2/2], Step [41/66], Loss: 1.8229
Epoch [2/2], Step [51/66], Loss: 1.8857
Epoch [2/2], Step [61/66], Loss: 1.8585


In [279]:
class MusicTestDataset(Dataset):
    """Unlabelled test CSV, served as feature tensors only.

    The ``id`` column is discarded and the remaining columns are put into a
    fixed order of 25 feature columns. No row is ever dropped here, so sample
    ``k`` always corresponds to row ``k`` of the file.

    Args:
        csv_path: Path of the unlabelled CSV file.

    Attributes are set in ``__init__``: ``x_test_data`` (features) and
    ``n_test_samples`` (row count).
    """

    def __init__(self, csv_path='datasets/music_test.csv'):
        df_test = pd.read_csv(csv_path).drop(columns=['id'])
        columnNames = ["release_date", "dating","violence","world/life","night/time","shake the audience","family/gospel","romantic","communication","obscene","music","movement/places","light/visual perceptions","family/spiritual","like/girls","sadness","feelings","danceability","loudness","acousticness","instrumentalness","valence","energy","age","topic"]
        df_test = df_test.reindex(columns=columnNames)

        # Fill every gap with its column's mean. This is done on the whole
        # frame (not ``df[col].fillna(..., inplace=True)``) because the
        # chained in-place form does not update the frame under pandas
        # Copy-on-Write, and ``DataFrame.iteritems`` no longer exists in
        # pandas >= 2.0.
        df_test = df_test.fillna(df_test.mean())

        values = df_test.values
        self.x_test_data = torch.from_numpy(values[:, :])
        self.n_test_samples = values.shape[0]

    def __getitem__(self, index):
        """Return the feature tensor stored at ``index``."""
        return self.x_test_data[index]

    def __len__(self):
        """Return the number of rows in the test file."""
        return self.n_test_samples
        
        

        
        
   

In [280]:
# Unlabelled test split, served in file order (no shuffling).
test_data = MusicTestDataset()
test_loader = DataLoader(
    dataset=test_data,
    batch_size=200,
    shuffle=False,
)

In [281]:
# Predict a class index for every test sample, batch by batch.
model.eval()  # inference mode for the module

batch_predictions = []
with torch.no_grad():  # no gradients are needed for prediction
    for inputs_test in test_loader:
        outputs_test = model(inputs_test.float())
        # Index of the largest output along the class dimension.
        predicted_test = outputs_test.argmax(dim=1)
        batch_predictions.append(predicted_test)

# Concatenate once at the end instead of re-allocating with `np.append` on
# every batch. The result stays an integer array, so class ids are written
# as `3` rather than `3.0`.
if batch_predictions:
    predictions = torch.cat(batch_predictions).numpy()
else:
    predictions = np.empty((0,), dtype=np.int64)



In [282]:
# Pair each test-file id with its predicted genre and write the result.
music_test_dataset_id = pd.read_csv('datasets/music_test.csv')
id_column = music_test_dataset_id["id"].rename("id")
predictions_column = pd.Series(predictions, name="genre")

# Both series carry a positional index, so joining on the index lines up
# row k of the test file with prediction k.
submission_df = pd.merge(
    id_column,
    predictions_column,
    left_index=True,
    right_index=True,
)
submission_df.to_csv('submission_df_music.csv', index=False)