In [2]:
import os 
import sys
import numpy as np
import pandas as pd 
import pickle
from tqdm import tqdm


In [3]:
import torch
import torch.nn as nn
import torch.functional as F
from torch.utils.data import DataLoader
from sklearn.metrics import *
from sklearn.neural_network import MLPClassifier

  from .autonotebook import tqdm as notebook_tqdm


In [46]:
audio_data_path = 'Passt/'
video_data_path = '../11775-hw2-handout/data/cnn2d_1d/'

In [47]:
train_df = pd.read_csv('data/labels/train_val.csv')
val_df = pd.read_csv('data/labels/val.csv')
test_df = pd.read_csv('data/labels/test_for_students.csv')

In [48]:
audio_file_list = os.listdir(audio_data_path)
video_file_list = os.listdir(video_data_path)

In [49]:
config = {'batch_size': 64,
          'epochs': 10,
          'learning_rate': 0.001,
        }

In [50]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print("Device: ", device)

Device:  cuda


In [51]:
def data_loading(label_df):
    feat_all = []
    label_all = []
    
    for ix, row in tqdm(label_df.iloc[:].iterrows()):
        Id = label_df.iloc[ix].Id
        category = train_df.iloc[ix].Category
        if Id+'.csv' in audio_file_list:
            audio_feat = pd.read_csv(os.path.join(audio_data_path, Id+'.csv')).values
            audio_feat = audio_feat.reshape(-1, 1)
        else:
            print(Id)
            continue
        if Id+'.pkl' in video_file_list:
            with open(os.path.join(video_data_path, Id+'.pkl'), 'rb') as f:
                video_feat = pickle.load(f)
                video_feat = video_feat[1].numpy().reshape(-1, 1)
        else:
            print(Id)
            continue
        

        feature = np.concatenate([audio_feat, video_feat]).squeeze()
        feat_all.append(feature)
        label_all.append(category)

    feat_all = torch.FloatTensor(np.stack(feat_all))
    label_all = torch.FloatTensor(np.stack(label_all))

    length = len(feat_all)
    
    return feat_all, label_all

In [52]:
train_x, train_y = data_loading(train_df.iloc[:])
val_x, val_y = data_loading(val_df.iloc[:])
test_x, test_y = data_loading(test_df.iloc[:])

1146it [00:07, 160.73it/s]

LTQ5ODI3NjU5MTQ3OTQ4NTAwOQ==


3159it [00:19, 160.90it/s]

NTkxNzA4MjE4OTM1ODg4NTYxOA==


7062it [00:44, 159.38it/s]

LTgxOTM5Mzg2MTMwNzM4NjQzNzg=


7500it [00:46, 159.90it/s]
310it [00:01, 278.11it/s]

LTQ5ODI3NjU5MTQ3OTQ4NTAwOQ==


1760it [00:06, 273.92it/s]
749it [00:04, 153.87it/s]


In [53]:
clf = MLPClassifier(hidden_layer_sizes=(1024), activation="relu",solver="adam",alpha=1e-3, verbose=True, validation_fraction=0.2)
# clf.fit(X_train, y_train)
clf.fit(train_x, train_y)

Iteration 1, loss = 8.61950404
Iteration 2, loss = 0.62966126
Iteration 3, loss = 0.31781233
Iteration 4, loss = 0.21771714
Iteration 5, loss = 0.16455354
Iteration 6, loss = 0.13246934
Iteration 7, loss = 0.11031081
Iteration 8, loss = 0.08786745
Iteration 9, loss = 0.07938841
Iteration 10, loss = 0.07484779
Iteration 11, loss = 0.05839098
Iteration 12, loss = 0.05760411
Iteration 13, loss = 0.04779342
Iteration 14, loss = 0.04260763
Iteration 15, loss = 0.03978156
Iteration 16, loss = 0.03617854
Iteration 17, loss = 0.03231652
Iteration 18, loss = 0.03446668
Iteration 19, loss = 0.03061536
Iteration 20, loss = 0.02400317
Iteration 21, loss = 0.01942564
Iteration 22, loss = 0.01966005
Iteration 23, loss = 0.01748253
Iteration 24, loss = 0.01772135
Iteration 25, loss = 0.01771043
Iteration 26, loss = 0.01453295
Iteration 27, loss = 0.01432472
Iteration 28, loss = 0.01221833
Iteration 29, loss = 0.01160442
Iteration 30, loss = 0.01140017
Iteration 31, loss = 0.01156949
Iteration 32, los

In [55]:
pred_classes = clf.predict(val_x)
accuracy_score(val_y, torch.Tensor(pred_classes))

0.06765207504263786

In [57]:
pred_classes = clf.predict(test_x)
# accuracy_score(val_y, pred_classes)

In [58]:
pred_classes

array([ 4., 12., 14., 12.,  6.,  1.,  8.,  3.,  3.,  4.,  1.,  4.,  8.,
        4.,  9., 12.,  5.,  2.,  2.,  6.,  8.,  5., 12.,  1.,  3.,  0.,
        0., 13., 13.,  1.,  1., 11.,  3.,  0.,  3.,  0.,  6.,  2.,  9.,
       14., 13., 14.,  4.,  9., 14., 14.,  5., 14., 10., 11., 10.,  2.,
        5., 12., 10.,  4.,  7., 12., 14., 10.,  0.,  7., 10.,  2.,  2.,
        7., 14.,  9.,  3., 10., 11., 14.,  1., 10., 14., 13., 13.,  7.,
        4., 11., 13.,  5.,  3.,  0.,  3.,  6.,  4.,  7., 11.,  9.,  8.,
        4., 11.,  0.,  8.,  9.,  3., 12.,  4.,  7.,  7.,  4.,  1., 10.,
        3., 13.,  1.,  4.,  8.,  1.,  3.,  1., 10.,  9.,  0.,  3.,  9.,
        2.,  6.,  2., 11., 11.,  9.,  7.,  5.,  2.,  8.,  3.,  7.,  1.,
       10.,  4.,  5.,  0.,  0.,  2.,  6.,  9.,  0.,  1.,  1.,  4.,  6.,
       10.,  5.,  8.,  9., 11.,  3.,  6.,  1.,  0., 12., 11.,  8., 12.,
        8., 13.,  6.,  8., 14.,  9., 11.,  3.,  4.,  1.,  7., 12., 10.,
        3.,  3., 12., 12.,  6.,  5.,  3.,  0.,  4., 10., 11.,  7

In [59]:
with open('output/early_sklearn_passt.csv', "w") as f:
    f.writelines("Id,Category\n")
    for i, pred_class in enumerate(pred_classes):
      f.writelines("%s,%d\n" % (test_df.Id[i], pred_class))

In [18]:
# !kaggle competitions submit -c 11775-fall2022-hw3 -f output/sklearn_1.csv -m "Message"

/bin/bash: kaggle: command not found
