In [1]:
# Probably more imports than are really necessary...
import os
import torch
import torchaudio
from torch.utils.data import Dataset, DataLoader, random_split
import torch.nn as nn
import torch.nn.functional as F
from torchaudio.transforms import MelSpectrogram, AmplitudeToDB
from tqdm import tqdm
import librosa
import numpy as np
import miditoolkit
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, average_precision_score, accuracy_score
import random

## Metrics

In [2]:
def accuracy1(groundtruth, predictions):
    """Return the fraction of ground-truth keys whose prediction matches exactly.

    Args:
        groundtruth: mapping from item key to its true label.
        predictions: mapping from item key to its predicted label.

    Returns:
        ``correct / len(groundtruth)``, or ``0`` as soon as a ground-truth key
        has no prediction (the missing key is reported on stdout).
    """
    hits = 0
    for key in groundtruth:
        # A single missing prediction invalidates the whole score.
        if key not in predictions:
            print("Missing " + str(key) + " from predictions")
            return 0
        if groundtruth[key] == predictions[key]:
            hits += 1
    return hits / len(groundtruth)

In [3]:
def accuracy2(groundtruth, predictions):
    """Exact-match accuracy of ``predictions`` against ``groundtruth``.

    Both arguments are mappings keyed by the same items. If any ground-truth
    key is absent from ``predictions`` the key is printed and ``0`` is
    returned immediately; otherwise the share of matching values is returned.
    """
    n_correct = 0
    for item in groundtruth:
        if item in predictions:
            n_correct += 1 if predictions[item] == groundtruth[item] else 0
            continue
        # No prediction for this item: report it and give up.
        print("Missing " + str(item) + " from predictions")
        return 0
    return n_correct / len(groundtruth)

In [4]:
# Tag vocabulary for Task 3. The order matters: multi-hot label and prediction
# vectors are built by iterating over this list, so position i always means TAGS[i].
TAGS = ['rock', 'oldies', 'jazz', 'pop', 'dance',  'blues',  'punk', 'chill', 'electronic', 'country']

In [5]:
def accuracy3(groundtruth, predictions):
    """Macro-averaged average precision over the tags in ``TAGS``.

    Args:
        groundtruth: mapping from item key to its collection of true tags.
        predictions: mapping from item key to its collection of predicted tags.

    Returns:
        The macro mAP computed on multi-hot encodings of both mappings, or
        ``0`` as soon as a ground-truth key has no prediction (the key is
        printed).
    """
    pred_rows = []
    target_rows = []
    for key in groundtruth:
        if key not in predictions:
            print("Missing " + str(key) + " from predictions")
            return 0
        # Multi-hot encode both tag collections in TAGS order.
        pred_rows.append([int(tag in predictions[key]) for tag in TAGS])
        target_rows.append([int(tag in groundtruth[key]) for tag in TAGS])

    return average_precision_score(target_rows, pred_rows, average='macro')

## Task 1: Composer classification

In [7]:
# Root directory of the Task 1 data. Paths are built as dataroot1 + '/' + name,
# so the trailing slash here yields a (harmless) double slash.
dataroot1 = "student_files/task1_composer_classification/"

In [10]:
from scipy.stats import skew, kurtosis, entropy
from statistics import mean, median, stdev
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from lightgbm import LGBMClassifier
from sklearn.ensemble import ExtraTreesClassifier
import json
from music21 import converter, meter, chord, note, key as m21key
from torch.utils.data import TensorDataset
from torch import optim
from sklearn.preprocessing import LabelEncoder
from catboost import CatBoostClassifier

In [77]:
class model1():
    """Task 1 composer classifier: symbolic MIDI statistics fed to LightGBM.

    ``features`` turns one MIDI file into a fixed-length vector of pitch,
    duration, velocity and tempo statistics. ``train`` fits the classifier on
    those vectors and ``predict`` labels every file listed in a split file.
    """

    # Length of the vector built by features(). The all-zero fallback used for
    # files without notes must have this same length, otherwise the feature
    # matrix becomes ragged and the classifier rejects it.
    N_FEATURES = 34

    def __init__(self):
        # Fitted classifier; assigned by train().
        self.model = None

    def _skewness(self, data):
        """Population skewness of ``data``; 0.0 for fewer than 3 values or zero spread."""
        if len(data) < 3:
            return 0.0
        mu = np.mean(data)
        sigma = np.std(data)
        if sigma == 0:
            return 0.0
        # The small epsilon keeps the division finite for near-constant data.
        return np.mean((data - mu)**3) / (sigma**3 + 1e-9)

    def _kurtosis(self, data):
        """Excess kurtosis of ``data``; 0.0 for fewer than 4 values or zero spread."""
        if len(data) < 4:
            return 0.0
        mu = np.mean(data)
        sigma = np.std(data)
        if sigma == 0:
            return 0.0
        return np.mean((data - mu)**4) / (sigma**4 + 1e-9) - 3

    def features(self, path):
        """Compute the feature vector of one MIDI file.

        Args:
            path: file path relative to ``dataroot1``.

        Returns:
            A list of ``N_FEATURES`` numbers. Notes from all instruments are
            pooled and sorted by start time; a file with no notes yields an
            all-zero vector of the same length.
        """
        midi_obj = miditoolkit.midi.parser.MidiFile(dataroot1 + '/' + path)
        notes = []
        for instrument in midi_obj.instruments:
            notes.extend(instrument.notes)
        if not notes:
            # Must match the length of the real vector assembled below.
            return [0.0] * self.N_FEATURES
        notes.sort(key=lambda n: n.start)

        num_notes = len(notes)
        pitches = np.array([n.pitch for n in notes])
        durs = np.array([n.end - n.start for n in notes])
        velos = np.array([n.velocity for n in notes])
        tempos = np.array([tc.tempo for tc in midi_obj.tempo_changes])

        # Timing: notes per unit of time between the first onset and last offset.
        start = notes[0].start
        end = max(n.end for n in notes)
        total_time = max(end - start, 1e-9)  # avoid dividing by zero
        note_density = num_notes / total_time

        # Pitch statistics.
        pitch_mean = pitches.mean()
        pitch_min = pitches.min()
        pitch_max = pitches.max()
        pitch_range = pitch_max - pitch_min
        pitch_std = pitches.std(ddof=1) if num_notes > 1 else 0
        pitch_skew = self._skewness(pitches)
        pitch_kurt = self._kurtosis(pitches)
        pitch_q1, pitch_q3 = np.percentile(pitches, [25, 75])
        pitch_iqr = pitch_q3 - pitch_q1

        # Entropy of the pitch-class (pitch mod 12) distribution.
        pc_counts = np.bincount(pitches % 12, minlength=12)
        pitch_entropy = entropy(pc_counts / pc_counts.sum())

        # Melodic motion between consecutive notes. The ratios are normalised
        # by the number of notes (not the number of intervals).
        if num_notes >= 2:
            pitch_diff = np.diff(pitches)
            ups = np.sum(pitch_diff > 0) / num_notes
            downs = np.sum(pitch_diff < 0) / num_notes
            repeats = np.sum(pitch_diff == 0) / num_notes
            avg_interval = np.abs(pitch_diff).mean()
        else:
            ups = downs = repeats = avg_interval = 0.0

        # Duration statistics.
        dur_mean = durs.mean()
        dur_min = durs.min()
        dur_max = durs.max()
        dur_std = durs.std(ddof=1) if num_notes > 1 else 0
        rhythm_change = dur_std / dur_mean if dur_mean > 0 else 0
        dur_skew = self._skewness(durs)
        dur_kurt = self._kurtosis(durs)

        # Velocity statistics.
        velo_mean = velos.mean()
        velo_min = velos.min()
        velo_max = velos.max()
        velo_std = velos.std(ddof=1) if num_notes > 1 else 0
        velo_skew = self._skewness(velos)
        velo_kurt = self._kurtosis(velos)

        # Tempo statistics.
        tempo_mean = tempos.mean() if len(tempos) else 0
        tempo_std = tempos.std(ddof=1) if len(tempos) > 1 else 0
        tempo_changes = max(len(tempos) - 1, 0)

        features = [
            pitch_mean, pitch_min, pitch_max, pitch_std, pitch_range,
            dur_mean, dur_min, dur_max, dur_std, dur_max - dur_min,
            velo_mean, velo_min, velo_max, velo_std, velo_max - velo_min,
            ups, downs, repeats, avg_interval, rhythm_change,
            note_density, pitch_q1, pitch_q3, pitch_iqr,
            pitch_skew, pitch_kurt, pitch_entropy,
            dur_skew, dur_kurt, velo_skew, velo_kurt,
            tempo_mean, tempo_std, tempo_changes
        ]
        assert len(features) == self.N_FEATURES, "update N_FEATURES when editing the feature list"
        return features

    def predict(self, path, outpath=None):
        """Predict a label for every file listed in ``path``.

        Args:
            path: file containing a Python-literal collection of MIDI paths
                (iterating it yields the paths).
            outpath: if given, the predictions dict is written there as text.

        Returns:
            Dict mapping each path to its predicted label as a string.
        """
        # NOTE(review): eval() executes whatever the file contains; only use on
        # trusted data files.
        with open(path, 'r') as f:
            d = eval(f.read())
        predictions = {}
        for k in d:
            x = self.features(k)
            pred = self.model.predict([x])
            predictions[k] = str(pred[0])
        if outpath:
            with open(outpath, "w") as z:
                z.write(str(predictions) + '\n')
        return predictions

    def train(self, path):
        """Fit the LightGBM classifier on the labelled files listed in ``path``.

        Per the assignment template, this function is not called from the
        autograder; a saved model is expected to be uploaded instead (no
        ``save()`` is defined in this class as shown).

        Args:
            path: file containing a Python-literal dict ``{midi_path: label}``.
        """
        # NOTE(review): eval() on a data file; see predict().
        with open(path, 'r') as f:
            train_json = eval(f.read())
        X_train = [self.features(k) for k in train_json]
        y_train = [train_json[k] for k in train_json]
        model = LGBMClassifier(
            n_estimators=2000,
            max_depth=15,
            num_leaves=255,
            min_child_samples=20,
            subsample=1.0,
            colsample_bytree=0.7,
            learning_rate=0.5,
            reg_alpha=0.2,
            reg_lambda=0.2,
            n_jobs=-1,
        )
        model.fit(X_train, y_train)
        self.model = model
        


## Task 2: Sequence prediction

In [6]:
# Root directory of the Task 2 data. Paths are built as dataroot2 + '/' + name,
# so the trailing slash here yields a (harmless) double slash.
dataroot2 = "student_files/task2_next_sequence_prediction/"

In [7]:
class model2():
    """Task 2 pair classifier: decides whether two MIDI clips belong together.

    Each clip is summarised by ``features``; the two vectors of a pair are
    concatenated and classified by a LightGBM model fitted in ``train``.
    """

    # Length of the per-clip vector built by features(). The all-zero fallback
    # for clips without notes must have this same length so that concatenated
    # pair vectors always have 2 * N_FEATURES entries.
    N_FEATURES = 31

    def __init__(self):
        # Fitted classifier, or None until train() runs. predict() checks for
        # None to fall back to a simple heuristic, so it must exist here.
        self.model = None

    def _skewness(self, data):
        """Population skewness of ``data``; 0.0 for fewer than 3 values or zero spread."""
        if len(data) < 3:
            return 0.0
        mu = np.mean(data)
        sigma = np.std(data)
        if sigma == 0:
            return 0.0
        # The small epsilon keeps the division finite for near-constant data.
        return np.mean((data - mu)**3) / (sigma**3 + 1e-9)

    def _kurtosis(self, data):
        """Excess kurtosis of ``data``; 0.0 for fewer than 4 values or zero spread."""
        if len(data) < 4:
            return 0.0
        mu = np.mean(data)
        sigma = np.std(data)
        if sigma == 0:
            return 0.0
        return np.mean((data - mu)**4) / (sigma**4 + 1e-9) - 3

    def features(self, path):
        """Compute the feature vector of one MIDI clip.

        Args:
            path: file path relative to ``dataroot2``.

        Returns:
            A list of ``N_FEATURES`` numbers. Notes from all instruments are
            pooled and sorted by start time; a clip with no notes yields an
            all-zero vector of the same length.
        """
        midi_obj = miditoolkit.midi.parser.MidiFile(dataroot2 + '/' + path)
        notes = []
        for instrument in midi_obj.instruments:
            notes.extend(instrument.notes)
        if not notes:
            # Must match the length of the real vector assembled below.
            return [0.0] * self.N_FEATURES
        notes.sort(key=lambda n: n.start)

        num_notes = len(notes)
        pitches = np.array([n.pitch for n in notes])
        durs = np.array([n.end - n.start for n in notes])
        velos = np.array([n.velocity for n in notes])
        tempos = np.array([tc.tempo for tc in midi_obj.tempo_changes])

        # Timing: notes per unit of time between the first onset and last offset.
        start = notes[0].start
        end = max(n.end for n in notes)
        total_time = max(end - start, 1e-9)  # avoid dividing by zero
        note_density = num_notes / total_time

        # Pitch statistics.
        pitch_mean = pitches.mean()
        pitch_min = pitches.min()
        pitch_max = pitches.max()
        pitch_std = pitches.std(ddof=1) if num_notes > 1 else 0
        pitch_skew = self._skewness(pitches)
        pitch_kurt = self._kurtosis(pitches)
        pitch_q1, pitch_q3 = np.percentile(pitches, [25, 75])
        pitch_iqr = pitch_q3 - pitch_q1

        # Entropy of the pitch-class (pitch mod 12) distribution.
        pc_counts = np.bincount(pitches % 12, minlength=12)
        pitch_entropy = entropy(pc_counts / pc_counts.sum())

        # Melodic motion between consecutive notes. The ratios are normalised
        # by the number of notes (not the number of intervals).
        if num_notes >= 2:
            pitch_diff = np.diff(pitches)
            ups = np.sum(pitch_diff > 0) / num_notes
            downs = np.sum(pitch_diff < 0) / num_notes
            repeats = np.sum(pitch_diff == 0) / num_notes
            avg_interval = np.abs(pitch_diff).mean()
        else:
            ups = downs = repeats = avg_interval = 0.0

        # Duration statistics.
        dur_mean = durs.mean()
        dur_std = durs.std(ddof=1) if num_notes > 1 else 0
        rhythm_change = dur_std / dur_mean if dur_mean > 0 else 0
        dur_skew = self._skewness(durs)
        dur_kurt = self._kurtosis(durs)

        # Velocity statistics.
        velo_mean = velos.mean()
        velo_std = velos.std(ddof=1) if num_notes > 1 else 0
        velo_skew = self._skewness(velos)
        velo_kurt = self._kurtosis(velos)

        # Tempo statistics.
        tempo_mean = tempos.mean() if len(tempos) else 0
        tempo_std = tempos.std(ddof=1) if len(tempos) > 1 else 0
        tempo_changes = max(len(tempos) - 1, 0)

        features = [
            pitch_mean, pitch_min, pitch_max, pitch_std,
            dur_mean, durs.min(), durs.max(), dur_std,
            velo_mean, velos.min(), velos.max(), velo_std,
            ups, downs, repeats, avg_interval, rhythm_change,
            note_density, pitch_q1, pitch_q3, pitch_iqr,
            pitch_skew, pitch_kurt, pitch_entropy,
            dur_skew, dur_kurt, velo_skew, velo_kurt,
            tempo_mean, tempo_std, tempo_changes
        ]
        assert len(features) == self.N_FEATURES, "update N_FEATURES when editing the feature list"
        return features

    def train(self, path):
        """Fit the LightGBM pair classifier.

        Args:
            path: file containing a Python-literal dict
                ``{(path1, path2): label}``.
        """
        # NOTE(review): eval() executes whatever the file contains; only use on
        # trusted data files.
        with open(path, 'r') as f:
            data = eval(f.read())
        X, y = [], []

        for (p1, p2), label in data.items():
            # A pair is represented by its two clip vectors back to back.
            X.append(self.features(p1) + self.features(p2))
            y.append(label)

        model = LGBMClassifier(
            n_estimators=2000,
            max_depth=12,
            num_leaves=100,
            min_child_samples=5,
            subsample=1.0,
            colsample_bytree=0.7,
            learning_rate=0.3,
            reg_alpha=0.1,
            reg_lambda=0.1,
            n_jobs=-1,
            random_state=42
        )
        model.fit(X, y)
        self.model = model

    def predict(self, path, outpath=None):
        """Predict a boolean for every pair listed in ``path``.

        Args:
            path: file containing a Python-literal collection whose items are
                ``(path1, path2)`` pairs.
            outpath: if given, the predictions dict is written there as text.

        Returns:
            Dict mapping each pair to ``True``/``False``. Without a trained
            model, a pair is ``True`` when the mean pitches of the two clips
            differ by less than 5.
        """
        # NOTE(review): eval() on a data file; see train().
        with open(path, 'r') as f:
            d = eval(f.read())
        predictions = {}
        for k in d:
            path1, path2 = k  # Keys are pairs of paths
            x1 = self.features(path1)
            x2 = self.features(path2)
            if self.model is None:
                # Untrained fallback: hardcoded threshold on the difference of
                # the first feature (mean pitch).
                predictions[k] = abs(x1[0] - x2[0]) < 5
            else:
                pred = self.model.predict([x1 + x2])[0]
                predictions[k] = bool(pred)

        if outpath:
            with open(outpath, "w") as z:
                z.write(str(predictions) + '\n')
        return predictions

## Task 3: Audio classification

In [11]:
# Some constants (you can change any of these if useful)
SAMPLE_RATE = 16000  # sampling rate audio is loaded at and mel spectrograms are computed for
N_MELS = 64  # number of mel bands in the spectrogram features
N_CLASSES = 10  # classifier output size; labels are built from TAGS, so this should equal len(TAGS)
AUDIO_DURATION = 10 # seconds; every clip is padded or trimmed to this length
BATCH_SIZE = 32  # batch size shared by the train/valid/test DataLoaders

In [12]:
# Root directory of the Task 3 data. Paths are built as dataroot3 + '/' + name,
# so the trailing slash here yields a (harmless) double slash.
dataroot3 = "student_files/task3_audio_classification/"

In [13]:
from torchaudio.transforms import FrequencyMasking, TimeMasking

In [14]:
def extract_waveform(path):
    """Load an audio file as a fixed-length mono waveform tensor.

    Args:
        path: file path relative to ``dataroot3``.

    Returns:
        A float32 tensor of shape ``(1, SAMPLE_RATE * AUDIO_DURATION)``:
        shorter clips are zero-padded at the end, longer clips are truncated.
    """
    # librosa resamples to the requested rate while loading, so no separate
    # resampling step is needed afterwards.
    samples, _ = librosa.load(dataroot3 + '/' + path, sr=SAMPLE_RATE)

    # Convert to a tensor *before* padding: F.pad only accepts tensors, so
    # padding the raw NumPy array would fail for every short clip.
    waveform = torch.tensor(samples, dtype=torch.float32).unsqueeze(0)

    # Pad or trim so that everything is the right length.
    target_len = SAMPLE_RATE * AUDIO_DURATION
    current_len = waveform.shape[1]
    if current_len < target_len:
        waveform = F.pad(waveform, (0, target_len - current_len))
    else:
        waveform = waveform[:, :target_len]
    return waveform

In [15]:
def extract_augmented_waveform(path, n_step):
    """Load a clip and optionally pitch-shift it by ``n_step`` semitone steps.

    Args:
        path: file path, forwarded to ``extract_waveform``.
        n_step: value passed as ``n_steps`` to librosa's pitch shift; ``0``
            returns the unmodified clip.

    Returns:
        A tensor of shape ``(1, T)`` holding the (possibly shifted) waveform,
        padded or trimmed to ``SAMPLE_RATE * AUDIO_DURATION`` samples after a
        shift.
    """
    samples = extract_waveform(path).squeeze(0).numpy()  # (T,)
    if n_step == 0:
        return torch.from_numpy(samples).unsqueeze(0)

    shifted = librosa.effects.pitch_shift(samples, sr=SAMPLE_RATE, n_steps=n_step)
    # Re-apply the fixed-length contract in case the shift changed the length.
    expected_len = SAMPLE_RATE * AUDIO_DURATION
    missing = expected_len - len(shifted)
    if missing > 0:
        shifted = np.pad(shifted, (0, missing), mode='constant')
    else:
        shifted = shifted[:expected_len]
    return torch.from_numpy(shifted).unsqueeze(0)

# extract_augmented_waveform("student_files/task3_audio_classification/train/0.wav", 1)

In [16]:
class AudioDataset(Dataset):
    """Dataset of log-mel spectrograms with multi-hot tag labels.

    Args:
        meta: mapping from audio path to its collection of tags.
        preload: when true, every spectrogram is computed once in the
            constructor and cached; otherwise it is computed on each access.
        augmented: when true, each path appears three times, with pitch
            shifts of -1, 0 and +1.
    """

    def __init__(self, meta, preload=True, augmented=False):
        self.meta = meta
        self.augmented = augmented

        # One (path, shift) entry per sample, grouped by path.
        shifts = (-1, 0, +1) if augmented else (0,)
        self.samples = [(path, shift) for path in meta for shift in shifts]

        self.mel = MelSpectrogram(sample_rate=SAMPLE_RATE, n_mels=N_MELS)
        self.db  = AmplitudeToDB()

        self.preload = preload
        if preload:
            self.pathToFeat = {}
            for sample in self.samples:
                self.pathToFeat[sample] = self._mel_features(*sample)

    def _mel_features(self, path, shift):
        """Compute the dB-scaled mel spectrogram of one (path, shift) sample."""
        if shift == 0:
            wav = extract_waveform(path)
        else:
            wav = extract_augmented_waveform(path, shift)
        return self.db(self.mel(wav)).squeeze(0)

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        """Return ``(spectrogram with a leading channel axis, label, path)``."""
        path, shift = self.samples[idx]
        tags = self.meta[path]
        # Multi-hot label in TAGS order.
        label = torch.tensor([1 if tag in tags else 0 for tag in TAGS],
                             dtype=torch.float32)

        if self.preload:
            mel_spec = self.pathToFeat[(path, shift)]
        else:
            mel_spec = self._mel_features(path, shift)

        return mel_spec.unsqueeze(0), label, path

In [17]:
class Loaders():
    """Build the train / validation / test DataLoaders for Task 3.

    Args:
        train_path: file containing a Python-literal dict ``{path: tags}``.
        test_path: file containing a Python-literal collection of test paths.
        split_ratio: fraction of the training data kept for training; the
            remainder becomes the validation set.
        seed: seed applied to ``torch`` and ``random`` before splitting.

    Attributes:
        loaderTrain, loaderValid, loaderTest: the three DataLoaders.
    """

    def __init__(self, train_path, test_path, split_ratio=0.9, seed = 0):
        torch.manual_seed(seed)
        random.seed(seed)

        # NOTE(review): eval() executes whatever the files contain; only use on
        # trusted data files. Context managers make sure the handles are closed.
        with open(train_path, 'r') as f:
            meta_train = eval(f.read())
        with open(test_path, 'r') as f:
            l_test = eval(f.read())
        # AudioDataset expects a dict, so test paths get an empty tag list.
        meta_test = {x: [] for x in l_test}

        all_train = AudioDataset(meta_train, augmented=False)
        test_set = AudioDataset(meta_test)

        # Split all_train into train + valid
        total_len = len(all_train)
        train_len = int(total_len * split_ratio)
        valid_len = total_len - train_len
        train_set, valid_set = random_split(all_train, [train_len, valid_len])

        # NOTE(review): the training loader is not reshuffled between epochs
        # (shuffle=False); random_split only fixes one random order. Kept as is
        # so results stay comparable with earlier runs.
        self.loaderTrain = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)
        self.loaderValid = DataLoader(valid_set, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)
        self.loaderTest = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

In [65]:
class CNNClassifier(nn.Module):
    """Two-block CNN over mel spectrograms with sigmoid multi-label outputs.

    Args:
        n_classes: number of output tags.
    """

    def __init__(self, n_classes=N_CLASSES):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 16, 3, padding=1)
        self.bn1 = nn.BatchNorm2d(16)
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
        self.bn2 = nn.BatchNorm2d(32)
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout = nn.Dropout(0.3)
        # Two 2x2 poolings quarter both axes. 801 is presumably the number of
        # spectrogram frames of one clip -- TODO confirm against the mel
        # transform settings.
        flat_dim = 32 * (N_MELS // 4) * (801 // 4)
        self.fc1 = nn.Linear(flat_dim, 256)
        self.fc2 = nn.Linear(256, n_classes)

    def forward(self, x):
        """Map a ``(B, 1, mel, time)`` batch to per-tag probabilities."""
        # conv -> batch norm -> ReLU -> pool, twice: (B, 32, mel/4, time/4)
        for conv, norm in ((self.conv1, self.bn1), (self.conv2, self.bn2)):
            x = self.pool(F.relu(norm(conv(x))))
        flat = x.view(x.size(0), -1)
        hidden = self.dropout(F.relu(self.fc1(flat)))
        logits = self.fc2(hidden)
        return torch.sigmoid(logits)  # multilabel -> sigmoid

class ResBlock(nn.Module):
    """Residual block: two 3x3 conv + batch-norm layers plus a skip connection.

    Args:
        in_channels: channels of the input feature map.
        out_channels: channels produced by the block.
        stride: stride of the first convolution (and of the projection
            shortcut, when one is needed).
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

        # The skip path needs a 1x1 projection whenever the main path changes
        # the spatial size or the channel count; otherwise it is the identity.
        shape_preserved = stride == 1 and in_channels == out_channels
        if shape_preserved:
            self.shortcut = nn.Identity()
        else:
            projection = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride)
            self.shortcut = nn.Sequential(projection, nn.BatchNorm2d(out_channels))

    def forward(self, x):
        """Return ``relu(main(x) + shortcut(x))``."""
        main = self.conv1(x)
        main = self.relu(self.bn1(main))
        main = self.bn2(self.conv2(main))
        return self.relu(main + self.shortcut(x))

class OptimizedCNNClassifier(nn.Module):
    """Small residual CNN with global average pooling and sigmoid outputs.

    Args:
        n_classes: number of output tags.
    """

    def __init__(self, n_classes=N_CLASSES):
        super().__init__()
        stem_layers = [
            nn.Conv2d(1, 16, kernel_size=3, padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(inplace=True),
        ]
        self.stem = nn.Sequential(*stem_layers)

        # Three stride-2 residual stages; each one halves the spatial size
        # while the channel count goes 16 -> 32 -> 64 -> 128.
        widths = (16, 32, 64, 128)
        stages = [ResBlock(c_in, c_out, stride=2) for c_in, c_out in zip(widths, widths[1:])]
        self.body = nn.Sequential(*stages)

        self.pool = nn.AdaptiveAvgPool2d(1)  # global average pooling
        head_layers = [
            nn.Linear(128, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(0.4),
            nn.Linear(256, n_classes),
        ]
        self.fc = nn.Sequential(*head_layers)

    def forward(self, x):
        """Map a batch of single-channel spectrograms to per-tag probabilities."""
        feature_map = self.body(self.stem(x))
        pooled = self.pool(feature_map).flatten(1)
        logits = self.fc(pooled)
        return torch.sigmoid(logits)


In [68]:
!nvidia-smi

Sat May 17 23:32:22 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 566.07                 Driver Version: 566.07         CUDA Version: 12.7     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 4060 ...  WDDM  |   00000000:01:00.0  On |                  N/A |
| N/A   50C    P5              7W /   95W |    1834MiB /   8188MiB |      1%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [69]:
class Pipeline():
    """Training and evaluation wrapper for a multi-label audio classifier.

    Args:
        model: ``nn.Module`` whose outputs are per-tag probabilities.
        learning_rate: learning rate for the Adam optimizer.
        seed: seed applied to ``torch`` and ``random``.
    """

    def __init__(self, model, learning_rate, seed = 0):
        # These two lines will (mostly) make things deterministic.
        torch.manual_seed(seed)
        random.seed(seed)

        # Use the GPU when one is available; per the template note, the
        # autograder runs on CPU.
        use_cuda = torch.cuda.is_available()
        self.device = torch.device("cuda" if use_cuda else "cpu")
        self.model = model.to(self.device)
        self.optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
        self.criterion = nn.BCELoss()

    def evaluate(self, loader, threshold=0.5, outpath=None):
        """Run the model over ``loader`` and turn scores into tag lists.

        Args:
            loader: iterable of ``(inputs, labels, paths)`` batches.
            threshold: a tag is predicted when its score exceeds this value.
            outpath: if given, predictions are written there and no metric is
                computed (test labels are unavailable).

        Returns:
            ``(predictions, mAP)`` where ``predictions`` maps each path to its
            predicted tags and ``mAP`` is the macro average precision on the
            raw scores, or ``None`` when ``outpath`` is given.
        """
        self.model.eval()
        score_chunks, label_chunks, all_paths = [], [], []
        with torch.no_grad():
            for x, y, ps in loader:
                x = x.to(self.device)
                y = y.to(self.device)
                score_chunks.append(self.model(x).cpu())
                label_chunks.append(y.cpu())
                all_paths.extend(list(ps))

        preds = torch.cat(score_chunks)
        targets = torch.cat(label_chunks)
        preds_bin = (preds > threshold).float()

        predictions = {}
        for i, row in enumerate(preds_bin):
            predictions[all_paths[i]] = [TAGS[j] for j, flag in enumerate(row) if flag]

        if outpath:  # Save predictions
            with open(outpath, "w") as z:
                z.write(str(predictions) + '\n')
            return predictions, None

        # Only compute the metric when we are *not* saving predictions.
        mAP = average_precision_score(targets, preds, average='macro')
        return predictions, mAP

    def train(self, train_loader, val_loader, num_epochs):
        """Train for ``num_epochs`` epochs, printing loss and validation mAP."""
        for epoch in range(num_epochs):
            self.model.train()
            running_loss = 0.0
            progress = tqdm(train_loader, desc=f"Epoch {epoch+1}")
            for x, y, _ in progress:
                x = x.to(self.device)
                y = y.to(self.device)
                self.optimizer.zero_grad()
                loss = self.criterion(self.model(x), y)
                loss.backward()
                self.optimizer.step()
                running_loss += loss.item()
            _, mAP = self.evaluate(val_loader)
            print(f"[Epoch {epoch+1}] Loss: {running_loss/len(train_loader):.4f} | Val mAP: {mAP:.4f}")

## Run everything...

In [None]:
def run1():
    """Train the Task 1 model, write test predictions, print training accuracy.

    Test predictions are saved to ``predictions1.json``.
    """
    train_file = dataroot1 + "/train.json"

    model = model1()
    model.train(train_file)
    train_preds = model.predict(train_file)
    # Called for its side effect of writing the predictions file.
    model.predict(dataroot1 + "/test.json", "predictions1.json")

    # NOTE(review): eval() executes whatever the file contains; only use on
    # trusted data files. The context manager closes the handle.
    with open(train_file) as f:
        train_labels = eval(f.read())
    acc1 = accuracy1(train_labels, train_preds)
    print("Task 1 training accuracy = " + str(acc1))

In [8]:
def run2():
    """Train the Task 2 model, write test predictions, print training accuracy.

    Test predictions are saved to ``predictions2.json``.
    """
    train_file = dataroot2 + "/train.json"

    model = model2()
    model.train(train_file)
    train_preds = model.predict(train_file)
    # Called for its side effect of writing the predictions file.
    model.predict(dataroot2 + "/test.json", "predictions2.json")

    # NOTE(review): eval() executes whatever the file contains; only use on
    # trusted data files. The context manager closes the handle.
    with open(train_file) as f:
        train_labels = eval(f.read())
    acc2 = accuracy2(train_labels, train_preds)
    print("Task 2 training accuracy = " + str(acc2))

In [70]:
def run3():
    """Train the Task 3 model, write test predictions, print training mAP.

    Trains ``OptimizedCNNClassifier`` for 5 epochs at learning rate 1e-4 and
    saves test predictions to ``predictions3.json``. The printed mAP is
    computed from the thresholded training predictions.
    """
    train_file = dataroot3 + "/train.json"
    loaders = Loaders(train_file, dataroot3 + "/test.json")
    model = OptimizedCNNClassifier()
    pipeline = Pipeline(model, 1e-4)

    pipeline.train(loaders.loaderTrain, loaders.loaderValid, 5)
    train_preds, _ = pipeline.evaluate(loaders.loaderTrain, 0.5)
    valid_preds, _ = pipeline.evaluate(loaders.loaderValid, 0.5)
    # Called for its side effect of writing the predictions file.
    pipeline.evaluate(loaders.loaderTest, 0.5, "predictions3.json")

    # NOTE(review): eval() executes whatever the file contains; only use on
    # trusted data files. The context manager closes the handle.
    with open(train_file) as f:
        all_train = eval(f.read())
    for k in valid_preds:
        # We split our training set into train+valid
        # so need to remove validation instances from the training set for evaluation
        all_train.pop(k)
    acc3 = accuracy3(all_train, train_preds)
    print("Task 3 training mAP = " + str(acc3))

In [None]:
# Task 1: train, write predictions1.json, and print the training accuracy.
run1()

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001036 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5767
[LightGBM] [Info] Number of data points in the train set: 1210, number of used features: 34
[LightGBM] [Info] Start training from score -2.163902
[LightGBM] [Info] Start training from score -0.903970
[LightGBM] [Info] Start training from score -1.760838
[LightGBM] [Info] Start training from score -3.166550
[LightGBM] [Info] Start training from score -2.344785
[LightGBM] [Info] Start training from score -3.487458
[LightGBM] [Info] Start training from score -2.310884
[LightGBM] [Info] Start training from score -3.206555
Task 1 training accuracy = 1.0


: 

In [11]:
# Task 2: train, write predictions2.json, and print the training accuracy.
run2()

[LightGBM] [Info] Number of positive: 4779, number of negative: 4779
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002322 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12110
[LightGBM] [Info] Number of data points in the train set: 9558, number of used features: 62
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
Task 2 training accuracy = 1.0


In [71]:
# Task 3: train for 5 epochs, write predictions3.json, and print the training mAP.
run3()

Epoch 1: 100%|██████████| 113/113 [01:00<00:00,  1.86it/s]


[Epoch 1] Loss: 0.4295 | Val mAP: 0.2135


Epoch 2: 100%|██████████| 113/113 [00:58<00:00,  1.93it/s]


[Epoch 2] Loss: 0.2786 | Val mAP: 0.2889


Epoch 3: 100%|██████████| 113/113 [00:56<00:00,  1.98it/s]


[Epoch 3] Loss: 0.2528 | Val mAP: 0.3070


Epoch 4: 100%|██████████| 113/113 [00:57<00:00,  1.98it/s]


[Epoch 4] Loss: 0.2392 | Val mAP: 0.3105


Epoch 5: 100%|██████████| 113/113 [00:57<00:00,  1.98it/s]


[Epoch 5] Loss: 0.2313 | Val mAP: 0.3263
Task 3 training mAP = 0.21008228317347827
