# HW1: Frame-Level Speech Recognition

In this homework, you will be working with MFCC data consisting of 27 features at each time step/frame. Your model should be able to recognize the phoneme that occurred in that frame.

# Libraries

In [1]:
# !pip install torchsummaryX wandb --quiet

In [2]:
import torch
import numpy as np
from torchsummaryX import summary
import sklearn
import gc
import zipfile
import pandas as pd
from tqdm.auto import tqdm
import os
import datetime
import wandb
# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
# `device` is read as a global by the training and evaluation functions below.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print("Device: ", device)

  from .autonotebook import tqdm as notebook_tqdm


Device:  cuda


In [3]:
### If you are using colab, you can import google drive to save model checkpoints in a folder
# from google.colab import drive
# drive.mount('/content/drive')

In [4]:
### PHONEME LIST
# The position of a symbol in this list is the integer class label used for it.
# '[SOS]' and '[EOS]' are stripped from every transcript when the data is
# loaded, but they still count towards the number of output classes (42).
PHONEMES = [
            '[SIL]',   'AA',    'AE',    'AH',    'AO',    'AW',    'AY',
            'B',     'CH',    'D',     'DH',    'EH',    'ER',    'EY',
            'F',     'G',     'HH',    'IH',    'IY',    'JH',    'K',
            'L',     'M',     'N',     'NG',    'OW',    'OY',    'P',
            'R',     'S',     'SH',    'T',     'TH',    'UH',    'UW',
            'V',     'W',     'Y',     'Z',     'ZH',    '[SOS]', '[EOS]']

In [5]:
# Configs:
# Both locations can be overridden through environment variables so the
# notebook runs on other machines without editing code; the defaults are the
# original local paths.
# Note: MODEL_ROOT is re-derived per run further down (once the run name is known).
DATA_ROOT = os.environ.get("HW1_DATA_ROOT", "/mnt/e/Workspace/IDL/Data/hw1/11-785-s24-hw1p2/")
MODEL_ROOT = os.environ.get("HW1_MODEL_ROOT", "/mnt/e/Workspace/IDL/Models/hw1/11-785-s24-hw1p2/")

# Kaggle

This section contains code that helps you install Kaggle's API and create kaggle.json with your username and API key details. Make sure to input those in the given code to ensure you can download data from the competition successfully.

In [6]:
# !pip install --upgrade --force-reinstall --no-deps kaggle==1.5.8
# !mkdir /root/.kaggle

# with open("/root/.kaggle/kaggle.json", "w+") as f:
#     f.write('{"username":"Replace this with your Kaggle Username","key":"Replace this with your kaggle API key"}')
#     # Put your kaggle username & key here

# !chmod 600 /root/.kaggle/kaggle.json

In [7]:
# commands to download data from kaggle
# !kaggle competitions download -c 11785-hw1p2-s24

# !unzip -qo /content/11785-hw1p2-s24.zip -d '/content'

# Dataset

This section covers the dataset/dataloader class for speech data. You will have to spend time writing code to create this class successfully. We have given you a lot of comments guiding you on what code to write at each stage, from top to bottom of the class. Please try and take your time figuring this out, as it will immensely help in creating dataset/dataloader classes for future homeworks.

Before running the following cells, please take some time to analyse the structure of the data. Try loading a single MFCC and its transcript, then print out the shapes and the values. Do the transcripts look like phonemes?

In [8]:
# Dataset class to load train and validation data
from torch.nn.functional import one_hot
from torch.utils.data import Dataset, DataLoader
#TODO: dynamic padding
class AudioDataset(Dataset):
    """Frame-level MFCC dataset yielding ``(context window, phoneme index)`` pairs.

    All utterances of a partition are concatenated along the time axis and the
    result is zero-padded by ``max_context_length`` frames on both ends, so a
    window of ``2 * context + 1`` frames can be sliced around any frame index.
    Because only the two ends of the whole partition are padded, windows close
    to an utterance boundary contain frames of the neighbouring utterance.

    Args:
        root: directory containing one sub-directory per partition.
        phonemes: list of phoneme symbols; a symbol's position is its label.
        context: number of frames taken on each side of the centre frame.
        partition: name of the partition sub-directory to load.
        use_cmn: subtract the per-utterance mean from every MFCC if True.
    """

    def __init__(self, root=DATA_ROOT, phonemes = PHONEMES, context=0, partition= "train-clean-100", use_cmn=False): #TODO: make no-pad work
        # Padding added on each side; also the largest context that can be served.
        self.max_context_length = 1145
        self.set_context_length(context)
        self.phonemes   = phonemes

        self.num_phonemes = len(self.phonemes)

        self.mfccs, self.transcripts = self._init_data(f"{root}/{partition}", use_cmn=use_cmn)
        self.length = len(self.mfccs)

        # Pad with the data's own dtype and feature width: default float64 zeros
        # would upcast (and for float32 data double the size of) the whole array.
        padding = np.zeros((self.max_context_length, self.mfccs.shape[1]), dtype=self.mfccs.dtype)
        self.mfccs = np.concatenate([padding, self.mfccs, padding], axis=0)

    def __len__(self):
        """Return the number of (unpadded) frames in the partition."""
        return self.length

    def __getitem__(self, idx):
        """Return the flattened context window around frame ``idx`` and its label.

        Returns:
            frames: float32 tensor with ``(2 * context + 1) * n_features`` values.
            phonemes: scalar tensor holding the phoneme index of frame ``idx``.

        Raises:
            IndexError: if ``idx`` is outside ``[-len(self), len(self))``.
        """
        # Support negative indices explicitly; without this the frame slice and
        # the label lookup would resolve a negative index differently.
        if idx < 0:
            idx += self.length
        if idx < 0 or idx >= self.length:
            raise IndexError("frame index out of range")

        # Frame `idx` sits at `idx + max_context_length` in the padded array.
        centre = idx + self.max_context_length
        frames = self.mfccs[centre - self.context:centre + self.context + 1]

        # The MLP expects 1d input, so flatten the (2*context+1) x n_features window.
        frames = frames.flatten()

        frames      = torch.tensor(frames, dtype=torch.float32)
        phonemes    = torch.tensor(self.transcripts[idx])

        return frames, phonemes

    def _init_data(self, root: str, use_cmn = False):
        """Load every MFCC/transcript pair below ``root`` and concatenate them.

        Returns:
            A pair ``(mfccs, transcripts)`` of arrays concatenated over all
            utterances along axis 0; transcripts hold integer phoneme indices.
        """
        self.mfcc_dir       = f"{root}/mfcc"
        self.transcript_dir = f"{root}/transcript"
        # os.listdir returns entries in arbitrary order, so sort both listings
        # before pairing them by position.
        # NOTE(review): assumes matching file names in both directories - confirm.
        mfcc_names          = sorted(os.listdir(self.mfcc_dir))
        transcript_names    = sorted(os.listdir(self.transcript_dir))

        assert len(mfcc_names) == len(transcript_names)

        # Constant-time symbol -> index lookup; setdefault keeps the first
        # occurrence, matching list.index semantics.
        phoneme_to_index = {}
        for index, symbol in enumerate(self.phonemes):
            phoneme_to_index.setdefault(symbol, index)

        self.mfccs, self.transcripts = [], []
        for i in tqdm(range(len(mfcc_names))):
            # Load a single mfcc
            mfcc        = np.load(f"{self.mfcc_dir}/{mfcc_names[i]}")
            # Cepstral mean normalization: remove the per-utterance mean of each feature
            if use_cmn:
                mfcc = mfcc - np.mean(mfcc, axis=0)
            # Load the corresponding transcript
            transcript  = np.load(f"{self.transcript_dir}/{transcript_names[i]}")
            # Remove [SOS] and [EOS] from the transcript
            assert transcript[0] == '[SOS]' and transcript[-1] == '[EOS]'
            transcript = transcript[1:-1]
            # Map phoneme symbols to their integer class labels
            transcript = np.array([phoneme_to_index[symbol] for symbol in transcript.tolist()], dtype=np.int64)
            assert len(mfcc) == len(transcript)
            self.mfccs.append(mfcc)
            self.transcripts.append(transcript)

        return np.concatenate(self.mfccs, axis=0), np.concatenate(self.transcripts, axis=0)

    def set_context_length(self, context):
        """Set the number of context frames taken on each side of a frame.

        Raises:
            ValueError: if ``context`` is negative or exceeds the padding size,
                in which case windows could no longer be sliced correctly.
        """
        if context < 0 or context > self.max_context_length:
            raise ValueError(
                f"context must be in [0, {self.max_context_length}], got {context}")
        self.context = context

    def phoneme_reverse_lookup(self, idx: torch.tensor) -> str:
        """Return the phoneme symbol stored at position ``idx`` of the phoneme list."""
        return self.phonemes[idx]
         
         
# Load the validation partition. context=5 is only a placeholder for the sanity
# check below; it is overwritten with config['context'] before training.
val_data = AudioDataset(partition="dev-clean", context=5)

100%|██████████| 2703/2703 [00:39<00:00, 67.98it/s]


In [9]:
# Sanity check on an arbitrary frame: with context=5 the window holds
# (2*5 + 1) * 27 = 297 values, and the label maps back to a phoneme symbol.
f, p = val_data[114514]
print(f.shape, p, val_data.phoneme_reverse_lookup(p))

torch.Size([297]) tensor(3) AH


In [10]:
from numpy import ndarray


class AudioTestDataset(AudioDataset):
    """Unlabelled variant of AudioDataset: items are context windows only."""

    def _init_data(self, root: str, use_cmn=False):
        """Load every MFCC below ``root`` and pair it with dummy all-zero labels.

        Returns:
            A pair ``(mfccs, transcripts)`` concatenated over all utterances;
            ``transcripts`` only exists so the parent class logic can be reused.
        """
        self.mfcc_dir = f"{root}/mfcc"

        # Sort the listing: os.listdir order is arbitrary, and the order of the
        # frames here determines the order of the predictions.
        mfcc_names = sorted(os.listdir(self.mfcc_dir))

        self.mfccs, self.transcripts = [], []

        for i in tqdm(range(len(mfcc_names))):
            # Load a single mfcc
            mfcc        = np.load(f"{self.mfcc_dir}/{mfcc_names[i]}")
            # Apply the same cepstral mean normalization as the labelled
            # datasets so test features match what the model was trained on.
            if use_cmn:
                mfcc = mfcc - np.mean(mfcc, axis=0)
            # Placeholder labels, one per frame
            transcript = np.zeros(len(mfcc), dtype=np.int64)

            self.mfccs.append(mfcc)
            self.transcripts.append(transcript)

        return np.concatenate(self.mfccs, axis=0), np.concatenate(self.transcripts, axis=0)

    def __getitem__(self, ind):
        """Return only the flattened context window for frame ``ind``."""
        return super().__getitem__(ind)[0]
    

# Load the unlabelled test partition; context=5 is a placeholder that is
# overwritten with config['context'] before inference.
test_data = AudioTestDataset(partition="test-clean", context=5)

100%|██████████| 2620/2620 [00:20<00:00, 130.36it/s]


In [11]:
# Sanity check: the test dataset returns the frame window only (no label).
f = test_data[114514]
print(f.shape)

torch.Size([297])


In [12]:
# Load the training partition (slow: every utterance file is read from disk).
# context=5 is a placeholder overwritten with config['context'] later.
train_data = AudioDataset(partition="train-clean-100", context=5)

100%|██████████| 28539/28539 [07:11<00:00, 66.12it/s]


# Network Architecture


This section defines your network architecture for the homework. We have given you a sample architecture that can easily clear the very low cutoff for the early submission deadline.

In [13]:
# This architecture will make you cross the very low cutoff
# However, you need to run a lot of experiments to cross the medium or high cutoff
class LowCutoffNet(torch.nn.Module):
    """Small baseline MLP: input -> 256 -> 2048 -> output, ReLU and dropout 0.2."""

    def __init__(self, input_size, output_size):
        super().__init__()

        # Two hidden blocks (Linear -> ReLU -> Dropout) followed by the classifier.
        stack = [
            torch.nn.Linear(input_size, 256),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.2),
            torch.nn.Linear(256, 2048),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.2),
            torch.nn.Linear(2048, output_size),
        ]
        self.model = torch.nn.Sequential(*stack)

    def forward(self, x):
        """Return the class logits for a batch of flattened frame windows."""
        return self.model(x)

In [14]:
from torch import nn
from torch.nn.modules import Module

class DynamicMlpNet(torch.nn.Module):
    """Base class for plain feed-forward MLPs built from a list of hidden sizes.

    Subclasses customise the per-layer block by overriding
    ``_mlp_layer_provider`` and name themselves via ``describe``.

    Args:
        input_size: number of input features.
        output_size: number of output classes.
        hidden_sizes: iterable with the width of each hidden layer, in order.
        dropout_rate: dropout probability handed to every hidden block.
    """

    def __init__(self, input_size, output_size, hidden_sizes, dropout_rate):
        super(DynamicMlpNet, self).__init__()
        self.layers = []
        for hs in hidden_sizes:
            hs = int(hs)  # accept numpy integers (e.g. from np.array(...) * 512)
            self.layers.extend(self._mlp_layer_provider(input_size, hs, dropout_rate))
            input_size = hs
        self.layers.append(nn.Linear(input_size, output_size)) # output
        # Wrapping the list in Sequential is what registers the parameters.
        self.model = nn.Sequential(*self.layers)

    def forward(self, x):
        """Return the class logits for a batch of inputs."""
        out = self.model(x)
        return out

    def _mlp_layer_provider(self, input_size, hidden_size, dropout_rate) -> list[nn.Module]:
        """Return the modules making up one hidden block."""
        return [
            nn.Linear(input_size, hidden_size),
            nn.LeakyReLU(),
            nn.BatchNorm1d(hidden_size),
            nn.Dropout(dropout_rate)
        ]

    @staticmethod
    def describe() -> str:
        """Return a short architecture tag used in run names.

        Declared static so it works on the class and on instances alike.
        """
        return "base model, don't use this"
        
class NetV2(DynamicMlpNet):
    """Five-hidden-layer MLP (512-1024-2048-1024-512 by default) with Mish and dropout."""

    def __init__(self, input_size, output_size, dropout_rate, hidden_sizes=np.array([1,2,4,2,1]) * 512):
        super().__init__(input_size, output_size, hidden_sizes, dropout_rate)

    def _mlp_layer_provider(self, input_size, hidden_size, dropout_rate) -> list[Module]:
        """Return one hidden block: Linear -> Mish -> Dropout (no batch norm)."""
        return [
            nn.Linear(input_size, hidden_size),
            nn.Mish(),
            nn.Dropout(dropout_rate)
        ]

    @staticmethod
    def describe() -> str:
        """Return the architecture tag; static so instances can call it too."""
        return "5layer_mlp_v2"
    

In [15]:
#Param size 23M
class NetV3(DynamicMlpNet):
    """Seven-hidden-layer MLP (widths 512 * [1,2,4,8,4,2,1] by default)."""

    def __init__(self, input_size, output_size, dropout_rate, hidden_sizes=np.array([1,2,4,8,4,2,1]) * 512):
        super().__init__(input_size, output_size, hidden_sizes, dropout_rate)

    def _mlp_layer_provider(self, input_size, hidden_size, dropout_rate) -> list[Module]:
        """Return one hidden block: Linear -> Mish -> BatchNorm1d -> Dropout."""
        return [
            nn.Linear(input_size, hidden_size),
            nn.Mish(),
            nn.BatchNorm1d(hidden_size),
            nn.Dropout(dropout_rate)
        ]

    @staticmethod
    def describe() -> str:
        """Return the architecture tag; static so instances can call it too."""
        return "7layer_mlp_v3"

In [16]:
#Non-symmetric shape, param 21M
class NetV4(NetV3):
    """NetV3 blocks arranged in a non-symmetric 8-layer shape (256 * [8,16,4,1,4,8,4,2])."""

    def __init__(self, input_size, output_size, dropout_rate, hidden_sizes=np.array([8,16,4,1,4,8,4,2]) * 256):
        super().__init__(input_size, output_size, dropout_rate, hidden_sizes)

    @staticmethod
    def describe() -> str:
        """Return the architecture tag; static so instances can call it too."""
        return "8layer_mlp_asym_v3"

In [17]:
class DynamicDenseMlp(nn.Module):
    """Base class for densely connected MLPs.

    The first block maps the raw input to the first hidden width. Every later
    block, and the final output layer, receives the concatenation of all hidden
    outputs produced so far; the raw input itself is not forwarded.

    Args:
        input_size: number of input features.
        output_size: number of output classes.
        dropout_rate: dropout probability handed to every hidden block.
        hidden_sizes: iterable with the width of each hidden block, in order.
    """

    def __init__(self, input_size, output_size, dropout_rate, hidden_sizes):
        super(DynamicDenseMlp, self).__init__()
        # Total width of the earlier hidden outputs concatenated to a block's input.
        feedforward_size = 0
        self.dense_layers = []
        for i, hidden_size in enumerate(hidden_sizes):
            hidden_size = int(hidden_size)  # accept numpy integers
            self.dense_layers.append(nn.Sequential(*self._mlp_layer_provider(input_size + feedforward_size, hidden_size, dropout_rate)))
            # Don't skip input:
            if i > 0:
                feedforward_size += input_size
            input_size = hidden_size
        self.dense_layers.append(nn.Linear(input_size + feedforward_size, output_size))
        # Wrapping the list in Sequential is what registers the parameters.
        self.model = nn.Sequential(*self.dense_layers)


    def forward(self, x):
        """Return the class logits for a batch of inputs of shape (batch, input_size)."""
        last = len(self.dense_layers) - 1
        current_layer_input = x
        for i, layer in enumerate(self.dense_layers):
            out = layer(current_layer_input)
            if i == 0: # Don't skip input
                current_layer_input = out
            elif i < last:
                # Grow the feature bank for the next block. Nothing consumes it
                # after the output layer, so no concatenation is done there.
                current_layer_input = torch.cat((current_layer_input, out), dim=1)
        return out

    def _mlp_layer_provider(self, input_size, hidden_size, dropout_rate) -> list[Module]:
        """Return the modules making up one hidden block (dropout_rate unused here)."""
        return [
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
        ]

    @staticmethod
    def describe() -> str:
        """Return the architecture tag; static so instances can call it too."""
        return "dense_mlp_base"

#param 21M    
class DenseNetV2(DynamicDenseMlp):
    """Densely connected MLP with fixed hidden widths 512 * [1,2,4,4,2,1]."""

    def __init__(self, input_size, output_size, dropout_rate, hidden_sizes):
        # NOTE(review): `hidden_sizes` is accepted for signature compatibility
        # but ignored - the fixed layout below is always used, whatever the
        # caller passes. Kept as-is so existing runs stay reproducible.
        super().__init__(input_size, output_size, dropout_rate, hidden_sizes=np.array([1,2,4,4,2,1]) * 512)

    def _mlp_layer_provider(self, input_size, hidden_size, dropout_rate) -> list[Module]:
        """Return one hidden block: Linear -> Mish -> BatchNorm1d -> Dropout."""
        return [
            nn.Linear(input_size, hidden_size),
            nn.Mish(),
            nn.BatchNorm1d(hidden_size),
            nn.Dropout(dropout_rate)
        ]

    @staticmethod
    def describe() -> str:
        """Return the architecture tag; static so instances can call it too."""
        return "dense_mlp_v2_5layer"
        

#  Parameters Configuration and Create Datasets

Storing your parameters and hyperparameters in a single configuration dictionary makes it easier to keep track of them during each experiment. It can also be used with weights and biases to log your parameters for each experiment and keep track of them across multiple experiments.

In [18]:
# Architecture to train; the model-construction cell instantiates this class.
model_class = NetV3
config = {
    'continue': False,   # True: resume from the checkpoint of 'last_epoch'
    'last_epoch': 0,     # epoch index of the checkpoint to load when resuming
    'epochs'        : 100,
    'batch_size'    : 32768,
    'context'       : 40,    # frames taken on each side of the centre frame
    'init_lr'       : 1e-3,
    'architecture'  : 'deep_mlp',
    'dropout'       : 0.2,
    'weight_decay'  : 1e-5,
    # Arguments for ReduceLROnPlateau (note the key is 'min-lr', with a hyphen)
    'scheduler_params'     : {'patience': 3, 'factor': 0.2, 'min-lr': 1e-8},
    # 'scheduler_params'     : {'tmax': 50},
    # Add more as you need them - e.g dropout values, weight decay, scheduler parameters
    # The "bs_32k" and "c40" parts are hard-coded, not derived from
    # 'batch_size' / 'context' above - keep them in sync by hand.
    'wandb_name': f"bs_32k_{model_class.describe()}_c40"
}


In [19]:
# Per-run output directory: <models base dir>/<run name>.
# The base directory can be overridden with HW1_MODEL_ROOT; the default is the
# original local path. It is recomputed from scratch (not from the current
# MODEL_ROOT) so that re-running this cell does not nest directories.
MODEL_ROOT = os.path.join(
    os.environ.get("HW1_MODEL_ROOT", "/mnt/e/Workspace/IDL/Models/hw1/11-785-s24-hw1p2/"),
    config['wandb_name'],
)
# exist_ok avoids the check-then-create race and makes the cell idempotent.
os.makedirs(MODEL_ROOT, exist_ok=True)

In [20]:
#TODO: Create a dataset object using the AudioDataset class for the training data
train_data.set_context_length(config['context'])

# TODO: Create a dataset object using the AudioDataset class for the validation data
val_data.set_context_length(config['context'])

# TODO: Create a dataset object using the AudioTestDataset class for the test data
test_data.set_context_length(config['context'])

In [21]:
# Define dataloaders for train, val and test datasets
# Dataloaders will yield a batch of frames and phonemes of given batch_size at every iteration
# We shuffle train dataloader but not val & test dataloader. Why?
# Settings shared by all three loaders.
_shared_loader_args = dict(batch_size=config['batch_size'], pin_memory=True)

# Only the training data is shuffled; validation and test keep a fixed order.
train_loader = torch.utils.data.DataLoader(
    train_data, num_workers=8, shuffle=True, **_shared_loader_args)

val_loader = torch.utils.data.DataLoader(
    val_data, num_workers=4, shuffle=False, **_shared_loader_args)

test_loader = torch.utils.data.DataLoader(
    test_data, num_workers=4, shuffle=False, **_shared_loader_args)


# Summary of the experiment's data setup
print("Batch size     : ", config['batch_size'])
print("Context        : ", config['context'])
print("Input size     : ", (2*config['context']+1)*27)
print("Output symbols : ", len(PHONEMES))

print("Train dataset samples = {}, batches = {}".format(len(train_data), len(train_loader)))
print("Validation dataset samples = {}, batches = {}".format(len(val_data), len(val_loader)))
print("Test dataset samples = {}, batches = {}".format(len(test_data), len(test_loader)))

Batch size     :  32768
Context        :  40
Input size     :  2187
Output symbols :  42
Train dataset samples = 36091157, batches = 1102
Validation dataset samples = 1928204, batches = 59
Test dataset samples = 1934138, batches = 60


In [22]:
# Testing code to check if your data loaders are working
# for i, data in enumerate(train_loader):
#     frames, phoneme = data
#     print(frames.shape, phoneme.shape)
#     break

# Define Model, Loss Function and Optimizer

Here we define the model, loss function, optimizer and optionally a learning rate scheduler.

In [23]:
# (2*context + 1) frames of 27 MFCC features each, flattened into one vector
INPUT_SIZE  = (2*config['context'] + 1) * 27
# model       = LowCutoffNet(INPUT_SIZE, len(train_data.phonemes)).to(device)
if issubclass(model_class, DynamicMlpNet):
    model = model_class(input_size=INPUT_SIZE, output_size=len(train_data.phonemes), dropout_rate=config['dropout']).to(device)
elif issubclass(model_class, DynamicDenseMlp):
    model = model_class(input_size=INPUT_SIZE, output_size=len(train_data.phonemes), dropout_rate=config['dropout'], hidden_sizes=[64, 64, 64, 64]).to(device)
else:
    # Fail loudly instead of leaving `model` undefined, or silently reusing a
    # stale model from an earlier run of this cell.
    raise TypeError(f"Unsupported model class: {model_class.__name__}")
# Some upstream dependency bug in torchsummaryX, using torchinfo instead: 
import torchinfo
torchinfo.summary(model, input_size=(config['batch_size'], INPUT_SIZE))
# summary(model, frames.to(device))
# Check number of parameters of your network
# Remember, you are limited to 24 million parameters for HW1 (including ensembles)

Layer (type:depth-idx)                   Output Shape              Param #
NetV3                                    [32768, 42]               --
├─Sequential: 1-1                        [32768, 42]               --
│    └─Linear: 2-1                       [32768, 512]              1,120,256
│    └─Mish: 2-2                         [32768, 512]              --
│    └─BatchNorm1d: 2-3                  [32768, 512]              1,024
│    └─Dropout: 2-4                      [32768, 512]              --
│    └─Linear: 2-5                       [32768, 1024]             525,312
│    └─Mish: 2-6                         [32768, 1024]             --
│    └─BatchNorm1d: 2-7                  [32768, 1024]             2,048
│    └─Dropout: 2-8                      [32768, 1024]             --
│    └─Linear: 2-9                       [32768, 2048]             2,099,200
│    └─Mish: 2-10                        [32768, 2048]             --
│    └─BatchNorm1d: 2-11                 [32768, 2048]      

In [24]:
# Loss: cross-entropy, since the task is multi-class classification.
criterion = torch.nn.CrossEntropyLoss()

# Optimizer: Adam with the configured initial learning rate and weight decay.
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=config['init_lr'],
    weight_decay=config['weight_decay'],
)

# Scheduler: shrink the learning rate when the monitored metric stops improving.
_plateau_args = config['scheduler_params']
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    patience=_plateau_args['patience'],
    min_lr=_plateau_args['min-lr'],
    factor=_plateau_args['factor'],
    verbose=True,
)
# scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=config['scheduler_params']['tmax'], eta_min=1e-7, last_epoch=-1, verbose=True)
# Recommended : Define Scheduler for Learning Rate,
# including but not limited to StepLR, MultiStepLR, CosineAnnealingLR, ReduceLROnPlateau, etc.
# You can refer to Pytorch documentation for more information on how to use them.

# Is your training time very high?
# Look into mixed precision training if your GPU (Tesla T4, V100, etc) can make use of it
# Refer - https://pytorch.org/docs/stable/notes/amp_examples.html

# Training and Validation Functions

This section covers the training, and validation functions for each epoch of running your experiment with a given model architecture. The code has been provided to you, but we recommend going through the comments to understand the workflow to enable you to write these loops for future HWs.

In [25]:
# Release cached GPU memory and run the garbage collector before training.
torch.cuda.empty_cache()
gc.collect()

6

In [26]:
from torch.cuda.amp import GradScaler

def train(model, dataloader, optimizer, criterion, scheduler, logger, log_freq=100, use_amp=True):
    """Train ``model`` for one epoch over ``dataloader``.

    Args:
        model: network to train; it is switched to training mode.
        dataloader: yields ``(frames, phonemes)`` batches.
        optimizer: optimizer stepped once per batch.
        criterion: loss called as ``criterion(logits, phonemes)``.
        scheduler: unused here (the caller steps it once per epoch); kept for
            interface compatibility.
        logger: called as ``logger(batch_index, running_loss, running_acc_percent)``
            every ``log_freq`` batches.
        log_freq: number of batches between two logger calls.
        use_amp: use float16 mixed precision with gradient scaling. Only takes
            effect when the global ``device`` is a CUDA device.

    Returns:
        ``(loss, acc)``: mean batch loss and mean per-batch accuracy (as a
        fraction in [0, 1]) over the epoch.
    """
    from contextlib import nullcontext

    # Mixed precision needs a CUDA device; otherwise run in full precision.
    amp_enabled = bool(use_amp) and str(device).startswith('cuda')
    # A disabled scaler makes scale()/step()/update() plain pass-throughs, so
    # one loop serves both the AMP and the full-precision path.
    scaler = GradScaler(enabled=amp_enabled)

    def forward_context():
        # Autocast must target a reduced-precision dtype; requesting the
        # input's own float32 dtype would leave everything in full precision.
        if amp_enabled:
            return torch.autocast(device_type='cuda', dtype=torch.float16)
        return nullcontext()

    model.train()
    tloss, tacc = 0, 0 # Monitoring loss and accuracy
    # Size everything from the loader that was passed in, not from a global.
    num_batches = len(dataloader)
    batch_bar   = tqdm(total=num_batches, dynamic_ncols=True, leave=False, position=0, desc='Train')

    for i, (frames, phonemes) in enumerate(dataloader):
        ### Initialize Gradients
        optimizer.zero_grad()

        ### Move Data to Device (Ideally GPU)
        frames      = frames.to(device)
        phonemes    = phonemes.to(device)

        ### Forward Propagation and Loss Calculation
        with forward_context():
            logits  = model(frames)
            loss    = criterion(logits, phonemes)

        ### Backward Propagation
        scaler.scale(loss).backward()

        ### Gradient Descent
        scaler.step(optimizer)
        scaler.update()

        tloss   += loss.item()
        tacc    += torch.sum(torch.argmax(logits, dim= 1) == phonemes).item()/logits.shape[0]

        batch_bar.set_postfix(loss="{:.04f}".format(float(tloss / (i + 1))),
                            acc="{:.04f}%".format(float(tacc*100 / (i + 1))))
        batch_bar.update()

        ### Release memory
        del frames, phonemes, logits
        if (i+1)%log_freq == 0:
            logger(i, tloss / (i+1), tacc*100 / (i+1))

    batch_bar.close()
    # Guard against an empty loader instead of dividing by zero.
    tloss   /= max(num_batches, 1)
    tacc    /= max(num_batches, 1)

    return tloss, tacc

In [27]:
def eval(model, dataloader):
    """Evaluate ``model`` on ``dataloader`` without computing gradients.

    Uses the global ``criterion`` for the loss and the global ``device``.
    Note that this function shadows the Python builtin ``eval`` in the notebook.

    Args:
        model: network to evaluate; it is switched to evaluation mode.
        dataloader: yields ``(frames, phonemes)`` batches.

    Returns:
        ``(loss, acc)``: mean batch loss and mean per-batch accuracy (as a
        fraction in [0, 1]).
    """
    model.eval() # set model in evaluation mode
    vloss, vacc = 0, 0 # Monitoring loss and accuracy
    # Size everything from the loader that was passed in, not from a global.
    num_batches = len(dataloader)
    batch_bar   = tqdm(total=num_batches, dynamic_ncols=True, position=0, leave=False, desc='Val')

    for i, (frames, phonemes) in enumerate(dataloader):

        ### Move data to device (ideally GPU)
        frames      = frames.to(device)
        phonemes    = phonemes.to(device)

        # No gradients are needed since the model is not being trained here
        with torch.inference_mode():
            ### Forward Propagation
            logits  = model(frames)
            ### Loss Calculation
            loss    = criterion(logits, phonemes)

        vloss   += loss.item()
        vacc    += torch.sum(torch.argmax(logits, dim= 1) == phonemes).item()/logits.shape[0]

        batch_bar.set_postfix(loss="{:.04f}".format(float(vloss / (i + 1))),
                              acc="{:.04f}%".format(float(vacc*100 / (i + 1))))
        batch_bar.update()

        ### Release memory
        del frames, phonemes, logits
        torch.cuda.empty_cache()

    batch_bar.close()
    # Guard against an empty loader instead of dividing by zero.
    vloss   /= max(num_batches, 1)
    vacc    /= max(num_batches, 1)

    return vloss, vacc

# Weights and Biases Setup

This section is to enable logging metrics and files with Weights and Biases. Please refer to the wandb documentation and recitation 0, which cover the use of Weights and Biases for logging, hyperparameter tuning and monitoring your runs for your homeworks. Using this tool makes it very easy to show results when submitting your code and models for homeworks, and it is also extremely useful for study groups to organize and run ablations under a single team in wandb.

We have written code for you to make use of it out of the box, so that you start using wandb for all your HWs from the beginning.

In [28]:
# Never hard-code the API key in the notebook: it ends up in version control
# and in every shared copy. Export WANDB_API_KEY instead; with key=None wandb
# falls back to its own credential lookup or an interactive prompt.
# SECURITY: the key that used to be written on this line has been exposed and
# should be revoked at wandb.ai/settings.
wandb.login(key=os.environ.get("WANDB_API_KEY"))

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


[34m[1mwandb[0m: Currently logged in as: [33mzzyatcmu[0m ([33mschool_stuff[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/zzy/.netrc


True

In [29]:
# Create the wandb run for this experiment. The run is named after the
# experiment, receives the full config dictionary, and writes its local files
# below the per-run MODEL_ROOT directory.
run = wandb.init(
    name    = config['wandb_name'], ### Wandb creates random run names if you skip this field, we recommend you give useful names
    reinit  = True, ### Allows reinitalizing runs when you re-run this cell
    #id     = "y28t31uz", ### Insert specific run id here if you want to resume a previous run
    #resume = "must", ### You need this to resume previous runs, but comment out reinit = True when using this
    project = "hw1p2", ### Project should be created in your wandb account
    config  = config, ### Wandb Config for your run
    dir = MODEL_ROOT ### Wandb local directory
)

In [30]:
# ### Save your model architecture as a string with str(model)
# Textual description of the architecture, as produced by str(model)
model_arch  = str(model)

# Write it to a text file in the run directory; the context manager closes the
# file even if the write fails.
with open(f"{MODEL_ROOT}/model_arch.txt", "w") as arch_file:
    file_write  = arch_file.write(model_arch)

# Attach the file to the wandb run (the file name stays readable after closing)
wandb.save(arch_file.name)



['/mnt/e/Workspace/IDL/Models/hw1/11-785-s24-hw1p2/bs_32k_7layer_mlp_v3_c40/wandb/run-20240207_174507-4ek4p5lj/files/model_arch.txt']

# Experiment

Now, it is time to finally run your ablations! Have fun!

In [31]:
def wandb_logger(iter, loss, acc):
    """Send the running training loss and accuracy to wandb.

    ``iter`` (the batch index) is accepted to match the logger callback
    signature used by ``train`` but is not logged.
    """
    metrics = {"train_loss": loss, "train_acc": acc}
    wandb.log(metrics)

In [32]:
# Iterate over number of epochs to train and evaluate your model
torch.cuda.empty_cache()
gc.collect()
wandb.watch(model, log="all")


# Continue training from a saved checkpoint if requested.
# NOTE(review): only the model weights are restored; optimizer and scheduler
# state (e.g. a reduced learning rate) start fresh.
if config['continue']:
    last_epoch = config['last_epoch']
    # map_location lets a checkpoint saved on one device load on another.
    model.load_state_dict(torch.load(f"{MODEL_ROOT}/model_{last_epoch}.cpt", map_location=device))
    initial_epoch = last_epoch + 1
    scheduler.last_epoch = initial_epoch
else:
    initial_epoch = 0

# Start directly at the first epoch that still has to be trained.
for epoch in range(initial_epoch, config['epochs']):

    print("\nEpoch {}/{}".format(epoch+1, config['epochs']))

    # Learning rate in effect during this epoch (read before the scheduler steps)
    curr_lr                 = float(optimizer.param_groups[0]['lr'])
    train_loss, train_acc   = train(model, train_loader, optimizer, criterion, scheduler, logger=wandb_logger, log_freq=100, use_amp=False)

    val_loss, val_acc       = eval(model, val_loader)
    # ReduceLROnPlateau monitors the validation loss
    scheduler.step(val_loss)

    print("\tTrain Acc {:.04f}%\tTrain Loss {:.04f}\t Learning Rate {:.07f}".format(train_acc*100, train_loss, curr_lr))
    print("\tVal Acc {:.04f}%\tVal Loss {:.04f}".format(val_acc*100, val_loss))

    ### Log metrics at each epoch in your run
    # Optionally, you can log at each batch inside train/eval functions
    # (explore wandb documentation/wandb recitation)
    wandb.log({'train_acc': train_acc*100, 'train_loss': train_loss,
               'val_acc': val_acc*100, 'valid_loss': val_loss, 'lr': curr_lr})


    ### Highly Recommended: Save checkpoint in drive and/or wandb if accuracy is better than your current best
    # A checkpoint is written after every epoch, named by its epoch index.
    torch.save(model.state_dict(), f"{MODEL_ROOT}/model_{epoch}.cpt")

### Finish your wandb run
run.finish()
# Baseline model 1st epoch:
#   Train Acc 51.7162%	Train Loss 1.6885	 Learning Rate 0.0010000
#	Val Acc 63.6595%	Val Loss 1.1859
#7-layer v3 model 1st epoch:
#   Train Acc 69.9118%	Train Loss 0.9459	 Learning Rate 0.0010000
#	Val Acc 77.559%	Val Loss 0.6768
#8layer v4 model 1st epoch:
#   Train Acc 69.1642%	Train Loss 0.9956	 Learning Rate 0.0010000
#	Val Acc 76.8617%	Val Loss 0.7113
#dense v2 model 1st epoch:
#   Train Acc 68.18843%	Train Loss 1.0120	 Learning Rate 0.0010000
#	Val Acc 76.7886%	Val Loss 0.7109


Epoch 1/100


                                                                                     

	Train Acc 69.6696%	Train Loss 0.9561	 Learning Rate 0.0010000
	Val Acc 77.7080%	Val Loss 0.6700

Epoch 2/100


                                                                                     

	Train Acc 78.0899%	Train Loss 0.6619	 Learning Rate 0.0010000
	Val Acc 80.3804%	Val Loss 0.5851

Epoch 3/100


                                                                                     

	Train Acc 79.8996%	Train Loss 0.6008	 Learning Rate 0.0010000
	Val Acc 81.3680%	Val Loss 0.5531

Epoch 4/100


                                                                                     

	Train Acc 80.8606%	Train Loss 0.5688	 Learning Rate 0.0010000
	Val Acc 81.9136%	Val Loss 0.5359

Epoch 5/100


                                                                                     

	Train Acc 81.4917%	Train Loss 0.5479	 Learning Rate 0.0010000
	Val Acc 82.3512%	Val Loss 0.5228

Epoch 6/100


                                                                                     

	Train Acc 81.9430%	Train Loss 0.5329	 Learning Rate 0.0010000
	Val Acc 82.5618%	Val Loss 0.5168

Epoch 7/100


                                                                                     

	Train Acc 82.2806%	Train Loss 0.5216	 Learning Rate 0.0010000
	Val Acc 82.8178%	Val Loss 0.5084

Epoch 8/100


                                                                                     

	Train Acc 82.5675%	Train Loss 0.5123	 Learning Rate 0.0010000
	Val Acc 83.1056%	Val Loss 0.5003

Epoch 9/100


                                                                                     

	Train Acc 82.7980%	Train Loss 0.5043	 Learning Rate 0.0010000
	Val Acc 83.0779%	Val Loss 0.5009

Epoch 10/100


                                                                                     

	Train Acc 83.0087%	Train Loss 0.4973	 Learning Rate 0.0010000
	Val Acc 83.4159%	Val Loss 0.4918

Epoch 11/100


                                                                                     

	Train Acc 83.1811%	Train Loss 0.4917	 Learning Rate 0.0010000
	Val Acc 83.4927%	Val Loss 0.4885

Epoch 12/100


                                                                                     

	Train Acc 83.3265%	Train Loss 0.4868	 Learning Rate 0.0010000
	Val Acc 83.4793%	Val Loss 0.4882

Epoch 13/100


                                                                                     

	Train Acc 83.4513%	Train Loss 0.4826	 Learning Rate 0.0010000
	Val Acc 83.7553%	Val Loss 0.4806

Epoch 14/100


                                                                                     

	Train Acc 83.5699%	Train Loss 0.4788	 Learning Rate 0.0010000
	Val Acc 83.6604%	Val Loss 0.4843

Epoch 15/100


                                                                                     

	Train Acc 83.6706%	Train Loss 0.4754	 Learning Rate 0.0010000
	Val Acc 83.8773%	Val Loss 0.4772

Epoch 16/100


                                                                                     

	Train Acc 83.7595%	Train Loss 0.4724	 Learning Rate 0.0010000
	Val Acc 83.9644%	Val Loss 0.4747

Epoch 17/100


                                                                                     

	Train Acc 83.8423%	Train Loss 0.4698	 Learning Rate 0.0010000
	Val Acc 83.8591%	Val Loss 0.4797

Epoch 18/100


                                                                                     

	Train Acc 83.9163%	Train Loss 0.4673	 Learning Rate 0.0010000
	Val Acc 83.9399%	Val Loss 0.4769

Epoch 19/100


                                                                                     

	Train Acc 83.9848%	Train Loss 0.4650	 Learning Rate 0.0010000
	Val Acc 83.8290%	Val Loss 0.4810

Epoch 20/100


                                                                                     

Epoch 00020: reducing learning rate of group 0 to 2.0000e-04.
	Train Acc 84.0579%	Train Loss 0.4628	 Learning Rate 0.0010000
	Val Acc 83.9426%	Val Loss 0.4762

Epoch 21/100


                                                                                     

	Train Acc 85.8544%	Train Loss 0.4031	 Learning Rate 0.0002000
	Val Acc 85.4330%	Val Loss 0.4334

Epoch 22/100


                                                                                     

	Train Acc 86.4898%	Train Loss 0.3814	 Learning Rate 0.0002000
	Val Acc 85.5476%	Val Loss 0.4338

Epoch 23/100


                                                                                     

	Train Acc 86.7503%	Train Loss 0.3722	 Learning Rate 0.0002000
	Val Acc 85.5104%	Val Loss 0.4365

Epoch 24/100


                                                                                     

	Train Acc 86.9320%	Train Loss 0.3662	 Learning Rate 0.0002000
	Val Acc 85.5130%	Val Loss 0.4373

Epoch 25/100


                                                                                     

Epoch 00025: reducing learning rate of group 0 to 4.0000e-05.
	Train Acc 87.0529%	Train Loss 0.3618	 Learning Rate 0.0002000
	Val Acc 85.5574%	Val Loss 0.4372

Epoch 26/100


                                                                                     

	Train Acc 87.5705%	Train Loss 0.3451	 Learning Rate 0.0000400
	Val Acc 85.8006%	Val Loss 0.4333

Epoch 27/100


                                                                                     

	Train Acc 87.7658%	Train Loss 0.3388	 Learning Rate 0.0000400
	Val Acc 85.8137%	Val Loss 0.4344

Epoch 28/100


                                                                                     

	Train Acc 87.8581%	Train Loss 0.3357	 Learning Rate 0.0000400
	Val Acc 85.8573%	Val Loss 0.4338

Epoch 29/100


                                                                                     

	Train Acc 87.9350%	Train Loss 0.3333	 Learning Rate 0.0000400
	Val Acc 85.8454%	Val Loss 0.4346

Epoch 30/100


                                                                                     

Epoch 00030: reducing learning rate of group 0 to 8.0000e-06.
	Train Acc 87.9918%	Train Loss 0.3313	 Learning Rate 0.0000400
	Val Acc 85.8486%	Val Loss 0.4348

Epoch 31/100


                                                                                     

	Train Acc 88.0983%	Train Loss 0.3278	 Learning Rate 0.0000080
	Val Acc 85.8830%	Val Loss 0.4350

Epoch 32/100


                                                                                     

	Train Acc 88.1309%	Train Loss 0.3267	 Learning Rate 0.0000080
	Val Acc 85.8894%	Val Loss 0.4351

Epoch 33/100


                                                                                     

	Train Acc 88.1522%	Train Loss 0.3260	 Learning Rate 0.0000080
	Val Acc 85.8897%	Val Loss 0.4351

Epoch 34/100


                                                                                     

Epoch 00034: reducing learning rate of group 0 to 1.6000e-06.
	Train Acc 88.1706%	Train Loss 0.3254	 Learning Rate 0.0000080
	Val Acc 85.8869%	Val Loss 0.4352

Epoch 35/100


                                                                                     

	Train Acc 88.1937%	Train Loss 0.3246	 Learning Rate 0.0000016
	Val Acc 85.8953%	Val Loss 0.4352

Epoch 36/100


                                                                                     

	Train Acc 88.2015%	Train Loss 0.3245	 Learning Rate 0.0000016
	Val Acc 85.8929%	Val Loss 0.4354

Epoch 37/100


                                                                                     

	Train Acc 88.2082%	Train Loss 0.3242	 Learning Rate 0.0000016
	Val Acc 85.8896%	Val Loss 0.4355

Epoch 38/100


                                                                                     

Epoch 00038: reducing learning rate of group 0 to 3.2000e-07.
	Train Acc 88.2114%	Train Loss 0.3242	 Learning Rate 0.0000016
	Val Acc 85.8862%	Val Loss 0.4356

Epoch 39/100


                                                                                     

	Train Acc 88.2146%	Train Loss 0.3240	 Learning Rate 0.0000003
	Val Acc 85.8954%	Val Loss 0.4353

Epoch 40/100


                                                                                     

	Train Acc 88.2183%	Train Loss 0.3241	 Learning Rate 0.0000003
	Val Acc 85.8911%	Val Loss 0.4355

Epoch 41/100


                                                                                     

	Train Acc 88.2168%	Train Loss 0.3240	 Learning Rate 0.0000003
	Val Acc 85.8931%	Val Loss 0.4355

Epoch 42/100


                                                                                     

Epoch 00042: reducing learning rate of group 0 to 6.4000e-08.
	Train Acc 88.2185%	Train Loss 0.3239	 Learning Rate 0.0000003
	Val Acc 85.8953%	Val Loss 0.4355

Epoch 43/100


                                                                                     

	Train Acc 88.2194%	Train Loss 0.3239	 Learning Rate 0.0000001
	Val Acc 85.8900%	Val Loss 0.4355

Epoch 44/100


                                                                                     

	Train Acc 88.2129%	Train Loss 0.3241	 Learning Rate 0.0000001
	Val Acc 85.8936%	Val Loss 0.4356

Epoch 45/100


                                                                                     

	Train Acc 88.2292%	Train Loss 0.3238	 Learning Rate 0.0000001
	Val Acc 85.8920%	Val Loss 0.4355

Epoch 46/100


                                                                                     

Epoch 00046: reducing learning rate of group 0 to 1.2800e-08.
	Train Acc 88.2155%	Train Loss 0.3240	 Learning Rate 0.0000001
	Val Acc 85.8879%	Val Loss 0.4357

Epoch 47/100


                                                                                     

	Train Acc 88.2210%	Train Loss 0.3239	 Learning Rate 0.0000000
	Val Acc 85.8947%	Val Loss 0.4354

Epoch 48/100


                                                                                     

	Train Acc 88.2248%	Train Loss 0.3238	 Learning Rate 0.0000000
	Val Acc 85.8933%	Val Loss 0.4352

Epoch 49/100


                                                                                     

	Train Acc 88.2231%	Train Loss 0.3238	 Learning Rate 0.0000000
	Val Acc 85.8988%	Val Loss 0.4354

Epoch 50/100


                                                                                     

	Train Acc 88.2245%	Train Loss 0.3237	 Learning Rate 0.0000000
	Val Acc 85.8949%	Val Loss 0.4354

Epoch 51/100


                                                                                     

	Train Acc 88.2231%	Train Loss 0.3239	 Learning Rate 0.0000000
	Val Acc 85.8943%	Val Loss 0.4354

Epoch 52/100


                                                                                     

	Train Acc 88.2181%	Train Loss 0.3240	 Learning Rate 0.0000000
	Val Acc 85.8974%	Val Loss 0.4353

Epoch 53/100


                                                                                     

	Train Acc 88.2194%	Train Loss 0.3239	 Learning Rate 0.0000000
	Val Acc 85.8905%	Val Loss 0.4355

Epoch 54/100


                                                                                     

	Train Acc 88.2209%	Train Loss 0.3238	 Learning Rate 0.0000000
	Val Acc 85.8918%	Val Loss 0.4356

Epoch 55/100


                                                                                     

	Train Acc 88.2119%	Train Loss 0.3239	 Learning Rate 0.0000000
	Val Acc 85.8886%	Val Loss 0.4356

Epoch 56/100


                                                                                     

	Train Acc 88.2208%	Train Loss 0.3239	 Learning Rate 0.0000000
	Val Acc 85.8922%	Val Loss 0.4354

Epoch 57/100


                                                                                     

	Train Acc 88.2200%	Train Loss 0.3239	 Learning Rate 0.0000000
	Val Acc 85.8929%	Val Loss 0.4355

Epoch 58/100


                                                                                     

	Train Acc 88.2128%	Train Loss 0.3239	 Learning Rate 0.0000000
	Val Acc 85.8936%	Val Loss 0.4354

Epoch 59/100


                                                                                     

	Train Acc 88.2130%	Train Loss 0.3239	 Learning Rate 0.0000000
	Val Acc 85.8893%	Val Loss 0.4355

Epoch 60/100


                                                                                     

	Train Acc 88.2162%	Train Loss 0.3239	 Learning Rate 0.0000000
	Val Acc 85.8926%	Val Loss 0.4355

Epoch 61/100


                                                                                     

	Train Acc 88.2210%	Train Loss 0.3237	 Learning Rate 0.0000000
	Val Acc 85.8937%	Val Loss 0.4353

Epoch 62/100


                                                                                     

	Train Acc 88.2187%	Train Loss 0.3238	 Learning Rate 0.0000000
	Val Acc 85.8927%	Val Loss 0.4354

Epoch 63/100


                                                                                     

	Train Acc 88.2146%	Train Loss 0.3239	 Learning Rate 0.0000000
	Val Acc 85.8878%	Val Loss 0.4356

Epoch 64/100


                                                                                     

	Train Acc 88.2240%	Train Loss 0.3238	 Learning Rate 0.0000000
	Val Acc 85.8886%	Val Loss 0.4355

Epoch 65/100


                                                                                     

	Train Acc 88.2207%	Train Loss 0.3239	 Learning Rate 0.0000000
	Val Acc 85.8924%	Val Loss 0.4353

Epoch 66/100


                                                                                     

	Train Acc 88.2245%	Train Loss 0.3238	 Learning Rate 0.0000000
	Val Acc 85.8920%	Val Loss 0.4355

Epoch 67/100


                                                                                     

	Train Acc 88.2223%	Train Loss 0.3238	 Learning Rate 0.0000000
	Val Acc 85.8919%	Val Loss 0.4354

Epoch 68/100


                                                                                     

	Train Acc 88.2188%	Train Loss 0.3238	 Learning Rate 0.0000000
	Val Acc 85.8928%	Val Loss 0.4354

Epoch 69/100


                                                                                     

	Train Acc 88.2266%	Train Loss 0.3238	 Learning Rate 0.0000000
	Val Acc 85.8898%	Val Loss 0.4356

Epoch 70/100


                                                                                     

	Train Acc 88.2140%	Train Loss 0.3240	 Learning Rate 0.0000000
	Val Acc 85.8945%	Val Loss 0.4355

Epoch 71/100


                                                                                     

	Train Acc 88.2221%	Train Loss 0.3237	 Learning Rate 0.0000000
	Val Acc 85.8902%	Val Loss 0.4356

Epoch 72/100


                                                                                     

	Train Acc 88.2183%	Train Loss 0.3239	 Learning Rate 0.0000000
	Val Acc 85.8919%	Val Loss 0.4354

Epoch 73/100


                                                                                     

	Train Acc 88.2214%	Train Loss 0.3238	 Learning Rate 0.0000000
	Val Acc 85.8942%	Val Loss 0.4356

Epoch 74/100


                                                                                     

	Train Acc 88.2222%	Train Loss 0.3239	 Learning Rate 0.0000000
	Val Acc 85.8887%	Val Loss 0.4354

Epoch 75/100


                                                                                     

	Train Acc 88.2142%	Train Loss 0.3239	 Learning Rate 0.0000000
	Val Acc 85.8921%	Val Loss 0.4353

Epoch 76/100


                                                                                     

	Train Acc 88.2232%	Train Loss 0.3239	 Learning Rate 0.0000000
	Val Acc 85.8920%	Val Loss 0.4354

Epoch 77/100


                                                                                     

	Train Acc 88.2183%	Train Loss 0.3239	 Learning Rate 0.0000000
	Val Acc 85.8906%	Val Loss 0.4355

Epoch 78/100


                                                                                     

	Train Acc 88.2221%	Train Loss 0.3238	 Learning Rate 0.0000000
	Val Acc 85.8891%	Val Loss 0.4355

Epoch 79/100


                                                                                     

	Train Acc 88.2200%	Train Loss 0.3238	 Learning Rate 0.0000000
	Val Acc 85.8943%	Val Loss 0.4353

Epoch 80/100


                                                                                     

	Train Acc 88.2222%	Train Loss 0.3238	 Learning Rate 0.0000000
	Val Acc 85.8912%	Val Loss 0.4352

Epoch 81/100


                                                                                     

	Train Acc 88.2213%	Train Loss 0.3239	 Learning Rate 0.0000000
	Val Acc 85.8930%	Val Loss 0.4354

Epoch 82/100


                                                                                     

	Train Acc 88.2232%	Train Loss 0.3238	 Learning Rate 0.0000000
	Val Acc 85.8907%	Val Loss 0.4356

Epoch 83/100


                                                                                     

	Train Acc 88.2228%	Train Loss 0.3238	 Learning Rate 0.0000000
	Val Acc 85.8883%	Val Loss 0.4357

Epoch 84/100


                                                                                     

	Train Acc 88.2181%	Train Loss 0.3239	 Learning Rate 0.0000000
	Val Acc 85.8898%	Val Loss 0.4354

Epoch 85/100


                                                                                     

	Train Acc 88.2239%	Train Loss 0.3239	 Learning Rate 0.0000000
	Val Acc 85.8918%	Val Loss 0.4354

Epoch 86/100


                                                                                     

	Train Acc 88.2234%	Train Loss 0.3238	 Learning Rate 0.0000000
	Val Acc 85.8925%	Val Loss 0.4356

Epoch 87/100


                                                                                     

	Train Acc 88.2230%	Train Loss 0.3238	 Learning Rate 0.0000000
	Val Acc 85.8943%	Val Loss 0.4353

Epoch 88/100


                                                                                     

	Train Acc 88.2206%	Train Loss 0.3239	 Learning Rate 0.0000000
	Val Acc 85.8930%	Val Loss 0.4355

Epoch 89/100


                                                                                     

	Train Acc 88.2207%	Train Loss 0.3239	 Learning Rate 0.0000000
	Val Acc 85.8941%	Val Loss 0.4355

Epoch 90/100


                                                                                     

	Train Acc 88.2263%	Train Loss 0.3238	 Learning Rate 0.0000000
	Val Acc 85.8899%	Val Loss 0.4355

Epoch 91/100


                                                                                     

	Train Acc 88.2226%	Train Loss 0.3239	 Learning Rate 0.0000000
	Val Acc 85.8926%	Val Loss 0.4355

Epoch 92/100


                                                                                     

	Train Acc 88.2268%	Train Loss 0.3237	 Learning Rate 0.0000000
	Val Acc 85.8963%	Val Loss 0.4353

Epoch 93/100


                                                                                     

	Train Acc 88.2192%	Train Loss 0.3239	 Learning Rate 0.0000000
	Val Acc 85.8940%	Val Loss 0.4354

Epoch 94/100


                                                                                     

	Train Acc 88.2163%	Train Loss 0.3239	 Learning Rate 0.0000000
	Val Acc 85.8946%	Val Loss 0.4355

Epoch 95/100


                                                                                     

	Train Acc 88.2190%	Train Loss 0.3238	 Learning Rate 0.0000000
	Val Acc 85.8942%	Val Loss 0.4355

Epoch 96/100


                                                                                     

	Train Acc 88.2205%	Train Loss 0.3239	 Learning Rate 0.0000000
	Val Acc 85.8927%	Val Loss 0.4356

Epoch 97/100


                                                                                     

	Train Acc 88.2196%	Train Loss 0.3238	 Learning Rate 0.0000000
	Val Acc 85.8930%	Val Loss 0.4355

Epoch 98/100


                                                                                     

	Train Acc 88.2253%	Train Loss 0.3238	 Learning Rate 0.0000000
	Val Acc 85.8913%	Val Loss 0.4354

Epoch 99/100


                                                                                     

	Train Acc 88.2268%	Train Loss 0.3237	 Learning Rate 0.0000000
	Val Acc 85.8964%	Val Loss 0.4355

Epoch 100/100


                                                                                     

	Train Acc 88.2174%	Train Loss 0.3240	 Learning Rate 0.0000000
	Val Acc 85.8944%	Val Loss 0.4354




0,1
lr,████████▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_acc,▁▃▄▄▅▅▅▅▇▇██████████████████████████████
train_loss,█▆▅▅▄▄▄▄▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁▄▅▆▆▆▆▆████████████████████████████████
valid_loss,█▅▃▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
lr,0.0
train_acc,88.21744
train_loss,0.32397
val_acc,85.8944
valid_loss,0.43535


# Testing and submission to Kaggle

Before we get to the following code, make sure to look at the submission format given in *sample_submission.csv*. Once you have done so, fill in the following function to run inference on the test data. Refer to the eval function from the previous cells for an idea of how to complete it.

In [33]:
def test(model, test_loader):
    ### What you call for model to perform inference?
    model.eval() # TODO train or eval?

    ### List to store predicted phonemes of test data
    test_predictions = []

    ### Which mode do you need to avoid gradients?
    with torch.no_grad(): 

        for i, mfccs in enumerate(tqdm(test_loader)):

            mfccs   = mfccs.to(device)

            logits  = model(mfccs)

            ### Get most likely predicted phoneme with argmax
            max_idxs = torch.argmax(logits, dim=1)
            
            predicted_phonemes = [test_loader.dataset.phoneme_reverse_lookup(max_idx) for max_idx in max_idxs]

            ### How do you store predicted_phonemes with test_predictions? Hint, look at eval
            test_predictions.extend(predicted_phonemes)
    return test_predictions

In [34]:
# Run inference on the test set with the model currently held in memory;
# `predictions` holds one entry per predicted frame, in loader order.
predictions = test(model, test_loader)

100%|██████████| 60/60 [01:09<00:00,  1.16s/it]


In [35]:
### Create CSV file with predictions
# Layout: an "id,label" header row, then one row per prediction where the id
# is the position of that prediction in `predictions`.
with open(f"{MODEL_ROOT}/submission_latest.csv", "w+") as f:
    f.write("id,label\n")
    for i, label in enumerate(predictions):
        f.write("{},{}\n".format(i, label))

In [36]:
### Submit to kaggle competition using kaggle API (Uncomment below to use)
# !kaggle competitions submit -c 11785-hw1p2-s24 -f {MODEL_ROOT}/submission_latest.csv -m "Test Submission"

### However, it's always safer to download the csv file and then upload it to kaggle