In [1]:
# IMPORTS
# Numerical Operations
import math
import numpy as np

# Reading/Writing Data
import pandas as pd
import os
import csv

# For Progress Bar
from tqdm import tqdm

# Pytorch
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split

# For plotting learning curve
from torch.utils.tensorboard import SummaryWriter

# Other Common Modules
from datetime import datetime, timedelta
import importlib


# IMPORT COMPLETE
print("Imports Done")

Imports Done


In [2]:
# Environment setup: pick the working directory (Colab vs. local), derive the
# data / checkpoint / output folders from it, and build the time tag that is
# stored in the run config.

# Project folder on the mounted Google Drive; only used when running on Colab.
COLAB_PROJECT_DIR = "/content/drive/MyDrive/Chronical/2023Spring/ML_drive/MLHW2"

if 'google.colab' in str(get_ipython()):
    print('Running on CoLab')
    # NOTE(review): the +8h shift presumably converts the Colab clock (UTC)
    # to local time (UTC+8) — confirm.
    timenow = datetime.now() + timedelta(hours=8)
    from google.colab import drive
    drive.mount('/content/drive')
    os.chdir(COLAB_PROJECT_DIR)
else:
    print('Running on Local')
    timenow = datetime.now()

# Every path below is relative to the current working directory.
BASE_PATH = os.getcwd()
DATA_PATH = os.path.join(BASE_PATH, "data")
SAVE_PATH = os.path.join(BASE_PATH, ".model")
OUTPUT_PATH = os.path.join(BASE_PATH, "output")

# makedirs(exist_ok=True) replaces the check-then-create pattern: it is a
# no-op when the folder already exists and has no isdir/mkdir race.
for directory in (SAVE_PATH, OUTPUT_PATH):
    os.makedirs(directory, exist_ok=True)

# Time tag in HHMMmmdd form (hour, minute, month, day), e.g. "18560707".
timenow_str = timenow.strftime("%H%M%m%d")

print(f"{BASE_PATH=}")
print(f"{DATA_PATH=}")
print(f"{SAVE_PATH=}")
print(f"{OUTPUT_PATH=}")
print(f"{timenow=}")
print(timenow_str)

Running on CoLab
Mounted at /content/drive
BASE_PATH='/content/drive/MyDrive/Chronical/2023Spring/ML_drive/MLHW2'
DATA_PATH='/content/drive/MyDrive/Chronical/2023Spring/ML_drive/MLHW2/data'
SAVE_PATH='/content/drive/MyDrive/Chronical/2023Spring/ML_drive/MLHW2/.model'
OUTPUT_PATH='/content/drive/MyDrive/Chronical/2023Spring/ML_drive/MLHW2/output'
timenow=datetime.datetime(2023, 7, 7, 18, 56, 37, 51195)
18560707


In [3]:
# Run configuration: select the compute device and collect the settings that
# are handed to the Trainer and Inferencer cells further down.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
config = {
    "device": device,
    "seed": 3141592,
    "learning_rate": 1e-5,
    "epochs": 30000,
    "batch_size": 64,
    "valid_ratio": 0.2,      # fraction of the training samples held out for validation
    "early_stop": 1000,      # NOTE(review): presumably the early-stopping patience; confirm in Trainer.py
    "save_path": SAVE_PATH,
    "time_string": timenow_str
}

# torch.cuda.get_device_name() only accepts CUDA devices, so calling it with
# 'cpu' would raise and break the CPU fallback selected above.
device_name = torch.cuda.get_device_name(device) if device == 'cuda' else 'CPU'
print(f"device: {device_name}")

device: Tesla T4


In [4]:
# Seed NumPy and PyTorch (CPU and, when present, every CUDA device) from the
# single seed stored in the run config.
np.random.seed(config["seed"])
torch.manual_seed(config["seed"])
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(config["seed"])

# Turn off cuDNN autotuning and request deterministic cuDNN algorithms.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

print("Randomness Fixed")

Randomness Fixed


# DATASET & DATALOADER

In [5]:
# Import the project-local SoundData module, then reload it so that the
# current contents of SoundData.py are re-executed without restarting the kernel.
import SoundData
importlib.reload(SoundData)

<module 'SoundData' from '/content/drive/MyDrive/Chronical/2023Spring/ML_drive/MLHW2/SoundData.py'>

In [6]:
# Parse everything under DATA_PATH and keep the two dictionaries the parser
# exposes for the training and test splits.
sound_data_parser = SoundData.SoundDataParser(DATA_PATH)
train_dict, test_dict = sound_data_parser.train_dict, sound_data_parser.test_dict

# Report how many entries each dictionary holds.
print(f"{len(train_dict)=}", f"{len(test_dict)=}", sep="\n")

100%|██████████| 4286/4286 [01:18<00:00, 54.85it/s, file=train 125-121124-0068.pt] 
100%|██████████| 1078/1078 [00:15<00:00, 71.00it/s, file=test 1116-132847-0017.pt] 

len(train_dict)=4286
len(test_dict)=1078





In [7]:
# Wrap each parsed dictionary in a SoundDataset (train first, then test).
train_dataset, test_dataset = (
    SoundData.SoundDataset(parsed) for parsed in (train_dict, test_dict)
)

# Report the number of samples in each dataset.
print(f"{len(train_dataset)=}", f"{len(test_dataset)=}", sep="\n")

100%|██████████| 4286/4286 [00:23<00:00, 181.85it/s]
100%|██████████| 1078/1078 [00:05<00:00, 207.68it/s]

len(train_dataset)=2644158
len(test_dataset)=646268





In [8]:
# Hold out config["valid_ratio"] of the training samples for validation and
# build the train / validation / test data loaders.

# This cell rebinds `train_dataset` to the training Subset. If the cell is
# re-run, unwrap back to the full dataset first so the split is not applied
# to an already-split subset (which would shrink the training set each time).
if isinstance(train_dataset, torch.utils.data.Subset):
    train_dataset = train_dataset.dataset

original_train_length = len(train_dataset)
actual_valid_length = int(original_train_length * config["valid_ratio"])
actual_train_length = original_train_length - actual_valid_length

# A dedicated, seeded generator keeps the split identical across runs.
train_dataset, valid_dataset = random_split(
    train_dataset,
    [actual_train_length, actual_valid_length],
    generator=torch.Generator().manual_seed(config["seed"]),
)

# Only the training loader is shuffled; evaluation loaders gain nothing from
# shuffling and a fixed order keeps them reproducible.
train_loader = DataLoader(train_dataset, config["batch_size"], shuffle=True)
valid_loader = DataLoader(valid_dataset, config["batch_size"], shuffle=False)
test_loader = DataLoader(test_dataset, config["batch_size"], shuffle=False)

# Neural Network, Criterion, Optimizer

In [9]:
# Import the project-local SoundNetwork module, then reload it so that the
# current contents of SoundNetwork.py are re-executed without restarting the kernel.
import SoundNetwork
importlib.reload(SoundNetwork)

<module 'SoundNetwork' from '/content/drive/MyDrive/Chronical/2023Spring/ML_drive/MLHW2/SoundNetwork.py'>

In [10]:
# Instantiate the network and move its parameters to the selected device.
# NOTE(review): the meaning of the constructor argument 5 is defined in
# SoundNetwork.py and is not visible from this notebook.
model = SoundNetwork.SoundNetwork(5).to(device)

# Print the module tree for inspection.
print(f"model={model!r}")

model=SoundNetwork(
  (net): Sequential(
    (0): Conv1d(39, 30, kernel_size=(3,), stride=(1,))
    (1): ReLU()
    (2): Conv1d(30, 15, kernel_size=(3,), stride=(1,))
    (3): ReLU()
    (4): Flatten(start_dim=1, end_dim=-1)
    (5): Linear(in_features=105, out_features=80, bias=True)
    (6): ReLU()
    (7): Linear(in_features=80, out_features=60, bias=True)
    (8): ReLU()
    (9): Linear(in_features=60, out_features=41, bias=True)
    (10): ReLU()
    (11): Softmax(dim=1)
  )
  (conv_0): Conv1d(39, 30, kernel_size=(3,), stride=(1,))
  (conv_1): Conv1d(30, 15, kernel_size=(3,), stride=(1,))
  (linear_0): Linear(in_features=105, out_features=80, bias=True)
  (lienar_1): Linear(in_features=80, out_features=60, bias=True)
  (linear_2): Linear(in_features=60, out_features=41, bias=True)
  (softmax): Softmax(dim=None)
)


In [11]:
# Adam over all model parameters, with the learning rate from the run config.
optimizer = torch.optim.Adam(params=model.parameters(), lr=config["learning_rate"])

# NOTE(review): nn.CrossEntropyLoss expects raw, unnormalised logits, but the
# model repr printed by the previous cell shows `net` ending in ReLU followed
# by Softmax(dim=1). Confirm in SoundNetwork.py whether forward() really
# applies them; if so, they should be removed there.
criterion = nn.CrossEntropyLoss()

print(f"criterion={criterion!r}, optimizer={optimizer!r}")

criterion=CrossEntropyLoss(), optimizer=Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 1e-05
    maximize: False
    weight_decay: 0
)


# Training

In [12]:
# Import the project-local Trainer module, then reload it so that the
# current contents of Trainer.py are re-executed without restarting the kernel.
import Trainer
importlib.reload(Trainer)

<module 'Trainer' from '/content/drive/MyDrive/Chronical/2023Spring/ML_drive/MLHW2/Trainer.py'>

In [None]:
# Build the trainer from the model, loss, optimizer, the train/validation
# loaders and the shared run config, then call train() with config["epochs"].
# NOTE(review): the saved progress bar for this cell reports roughly 98 s per
# iteration out of 30000 (an ETA of about 813 hours), assuming one iteration is
# one epoch. Consider a smaller "epochs" / "early_stop" before re-running.
trainer = Trainer.Trainer(model, criterion, optimizer, train_loader, valid_loader, config)
trainer.train(config["epochs"])

48/30000:   0%|          | 49/30000 [1:20:04<813:46:53, 97.81s/it, train_loss=3.3311, valid_loss=3.3311, early_countdown=1000, accuracy=42.43%]

In [None]:
# (Re)load the TensorBoard notebook extension and open it on ./runs/.
# NOTE(review): assumes the Trainer writes its event files under ./runs/ — confirm in Trainer.py.
%reload_ext tensorboard
%tensorboard --logdir=./runs/

# Inference/Test

In [None]:
# Import the project-local Inferencer module, then reload it so that the
# current contents of Inferencer.py are re-executed without restarting the kernel.
import Inferencer
importlib.reload(Inferencer)

In [None]:
# Build the inferencer from the run config, the SoundNetwork class (the class
# itself, not the trained `model` instance), this run's time tag and the test
# loader, then run inference.
# NOTE(review): presumably the time tag selects which saved checkpoint to load
# and results are written under the output folder — confirm in Inferencer.py.
inferencer = Inferencer.Inferencer(config, SoundNetwork.SoundNetwork, config["time_string"], test_loader)
inferencer.infer()