In [9]:
import torch as t
import torch.nn as nn
import numpy as np 
import random
import dataset_utils
import model_utils
from torch.utils.data import DataLoader,Dataset,random_split
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.model_selection import train_test_split

In [10]:
def seed_everything(seed=2021):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's legacy global RNG and PyTorch
    (CPU and all CUDA devices), then configures cuDNN for deterministic
    behaviour.

    Args:
        seed: Integer seed applied to every generator. Defaults to 2021.
    """
    random.seed(seed)
    np.random.seed(seed)
    t.manual_seed(seed)
    t.cuda.manual_seed(seed)
    t.cuda.manual_seed_all(seed)
    # Disable cuDNN autotuning and request deterministic kernels so repeated
    # runs pick the same algorithms.
    t.backends.cudnn.benchmark = False
    t.backends.cudnn.deterministic = True

In [11]:
def test(model,test_loader,criterion,device):
    
    def accuracy(y_true, y_pred):
        eq = t.eq(y_true, y_pred).int()
        return sum(eq)/len(eq)

    with t.no_grad():
        model.eval()
        for inputs,labels in test_loader:
            outputs = model(inputs.to(device))
            outputs1 = outputs.detach().cpu()
            acc += accuracy(labels,outputs1)
        print(f"accuracy: {(acc/len(test_loader))*100: 0.2f}%")

In [12]:
def save_model(name,model):
    """Save ``model``'s state dict to ``models/<name>.pth``.

    The target directory is created when missing so a fresh checkout does not
    fail on the first save.

    Args:
        name: File stem of the checkpoint (without the ``.pth`` extension).
        model: Module whose ``state_dict()`` is serialised.
    """
    import os

    path = f'models/{name}.pth'
    os.makedirs(os.path.dirname(path), exist_ok=True)
    t.save(model.state_dict(), path)
    
def plot_graph(arr,epochs):
    """Plot per-epoch loss values against the epoch number.

    Args:
        arr: Sequence of loss values, one per epoch.
        epochs: Number of epochs; the x-axis runs from 1 to ``epochs``.

    Raises:
        ValueError: If ``arr`` does not contain exactly ``epochs`` values.
    """
    losses = list(arr)
    if len(losses) != epochs:
        raise ValueError(
            f"expected {epochs} loss values, got {len(losses)}"
        )
    epoch_axis = range(1, epochs + 1)

    plt.title("Loss vs Epochs")
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    # x = epoch number, y = loss, matching the axis labels set above.
    plt.plot(epoch_axis, losses)
    plt.show()

In [13]:
# Seed all RNGs (default seed) before anything random is constructed.
seed_everything()

# NOTE(review): absolute, machine-specific paths — the notebook only runs
# where these directories exist. Consider a configurable data root.
audio_dir = "/raid/amana/lavish_multi_model/emotion_detection/data/raw_audio"
video_dir = "/raid/amana/lavish_multi_model/emotion_detection/data/video_frames"


# Project dataset built from the audio and video directories; later cells use
# len(dataset) and dataset.num_classes.
dataset = dataset_utils.AVE_dataset(audio_dir,video_dir)
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = t.device('cuda' if t.cuda.is_available() else 'cpu')
# To force CPU execution instead: device = t.device('cpu')

In [14]:
# Split / loader configuration, named so the values are tunable in one place.
TRAIN_FRACTION = 0.8  # remaining samples form the test split
SPLIT_SEED = 2021     # same value as seed_everything()'s default seed
BATCH_SIZE = 1        # unchanged from the original loaders

train_size = int(TRAIN_FRACTION * len(dataset))
test_size = len(dataset) - train_size

# A dedicated, freshly seeded generator makes the partition independent of how
# much of the global RNG earlier cells consumed, so re-running this cell
# always reproduces the same train/test membership.
split_generator = t.Generator().manual_seed(SPLIT_SEED)
train_dataset, test_dataset = random_split(
    dataset, [train_size, test_size], generator=split_generator
)

# Training batches are reshuffled each epoch; evaluation order stays fixed.
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [15]:
# Number of target classes as reported by the dataset object.
num_classes = dataset.num_classes
# NOTE(review): in_channels=10 is hard-coded; presumably it must match the
# channel count of the 'audio_spec' tensors fed to the model — confirm.
model = model_utils.EmotionCNN(num_classes,in_channels=10).to(device)

# Cross-entropy objective, optimised with Adam at a fixed learning rate.
criterion = nn.CrossEntropyLoss()
optimizer = t.optim.Adam(model.parameters(), lr=0.001)

In [18]:
def train(model,train_loader,optimizer,criterion,num_epochs,device):
    """Train ``model`` for ``num_epochs`` epochs and return the loss history.

    Args:
        model: Module to optimise; called on the ``'audio_spec'`` entry of
            each batch.
        train_loader: Iterable of dict batches providing the keys
            ``'audio_spec'`` (model input) and ``'emotion_arr'`` (target
            passed to ``criterion``).
        optimizer: Optimiser already bound to ``model``'s parameters.
        criterion: Loss callable invoked as ``criterion(outputs, targets)``.
        num_epochs: Number of full passes over ``train_loader``.
        device: Device that inputs and targets are moved to.

    Returns:
        List with one entry per epoch: the mean batch loss of that epoch.

    Raises:
        ValueError: If ``train_loader`` yields no batches.
    """
    loss_arr = []
    for epoch in tqdm(range(num_epochs)):
        # Put the model in training mode at the start of every epoch;
        # evaluation code that calls model.eval() would otherwise leave
        # mode-dependent layers (e.g. dropout, batch norm) in inference mode.
        model.train()
        total_loss = 0.0
        num_batches = 0
        for data_point in train_loader:
            inputs = data_point['audio_spec'].to(device)
            targets = data_point['emotion_arr'].to(device)

            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            num_batches += 1

        if num_batches == 0:
            raise ValueError("train_loader yielded no batches")

        epoch_loss = total_loss / num_batches
        loss_arr.append(epoch_loss)
        print(f'Epoch {epoch+1}/{num_epochs}, Training Loss: {epoch_loss}')
    return loss_arr

# Run the training loop; `lossarr` receives the list train() returns (one
# mean training loss per epoch).
num_epochs = 10
lossarr = train(model,train_dataloader,optimizer,criterion,num_epochs,device)

 10%|█         | 1/10 [03:09<28:22, 189.17s/it]

Epoch 1/10, Training Loss: 1.7259777088176036


 20%|██        | 2/10 [06:26<25:51, 193.98s/it]

Epoch 2/10, Training Loss: 1.7224296600197724


 30%|███       | 3/10 [10:29<25:12, 216.14s/it]

Epoch 3/10, Training Loss: 1.7223556829278337


 40%|████      | 4/10 [14:34<22:47, 227.90s/it]

Epoch 4/10, Training Loss: 1.7223556826590416


 50%|█████     | 5/10 [18:32<19:17, 231.44s/it]

Epoch 5/10, Training Loss: 1.7223556826590416


 60%|██████    | 6/10 [22:11<15:08, 227.24s/it]

Epoch 6/10, Training Loss: 1.7223556823902495


 70%|███████   | 7/10 [25:53<11:16, 225.52s/it]

Epoch 7/10, Training Loss: 1.7223556818526653


 80%|████████  | 8/10 [29:25<07:22, 221.17s/it]

Epoch 8/10, Training Loss: 1.7223556810462892


 90%|█████████ | 9/10 [33:09<03:41, 221.98s/it]

Epoch 9/10, Training Loss: 1.7223556797023287


100%|██████████| 10/10 [36:38<00:00, 219.81s/it]

Epoch 10/10, Training Loss: 1.722355671100982



