In [1]:
import os
import cv2
import numpy as np

import torch
import torch.nn as nn
import torchvision.models as models
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler

from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
from PIL import Image

In [None]:
# Where the archive is downloaded to and where it is extracted.
downloads_dir = "./data/downloads"
dataset_dir = "./data/dataset"

# One folder per split underneath <dataset_dir>/Data.
train_dir, valid_dir, test_dir = (
    f"{dataset_dir}/Data/{split}" for split in ("Train", "Validation", "Test")
)

In [None]:
# Create the download and extraction folders; exist_ok makes re-running this cell safe.
os.makedirs(downloads_dir, exist_ok=True)
os.makedirs(dataset_dir, exist_ok=True)

!wget "https://dl.dropboxusercontent.com/scl/fi/sm1mybj5a2v4ycrxbez7f/miniset.zip?rlkey=jwddeulszbudhbnjz3n70fj9f&st=ronsh841&dl=0" -O {downloads_dir}/minidataset.zip
# This is only a mini dataset, used to check that the pipeline runs end to end.

# Extract the archive into dataset_dir.
!unzip {downloads_dir}/minidataset.zip -d {dataset_dir}

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: ./data/dataset/Data/Test/B1H1_East/1102.png  
  inflating: ./data/dataset/Data/Test/B1H1_East/1116.png  
  inflating: ./data/dataset/Data/Test/B1H1_East/1088.png  
  inflating: ./data/dataset/Data/Test/B1H1_East/1158.png  
  inflating: ./data/dataset/Data/Test/B1H1_East/1206.png  
  inflating: ./data/dataset/Data/Test/B1H1_East/1165.png  
  inflating: ./data/dataset/Data/Test/B1H1_East/1172.png  
  inflating: ./data/dataset/Data/Test/B1H1_East/1189.png  
  inflating: ./data/dataset/Data/Test/B1H1_East/1200.png  
  inflating: ./data/dataset/Data/Test/B1H1_East/1161.png  
  inflating: ./data/dataset/Data/Test/B1H1_East/1175.png  
  inflating: ./data/dataset/Data/Test/B1H1_East/1203.png  
  inflating: ./data/dataset/Data/Test/B1H1_East/1029.png  
  inflating: ./data/dataset/Data/Test/B2H1_West/1031.png  
  inflating: ./data/dataset/Data/Test/B2H1_West/1024.png  
  inflating: ./data/dataset/Data/Test/B2H1_West/11

In [None]:
class ResNetClassifier(nn.Module):
    """
    ResNet-34 classifier with a dropout-regularised classification head.

    The backbone is torchvision's ``resnet34`` created with ``pretrained=True``.
    Its final ``fc`` layer is replaced by ``Dropout(p=0.5)`` followed by a
    ``Linear`` layer that outputs ``num_classes`` values.
    """
    def __init__(self, num_classes):
        super().__init__()
        backbone = models.resnet34(pretrained=True)
        # Read the head's input width before the original fc layer is replaced.
        head_in_features = backbone.fc.in_features
        backbone.fc = nn.Sequential(
            nn.Dropout(p=0.5),  # dropout on the head only, to limit overfitting
            nn.Linear(head_in_features, num_classes),
        )
        self.resnet = backbone

    def forward(self, x):
        """Run ``x`` through the backbone and the replaced head."""
        return self.resnet(x)

In [None]:
# Define a custom dataset for video frames
class VideoFrameDataset(Dataset):
    """
    Dataset over in-memory video frames and their integer class labels.

    Args:
        frames: indexable collection of frames (np.ndarray images, PIL images or tensors).
        labels: indexable collection of integer labels aligned with ``frames``.
        transform: optional callable applied to each frame before it is
            converted to a float tensor.
    """
    def __init__(self, frames, labels, transform=None):
        self.frames = frames  # list of frames as np arrays
        self.labels = labels  # corresponding labels for each frame
        self.transform = transform

    def __len__(self):
        """Number of frames in the dataset."""
        return len(self.frames)

    def __getitem__(self, idx):
        """
        Return ``(frame, label)`` for sample ``idx``.

        ``frame`` is a float32 tensor and ``label`` an int64 tensor, so both can
        be moved with ``.to(device)``.
        """
        frame = self.frames[idx]
        label = self.labels[idx]

        # ndarray frames are wrapped as PIL images before the transform is applied
        if isinstance(frame, np.ndarray):
            frame = Image.fromarray(frame)
        if self.transform is not None:
            frame = self.transform(frame)

        if torch.is_tensor(frame):
            # The transform already produced a tensor: one detached float32 copy is
            # enough, no need for a tensor -> numpy -> tensor round trip.
            frame = frame.detach().to(dtype=torch.float32, copy=True)
        else:
            # PIL image (or anything array-like): go through numpy
            frame = torch.tensor(np.array(frame), dtype=torch.float32)
        label = torch.tensor(label, dtype=torch.long)
        return frame, label

In [None]:
def load_data(directory):
    """
    Load every PNG frame under ``directory`` together with an integer label.

    ``directory`` must contain one sub-folder per class; the folder name is used
    as the class label. Folders and files are visited in name order so the
    returned sample order is reproducible.

    Note: the label encoding is fitted per call, so two directories only get
    matching integer labels when they contain the same set of class folders.

    Input:
    directory: path to a split folder (e.g. the train, validation or test folder)

    Output:
    frames: list of RGB images (np arrays), one per PNG file
    labels: integer labels from LabelEncoder.fit_transform, aligned with frames

    Raises:
    OSError: if a PNG file cannot be decoded by cv2.imread
    """
    frames = []  # list of frames
    labels = []  # corresponding labels

    with os.scandir(directory) as folder_iterator:
        class_folders = sorted(
            (entry for entry in folder_iterator if entry.is_dir()),
            key=lambda entry: entry.name,
        )

    for folder in class_folders:
        with os.scandir(folder.path) as frame_iterator:
            # only real files with a .png extension are loaded
            png_files = sorted(
                (entry for entry in frame_iterator
                 if entry.is_file() and entry.name.lower().endswith('.png')),
                key=lambda entry: entry.name,
            )

        for frame in png_files:
            image = cv2.imread(frame.path)
            if image is None:
                # cv2.imread signals an unreadable/corrupt file by returning None
                raise OSError(f"Could not read image file: {frame.path}")
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # by default cv2 reads in bgr
            frames.append(image)
            labels.append(folder.name)

    # numerically convert labels
    label_encoder = LabelEncoder()
    labels = label_encoder.fit_transform(labels)
    return frames, labels

In [16]:
# Validation/test preprocessing: only convert the image to a tensor, no augmentation.
transform = transforms.Compose([transforms.ToTensor()])

# Training preprocessing: light augmentation, then conversion to a tensor.
train_transforms = transforms.Compose([
    transforms.RandomRotation(degrees=10),   # randomly rotate the image by up to 10 degrees
    transforms.RandomResizedCrop(size=(540, 960),   # final output image size as (height, width)
                                scale=(0.8, 1.0),   # the crop covers 80-100% of the original image area
                                ratio=(16/9, 16/9)),# ratio is width/height: 960/540 = 16/9 keeps the crop undistorted
    transforms.ToTensor()
])

train_frames, train_labels = load_data(train_dir)
valid_frames, valid_labels = load_data(valid_dir)
test_frames, test_labels = load_data(test_dir)

# Only the training set is augmented and shuffled; validation and test use the plain transform.
train_dataset = VideoFrameDataset(train_frames, train_labels, transform=train_transforms)
train_dataload = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=0) #workers to 0 for now because of multiprocessing issues

valid_dataset = VideoFrameDataset(valid_frames, valid_labels, transform=transform)
valid_dataload = DataLoader(valid_dataset, batch_size=32, shuffle=False, num_workers=0)

test_dataset = VideoFrameDataset(test_frames, test_labels, transform=transform)
test_dataload = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=0)

# The number of output classes is taken from the training labels.
num_classes = len(np.unique(train_labels))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ResNetClassifier(num_classes=num_classes).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-4)



In [None]:
# Peek at the first training batch to see what inputs and labels look like.
first_train_batch = next(iter(train_dataload), None)
if first_train_batch is not None:
    inputs, labels = first_train_batch
    print(inputs)
    print(labels)
    print(inputs.size())

tensor([[[[0.4431, 0.4431, 0.4431,  ..., 0.6627, 0.6588, 0.6588],
          [0.4471, 0.4471, 0.4471,  ..., 0.6627, 0.6588, 0.6588],
          [0.4510, 0.4510, 0.4510,  ..., 0.6627, 0.6588, 0.6588],
          ...,
          [0.0000, 0.0000, 0.0000,  ..., 0.3647, 0.3647, 0.3608],
          [0.0000, 0.0000, 0.0000,  ..., 0.3686, 0.3647, 0.3647],
          [0.0000, 0.0000, 0.0000,  ..., 0.3608, 0.3569, 0.3569]],

         [[0.3529, 0.3529, 0.3529,  ..., 0.5843, 0.5804, 0.5804],
          [0.3569, 0.3569, 0.3569,  ..., 0.5843, 0.5804, 0.5804],
          [0.3608, 0.3608, 0.3608,  ..., 0.5843, 0.5804, 0.5804],
          ...,
          [0.0000, 0.0000, 0.0000,  ..., 0.3059, 0.3059, 0.3020],
          [0.0000, 0.0000, 0.0000,  ..., 0.3098, 0.3059, 0.3059],
          [0.0000, 0.0000, 0.0000,  ..., 0.3020, 0.2980, 0.2980]],

         [[0.2941, 0.2941, 0.2941,  ..., 0.5569, 0.5529, 0.5529],
          [0.2980, 0.2980, 0.2980,  ..., 0.5569, 0.5529, 0.5529],
          [0.3020, 0.3020, 0.3020,  ..., 0

In [None]:
# Same check on the first validation batch.
first_valid_batch = next(iter(valid_dataload), None)
if first_valid_batch is not None:
    inputs, labels = first_valid_batch
    print(inputs)
    print(labels)
    print(inputs.size())

tensor([[[[0.7725, 0.5216, 0.3686,  ..., 0.5686, 0.5686, 0.5725],
          [0.8039, 0.4941, 0.3608,  ..., 0.5686, 0.5686, 0.5686],
          [0.7725, 0.5529, 0.3490,  ..., 0.5686, 0.5647, 0.5647],
          ...,
          [0.7098, 0.7412, 0.5922,  ..., 0.5176, 0.5569, 0.6980],
          [0.7216, 0.7333, 0.5961,  ..., 0.6078, 0.5216, 0.6353],
          [0.7137, 0.7059, 0.6667,  ..., 0.7294, 0.5922, 0.5294]],

         [[0.8275, 0.5882, 0.3529,  ..., 0.5451, 0.5451, 0.5490],
          [0.8353, 0.5059, 0.3451,  ..., 0.5451, 0.5451, 0.5451],
          [0.8471, 0.5412, 0.3333,  ..., 0.5451, 0.5412, 0.5412],
          ...,
          [0.7804, 0.7451, 0.5765,  ..., 0.5020, 0.5333, 0.6745],
          [0.7843, 0.7490, 0.5804,  ..., 0.5922, 0.5059, 0.6157],
          [0.7490, 0.7333, 0.6510,  ..., 0.7137, 0.5765, 0.5098]],

         [[0.8078, 0.5608, 0.3216,  ..., 0.4980, 0.4980, 0.5020],
          [0.8118, 0.4784, 0.3137,  ..., 0.4980, 0.4980, 0.4980],
          [0.8275, 0.5176, 0.3059,  ..., 0

In [None]:
def evaluate_model(model, dataloader, criterion):
    """
    Evaluates a model on a validation/test dataloader without updating it.

    Input:
    model: the model to evaluate (it is switched to eval mode and left in it)
    dataloader: iterable yielding (inputs, labels) batches
    criterion: the loss function used to determine loss

    Output:
    the per-sample average loss and the accuracy (fraction of correct predictions)

    Raises:
    ValueError: if the dataloader yields no samples
    """
    model.eval()

    # Run on the device the model's weights live on instead of relying on a
    # notebook-level global; a parameter-free model falls back to the CPU.
    first_param = next(model.parameters(), None)
    eval_device = first_param.device if first_param is not None else torch.device("cpu")

    running_loss = 0.0
    accurate_pred = 0
    total_pred = 0

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(eval_device), labels.to(eval_device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # the batch loss is weighted by batch size so the final average is per sample
            running_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            accurate_pred += (predicted == labels).sum().item()
            total_pred += labels.size(0)

    if total_pred == 0:
        raise ValueError("evaluate_model received a dataloader with no samples")

    avg_loss = running_loss/total_pred
    accuracy = accurate_pred/total_pred

    return avg_loss, accuracy

In [17]:
def _plot_training_history(stored_accloss):
    """Plot train/valid loss and valid accuracy from (train_loss, valid_loss, valid_acc) tuples."""
    plt.plot([x[0] for x in stored_accloss], label='Train Loss')
    plt.plot([x[1] for x in stored_accloss], label='Valid Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.title('Training and Validation Loss')
    plt.show()

    plt.plot([x[2] for x in stored_accloss], label='Valid Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.title('Validation Accuracy')
    plt.show()


def train_model(model, train_dataload, valid_dataload, criterion, optimizer, num_epochs=25,
                checkpoint_path='best_model.pth'):
    """
    Trains a given model over a given number of epochs. Default set to 25.

    After every epoch the model is evaluated on the validation data; whenever
    the validation loss improves, the model's state_dict is saved to
    checkpoint_path. The learning rate is multiplied by 0.1 every 10 epochs.

    Input:
    model: the model we would like to train
    train_dataload: dataloader with the training data
    valid_dataload: dataloader with the validation data
    criterion: the loss function used to determine loss
    optimizer: the optimizer used in backpropagation
    num_epochs: the number of epochs we train for
    checkpoint_path: where the best (lowest validation loss) state_dict is saved


    Output:
    the model as it is after the last epoch (the best weights are in checkpoint_path)
    """
    best_val_loss = float('inf')

    stored_accloss = []

    scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

    # Train on the device the model's weights live on instead of relying on a
    # notebook-level global; a parameter-free model falls back to the CPU.
    first_param = next(model.parameters(), None)
    train_device = first_param.device if first_param is not None else torch.device("cpu")

    for epoch in range(num_epochs):
        model.train()  # evaluate_model leaves the model in eval mode, so switch back every epoch
        running_loss = 0.0
        for inputs, labels in train_dataload:
            inputs, labels = inputs.to(train_device), labels.to(train_device)

            optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # the batch loss is weighted by batch size so the epoch loss is per sample
            running_loss += loss.item() * inputs.size(0)

        epoch_loss = running_loss / len(train_dataload.dataset)

        valid_loss, valid_acc = evaluate_model(model, valid_dataload, criterion)

        stored_accloss.append((epoch_loss, valid_loss, valid_acc))

        if valid_loss < best_val_loss:
            best_val_loss = valid_loss
            torch.save(model.state_dict(), checkpoint_path)

        # epochs are reported 1-based so the last line reads num_epochs/num_epochs
        print(f'Epoch {epoch + 1}/{num_epochs}, Train Loss: {epoch_loss:.4f}, Valid Loss: {valid_loss:.4f}, Valid Accuracy {valid_acc:.4f}')

        scheduler.step()

    #plot the acc and losses
    _plot_training_history(stored_accloss)

    return model


In [None]:
# Train for 25 epochs; train_model saves the best checkpoint (by validation loss) to disk.
trained_model = train_model(
    model=model,
    train_dataload=train_dataload,
    valid_dataload=valid_dataload,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=25,
)

Epoch 0/25, Train Loss: 0.5275, Valid Loss: 1.4496, Valid Accuracy 0.4694
Epoch 1/25, Train Loss: 0.0227, Valid Loss: 1.4145, Valid Accuracy 0.4526
Epoch 2/25, Train Loss: 0.0095, Valid Loss: 1.2812, Valid Accuracy 0.5318
Epoch 3/25, Train Loss: 0.0045, Valid Loss: 1.2966, Valid Accuracy 0.5162
Epoch 4/25, Train Loss: 0.0035, Valid Loss: 1.3397, Valid Accuracy 0.5006
Epoch 5/25, Train Loss: 0.0024, Valid Loss: 1.3073, Valid Accuracy 0.5030
Epoch 6/25, Train Loss: 0.0018, Valid Loss: 1.3168, Valid Accuracy 0.5318
Epoch 7/25, Train Loss: 0.0014, Valid Loss: 1.3289, Valid Accuracy 0.5018
Epoch 8/25, Train Loss: 0.0090, Valid Loss: 1.8857, Valid Accuracy 0.3541
Epoch 9/25, Train Loss: 0.0243, Valid Loss: 1.3724, Valid Accuracy 0.4814
Epoch 10/25, Train Loss: 0.0045, Valid Loss: 1.3448, Valid Accuracy 0.4430
Epoch 11/25, Train Loss: 0.0029, Valid Loss: 1.2848, Valid Accuracy 0.4778


In [14]:
#evaluate with test set
# Load the best checkpoint before testing. map_location keeps the load working
# when the checkpoint was written on a different device (e.g. GPU -> CPU runtime).
model.load_state_dict(torch.load('best_model.pth', map_location=device))
test_loss, test_accuracy = evaluate_model(model, test_dataload, criterion)

print(f'Test Loss: {test_loss:.4f}, Test Accuracy {test_accuracy:.4f}') #its over fit

Test Loss: 1.8025, Test Accuracy 0.3825


In [None]:
# Export the whole model object (not only its state_dict) to a single file.
# NOTE(review): loading this file presumably requires the ResNetClassifier class to be importable — confirm.
torch.save(model, 'downloadable_best_model.pth')