In [1]:
#from main import process_vid
import matplotlib.pyplot as plt
from tqdm import tqdm
import torch
import torchvision
import torch.nn.functional as F
import torch.nn as nn
import cv2
import numpy as np
from main import dense_optical_flow

In [2]:
# TODO
# decaying learning rate (might be better, might not)
# shuffle data - DONE
# instead of returning an object, save data to a .npy file in process_vid - DONE
# downscale images to a lower resolution / size - DONE
# train function - DONE
# test function - DONE

In [3]:
# image processing stuff
# this tutorial + opencv docs are good help
#https://www.tutorialkart.com/opencv/python/opencv-python-resize-image/

# single-channel conversion helper
def grayscale(frame):
    """Return ``frame`` converted with OpenCV's RGB-to-gray colour code.

    NOTE(review): OpenCV's VideoCapture is documented to yield BGR frames, so
    using the RGB code here presumably swaps the red/blue weights - confirm
    whether that is intended before changing it, since any change would alter
    data that was already generated with this helper.
    """
    conversion_code = cv2.COLOR_RGB2GRAY
    return cv2.cvtColor(frame, conversion_code)

# resizes an image to a percentage of its original size
def downscale(img, scale_percent):
    """Resize ``img`` to ``scale_percent`` percent of its width and height.

    Both target dimensions are truncated to integers, and the resize uses
    OpenCV's INTER_AREA interpolation.
    """
    rows, cols = img.shape[0], img.shape[1]
    # cv2.resize takes the target size as (width, height)
    target_size = (
        int(cols * scale_percent / 100),
        int(rows * scale_percent / 100),
    )
    return cv2.resize(img, target_size, interpolation=cv2.INTER_AREA)


# processes video
def process_vid(vid_location, labels_location, save_location):
    """Convert a video plus its label file into a shuffled ``.npy`` dataset.

    Every frame is downscaled to 50% and converted to grayscale. For each
    consecutive pair of frames, ``dense_optical_flow(prev, current, mask)`` is
    called, its result is converted to grayscale, and it is stored together
    with the next unread line of the label file (the raw text, newline
    included). The first flow image is therefore paired with the first label
    line.

    Args:
        vid_location: path of the video file to read.
        labels_location: path of a text file with one label per line.
        save_location: destination passed to ``np.save``.

    Raises:
        IOError: if the first frame of the video cannot be read.

    The saved array has dtype ``object`` and shape ``(n_samples, 2)``:
    column 0 holds the flow image, column 1 the label string.
    """
    cap = cv2.VideoCapture(vid_location)
    processed = []

    try:
        # `with` guarantees the label file is closed even if a frame fails
        with open(labels_location, "r") as labels:
            # get first frame for optical flow
            ret, first_frame = cap.read()
            if not ret:
                raise IOError(f"could not read a first frame from {vid_location!r}")
            first_frame = downscale(first_frame, 50)
            prev_frame = grayscale(first_frame)
            # colour mask handed to dense_optical_flow; channel 1 is saturated up front
            mask = np.zeros_like(first_frame)
            mask[..., 1] = 255

            frame_total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            for _ in tqdm(range(frame_total - 1)):
                ret, frame = cap.read()
                if not ret:
                    # the reported frame count can exceed what is decodable
                    break
                label = labels.readline()
                if label == "":
                    # label file exhausted: stop instead of storing unlabeled samples
                    break
                gray = grayscale(downscale(frame, 50))
                flow_gray = grayscale(dense_optical_flow(prev_frame, gray, mask))
                processed.append([np.array(flow_gray), label])
                prev_frame = gray
    finally:
        cap.release()

    np.random.shuffle(processed)

    # Build the (n, 2) object array explicitly: letting NumPy infer it from a
    # ragged nested list is an error on recent NumPy versions.
    dataset = np.empty((len(processed), 2), dtype=object)
    for row, (flow_img, label) in enumerate(processed):
        dataset[row, 0] = flow_img
        dataset[row, 1] = label
    np.save(save_location, dataset)
    

# loads a dataset previously written with np.save
def fetch_data(file_location):
    """Load and return the array stored at ``file_location``.

    Pickle support is enabled because the saved dataset contains Python
    objects. NOTE: unpickling can execute arbitrary code, so only load files
    that were produced locally / come from a trusted source.
    """
    dataset = np.load(file_location, allow_pickle=True)
    return dataset

# One-off preprocessing call; presumably left commented out so that re-running
# this cell only loads the already generated file.
#process_vid("data/train.mp4", "data/train.txt", "data/train_data.npy")
# load the cached dataset and print how many samples it contains
train_data = fetch_data("data/train_data.npy")
print(len(train_data))

20399


In [4]:
# use the first CUDA device when one is available, otherwise fall back to the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("running on gpu" if device.type == "cuda" else "running on cpu")

class SpeedNet(nn.Module): # does the name make sense?(yes)
    """CNN regressor: five conv + ReLU + 2x2 max-pool stages, then two linear layers.

    The network takes single-channel images and outputs one value per image.

    Args:
        input_shape: ``(height, width)`` of the input images. Defaults to
            ``(240, 320)``, the size this notebook feeds the network. Both
            dimensions must be large enough to survive the five conv/pool
            stages.
    """

    def __init__(self, input_shape=(240, 320)):
        # init parent
        super().__init__()

        # conv layers: 5x5 kernels, channel widths 1 -> 32 -> 32 -> 64 -> 64 -> 128
        self.conv1 = nn.Conv2d(1, 32, 5)
        self.conv2 = nn.Conv2d(32, 32, 5)
        self.conv3 = nn.Conv2d(32, 64, 5)
        self.conv4 = nn.Conv2d(64, 64, 5)
        self.conv5 = nn.Conv2d(64, 128, 5)

        # Push one dummy image through the conv stack to discover how many
        # features reach the first linear layer. no_grad: the probe is only
        # needed for its shape, so no autograd graph should be recorded.
        height, width = input_shape
        self._to_linear = None
        with torch.no_grad():
            self.convs(torch.randn(height, width).view(-1, 1, height, width))

        # Linear Layer # is less linear layer worse or same or better?
        self.fc1 = nn.Linear(self._to_linear, 512)
        self.fc2 = nn.Linear(512, 1)

    def convs(self, x):
        """Run ``x`` through the five conv/ReLU/max-pool stages and return the feature maps.

        On the first call the number of features per sample is cached in
        ``self._to_linear``.
        """
        # I feel like this is the equivalent of a noob spamming buttons
        # in street fighter and blundering into a win(hopefully)
        for conv in (self.conv1, self.conv2, self.conv3, self.conv4, self.conv5):
            x = F.max_pool2d(F.relu(conv(x)), (2, 2))

        if self._to_linear is None:
            self._to_linear = x[0].numel()

        return x

    def forward(self, x):
        """Return a ``(batch, 1)`` tensor of predictions for a batch of images.

        A 3-D input is treated as a single unbatched image.

        Raises:
            ValueError: if the images do not have the resolution the network
                was built for.
        """
        if x.dim() == 3:
            x = x.unsqueeze(0)
        x = self.convs(x)
        # Flatten per sample. Using view(-1, n_features) here would silently
        # redistribute a wrongly sized input across the batch dimension.
        x = x.view(x.shape[0], -1)
        if x.shape[1] != self._to_linear:
            raise ValueError(
                f"expected {self._to_linear} features per sample after the conv "
                f"stack but got {x.shape[1]}; input resolution does not match "
                f"the one the network was built for"
            )
        x = F.relu(self.fc1(x))
        x = self.fc2(x)

        return x
    
# build the network and move its parameters to the selected device
net = SpeedNet().to(device)

running on gpu


In [5]:
# Collect the per-sample images (column 0 of train_data) into one float32
# ndarray before handing them to torch: building a tensor directly from a
# Python list of ndarrays is the slow path PyTorch warns about.
X = torch.from_numpy(
    np.array([i[0] for i in tqdm(train_data)], dtype=np.float32)
).view(-1, 240, 320)

100%|██████████| 20399/20399 [00:00<00:00, 469419.08it/s]


In [6]:
import torch.optim as optim
# Adam over all parameters of the network, fixed learning rate
optimizer = optim.Adam(params=net.parameters(), lr=1e-3)
#TODO: add decaying learning rate
# mean squared error between predictions and labels
loss_function = nn.MSELoss()

#X = torch.Tensor([i[0] for i in tqdm(train_data)]).view(-1, 240, 320)

# labels are stored as text in column 1 of train_data; parse them to floats
y = torch.Tensor([float(sample[1]) for sample in tqdm(train_data)])

100%|██████████| 20399/20399 [00:00<00:00, 807044.29it/s]


In [7]:
# fraction of the samples held out for evaluation
VAL_PERCENT = 0.05
# number of held-out samples (truncated towards zero)
val_size = int(VAL_PERCENT * len(X))

In [8]:
# Split at an explicit index. Slicing with -val_size goes wrong when
# val_size == 0: X[:-0] is empty and X[-0:] is the whole tensor.
split_at = len(X) - val_size
train_X, test_X = X[:split_at], X[split_at:]
train_y, test_y = y[:split_at], y[split_at:]

train_y = train_y.view(-1, 1) # match the shape of the nets output

print(len(train_X))
print(len(test_X))

19380
1019


In [9]:
# ask PyTorch's CUDA allocator to release its unused cached memory before training
torch.cuda.empty_cache()

In [10]:
BATCH_SIZE = 100
EPOCHS = 3

def train(net):
    """Fit ``net`` on ``train_X`` / ``train_y`` for ``EPOCHS`` passes.

    The data is visited in order, in slices of ``BATCH_SIZE`` samples. Each
    slice is reshaped to ``(-1, 1, 240, 320)``, moved to ``device`` and used
    for one step of the module-level ``optimizer`` on ``loss_function``.
    After every epoch the loss of the last batch is printed.
    """
    for epoch in range(EPOCHS):
        batch_starts = range(0, len(train_X), BATCH_SIZE)
        for start in tqdm(batch_starts):
            stop = start + BATCH_SIZE

            # slice the batch and move it to the gpu / cpu device
            inputs = train_X[start:stop].view(-1, 1, 240, 320).to(device)
            targets = train_y[start:stop].to(device)

            net.zero_grad()
            loss = loss_function(net(inputs), targets)
            loss.backward()
            optimizer.step()

        print(f"Epoch: {epoch}. Loss: {loss}")

def average(lst):
    """Return the arithmetic mean of ``lst`` (its sum divided by its length)."""
    total = 0
    for value in lst:
        total = total + value
    return total / len(lst)

# calculate mse
def test(net):
    results = []
    with torch.no_grad():
        for i in tqdm(range(len(test_X))):
            net_out = net(test_X[i].view(-1, 1, 240, 320).to(device))[0][0]
            real = test_y[i]
            results.append((real - net_out)**2)
    
    return int(average(results))

def detailed_test(net):
    """Print the label, prediction and squared error for every held-out sample.

    Both the label and the prediction are truncated to ``int`` before the
    squared error is computed, so the printed values are integers.
    """
    # Inference only: disable autograd, as test() does, so no graph is built
    # for each of the per-sample forward passes.
    with torch.no_grad():
        for i in range(len(test_X)):
            pred = int(net(test_X[i].view(-1, 1, 240, 320).to(device)))
            real = int(test_y[i])
            se = (real - pred)**2
            print(f"real {real}. pred {pred}. se {se}")
            print("---------------------")
        
# train for EPOCHS passes, then print the mean squared error on the held-out
# split (test() truncates it to an int)
train(net)
print("mse", test(net)) # I think this is right
#detailed_test(net)

100%|██████████| 194/194 [00:39<00:00,  4.92it/s]
  0%|          | 0/194 [00:00<?, ?it/s]

Epoch: 0. Loss: 12.497451782226562


100%|██████████| 194/194 [00:39<00:00,  4.93it/s]
  0%|          | 0/194 [00:00<?, ?it/s]

Epoch: 1. Loss: 9.73651123046875


100%|██████████| 194/194 [00:39<00:00,  4.87it/s]
  8%|▊         | 80/1019 [00:00<00:01, 798.99it/s]

Epoch: 2. Loss: 6.721657752990723


100%|██████████| 1019/1019 [00:01<00:00, 790.60it/s]

mse 7





In [11]:
# save this model
def save(path):
    """Write the state dict of the module-level ``net`` to ``path``.

    Missing parent directories of ``path`` are created first, so saving into
    a folder that does not exist yet does not fail.
    """
    import os

    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    torch.save(net.state_dict(), path)

# persist the trained weights of the global net under models/
save("models/big_data_model.pth")