In [9]:
import os
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import datetime
import sqlite3
import sys
import traceback
import numpy as np
import Data.database_handler as dbHandler
from torchvision import transforms, utils
import datetime as dt
import random as rand
sys.path.append('..')
#%run Map_grid/map.ipynb import CalculateGrid

#Connecting to the SQLite database
data_amount = 1600000
db_path = r'Data\datasetNY.db'
grid_size = 5
chunk_amount = 255555
chunk_size = data_amount / chunk_amount
data = dbHandler.get_n_data_datetime_converted(db_path, data_amount)

class AccidentDataset(Dataset):
    def __init__(self, transform=None):
        self.coordinates = data
        self.coordinates = pd.DataFrame(self.coordinates, columns=['datetime', 'latitude', 'longitude'])
        
        #split into 500 chunks using numpy
        self.coordinates = np.array_split(self.coordinates, chunk_amount)

        #process each chunk and merge it back into one dataframe
        self.grids = []
        grid_lower_lat, grid_lower_long = 40.54, -74.15
        grid_upper_lat, grid_upper_long = 40.91, -73.70
        grid_lat_step = (grid_upper_lat - grid_lower_lat) / grid_size
        grid_long_step = (grid_upper_long - grid_lower_long) / grid_size
        for i in range(len(self.coordinates)-1):
            grid = np.zeros((grid_size, grid_size))
            for index, row in self.coordinates[i].iterrows():
                coordinates = row['latitude'], row['longitude']
                for j in range(grid_size):
                    for k in range(grid_size):
                        lat_lower = grid_lower_lat + j * grid_lat_step
                        lat_upper = grid_lower_lat + (j + 1) * grid_lat_step
                        long_lower = grid_lower_long + k * grid_long_step
                        long_upper = grid_lower_long + (k + 1) * grid_long_step
                        if lat_lower <= float(coordinates[0]) < lat_upper and long_lower <= float(coordinates[1]) < long_upper:
                            grid[j][k] += 1
                            break
            self.grids.append(grid/chunk_size)
        self.grids = np.array(self.grids)
        self.transform = transform      

    def __len__(self):
        return len(self.grids)
    
    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        grid = self.grids[idx]
        grid = torch.from_numpy(grid).float()

        max_index = np.argmax(grid)
        max_index = np.array(max_index)
        return grid.flatten(), torch.tensor(max_index.item()).long()

accident_dataset = AccidentDataset()

[1470213720.0, '40.762913', '-73.96981']


In [50]:

#Create new array with 60% of the data
train_size = int(0.6 * len(accident_dataset))
test_size = len(accident_dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(accident_dataset, [train_size, test_size])

print(len(train_dataset))
print(len(test_dataset))
print(len(accident_dataset))

#Create dataloader
train_dataloader = DataLoader(train_dataset, batch_size=64)
test_dataloader = DataLoader(test_dataset, batch_size=64)

# define the class for multilinear regression
class Network(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.dropout = nn.Dropout(0.2)
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(grid_size ** 2, 25),
            nn.ReLU(),
            nn.Linear(25, 25),
            nn.ReLU(),
            nn.Linear(25, grid_size ** 2),
        )

    def forward(self, x):
        #x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits


# define the class for multilinear regression
# building the model object
#device = torch.device('cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu')
device = torch.device('cpu')
print(f'Using {device} device')

model = Network().to(device)
if os.path.exists("model.pth"):
    model.load_state_dict(torch.load("model.pth"))
    print("Loaded model from model.pth")
else:
    print("No model found, creating new model")

# define the loss function
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

# define the training loop
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    print(size)
    model.train()
    print("Training model")
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction error
        #print('X ', X)
        #print('y ', y)
        #print (X.shape)
        pred = model(X)
        #print('pred ', pred)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")
    print("Finished training model")

def test(dataloader, model, loss_fn):
    print("Testing model")
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct, also_correct = 0, 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            #print('y ', y)
            #print('predition', pred.argmax(1))

            #check if prediction is correct
            predictions = torch.topk(pred, 5, dim=1).indices
            #is_correct = (pred.argmax(1) == y or pred.argmax(1) == max_value)

            for i in range (len(predictions)):
                if y[i] in predictions[i]:
                    if y[i] == pred.argmax(1)[i]:
                        correct += 1
                    else:
                        also_correct += 1

            #correct += (pred.argmax(1) == y).type(torch.float).sum().item()
            #print(correct)
    test_loss /= num_batches
    print(f"Main correct: {correct}  size: {size}  main correct/size: {correct/size}")
    print(f"Also correct: {also_correct}  size: {size}  also correct/size: {also_correct/size}")
    correct += also_correct
    correct /= size
    print(f"Test Error: Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f}")

epochs = 0
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)

torch.save(model.state_dict(), "model.pth")
print("Saved PyTorch Model State to model.pth")

model.eval()
edge_correct = 0
for i in range (25):
    randomnumber = rand.randint(0, len(test_dataset) - 1)
    #randomnumber = 86903
    #print(randomnumber)
    #x, y = test_dataset[randomnumber][0], test_dataset[randomnumber][1]
    edge = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
    edge[i] = 1
    x, y = torch.tensor(edge).float(), i

    with torch.no_grad():
        pred = model(x.to(device))
        #print(pred)
        predicted, actual = pred.topk(grid_size), y
        max_value = pred.max(0)[0]
        index = []
        for i in range(len(predicted)):
            if predicted.values[i].item() >= 0.8 * max_value:
                index.append(predicted.indices[i].item())
        part1 = f'Predicted: {index}'.ljust(18, ' ')
        part2 = f'Actual: {actual}'.ljust(10, ' ')
        part3 = f'{actual in index}'.ljust(6, ' ')
        part4 = f'{max_value}'.ljust(10, ' ')
        print(part1, part2, part3, part4)
        #print(f'Predicted: "{index}", Actual: "{actual}" {actual in index} {max_value}')
        edge_correct += actual in index
print('------------------------------------------')
edgestr1 = f"Edge correct: {edge_correct}".ljust(18, ' ')
edgestr2 = f"Size: {25}".ljust(10, ' ')
edgestr3 = f"Edge correct/Size: {edge_correct/25}".ljust(20, ' ')
print(edgestr1, edgestr2, edgestr3)
#print(f"Edge correct: {edge_correct}  size: {25}  edge correct/size: {edge_correct/25}")


153332
102222
255554
Using cpu device
Loaded model from model.pth
Saved PyTorch Model State to model.pth
Predicted: [0]     Actual: 0  True   11.518342971801758
Predicted: [22, 1] Actual: 1  True   11.97995662689209
Predicted: [2]     Actual: 2  True   12.924844741821289
Predicted: [0, 2]  Actual: 3  False  7.235849857330322
Predicted: [0, 1]  Actual: 4  False  7.1129302978515625
Predicted: [0, 2]  Actual: 5  False  7.504836559295654
Predicted: [6]     Actual: 6  True   36.209922790527344
Predicted: [7]     Actual: 7  True   40.71848678588867
Predicted: [8, 2]  Actual: 8  True   9.444517135620117
Predicted: [1, 0]  Actual: 9  False  7.084555625915527
Predicted: [0, 1]  Actual: 10 False  4.951669216156006
Predicted: [11]    Actual: 11 True   37.688446044921875
Predicted: [12]    Actual: 12 True   28.36293601989746
Predicted: [13]    Actual: 13 True   29.45733070373535
Predicted: [14, 2] Actual: 14 True   7.240068435668945
Predicted: [0, 2]  Actual: 15 False  4.837311267852783
Predicted: