In [13]:
import os
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import datetime
import sqlite3
import sys
import traceback
import numpy as np
import Data.database_handler as dbHandler
from torchvision import transforms, utils
import datetime as dt
sys.path.append('..')
#%run Map_grid/map.ipynb import CalculateGrid

#Connecting to the SQLite database
dataAmount = 200000
dbPath = r'Data\datasetNY.db'
gridSize = 5
chunkAmount = 500
chunkSize = dataAmount / chunkAmount

class AccidentDataset(Dataset):
    def __init__(self, db_path, transform=None):
        self.coordinates = dbHandler.get_n_data_datetime_converted(db_path, dataAmount)
        self.coordinates = pd.DataFrame(self.coordinates, columns=['datetime', 'latitude', 'longitude'])
        
        #split into 500 chunks using numpy
        self.coordinates = np.array_split(self.coordinates, chunkAmount)

        #process each chunk and merge it back into one dataframe
        self.grids = []
        grid_lower_lat, grid_lower_long = 40.54, -74.15
        grid_upper_lat, grid_upper_long = 40.91, -73.70
        grid_lat_step = (grid_upper_lat - grid_lower_lat) / gridSize
        grid_long_step = (grid_upper_long - grid_lower_long) / gridSize
        for i in range(len(self.coordinates)-1):
            grid = np.zeros((gridSize, gridSize))
            for index, row in self.coordinates[i].iterrows():
                coordinates = row['latitude'], row['longitude']
                for j in range(gridSize):
                    for k in range(gridSize):
                        lat_lower = grid_lower_lat + j * grid_lat_step
                        lat_upper = grid_lower_lat + (j + 1) * grid_lat_step
                        long_lower = grid_lower_long + k * grid_long_step
                        long_upper = grid_lower_long + (k + 1) * grid_long_step
                        if lat_lower <= float(coordinates[0]) < lat_upper and long_lower <= float(coordinates[1]) < long_upper:
                            grid[j][k] += 1
                            break
            self.grids.append(grid/chunkSize)
        self.grids = np.array(self.grids)
        self.transform = transform      

    def __len__(self):
        return len(self.grids)
    
    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        grid = self.grids[idx]
        grid = torch.from_numpy(grid).float()

        max_index = np.argmax(grid)
        max_index = np.array(max_index)
        max_index = torch.from_numpy(max_index)
        #get indicies of the highest value and nearest neighbours that are within 10% of the highest value
        #max_index = np.unravel_index(max_index, grid.shape)
        #max_index = np.array(max_index)
        #max_index = max_index.flatten()
        漢字を食べます = max_index[0] // gridSize, max_index[0] % gridSize
        max_value = grid[漢字を食べます[0]][漢字を食べます[1]].item()
        for i in range(gridSize):
            for j in range(gridSize):
                if grid[i][j] >= max_value * 0.9:
                    print('before: ' + max_index)
                    max_index = np.append(max_index, j * gridSize + i)
                    print('after:' + max_index)

        return grid, torch.from_numpy(max_index)

accident_dataset = AccidentDataset(dbPath)

#Create new array with 60% of the data
train_size = int(0.6 * len(accident_dataset))
test_size = len(accident_dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(accident_dataset, [train_size, test_size])

print(len(train_dataset))
print(len(test_dataset))
print(len(accident_dataset))

#Create dataloader
train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# define the class for multilinear regression
class Network(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(gridSize*gridSize, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, gridSize*gridSize),
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits


# define the class for multilinear regression
# building the model object
device = torch.device('cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu')
print(f'Using {device} device')

model = Network().to(device)
if os.path.exists("model.pth"):
    model.load_state_dict(torch.load("model.pth"))
    print("Loaded model from model.pth")
else:
    print("No model found, creating new model")

# define the loss function
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

# define the training loop
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    print(size)
    model.train()
    print("Training model")
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)
        # Compute prediction error
        #print(X)
        #print(y)
        pred = model(X)
        #print(pred)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")
    print("Finished training model")

def test(dataloader, model, loss_fn):
    print("Testing model")
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f}")

epochs = 5
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)

torch.save(model.state_dict(), "model.pth")
print("Saved PyTorch Model State to model.pth")

[1520607600.0, '40.728516', '-73.88428']
299
200
499
Using cpu device
No model found, creating new model
Epoch 1
-------------------------------
299
Training model


IndexError: invalid index of a 0-dim tensor. Use `tensor.item()` in Python or `tensor.item<T>()` in C++ to convert a 0-dim tensor to a number