In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix,accuracy_score,jaccard_score,f1_score,log_loss,classification_report
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
import torch.nn.functional as F
# Folder holding the project data (looks like a Colab-mounted Google Drive path — confirm).
dest = '/content/drive/MyDrive/Python/Chicago-Crime/'
# Read the prepared crime table; the file name is appended to `dest` verbatim.
BaseDat = pd.read_csv(f'{dest}Chicago_Crime_Final_Data.csv')

In [None]:
#- keep only the 10 most frequent crime types as prediction targets -#
KeepList = ['BATTERY','THEFT','CRIMINAL DAMAGE','ASSAULT','DECEPTIVE PRACTICE',
            'OTHER OFFENSE','MOTOR VEHICLE THEFT','WEAPONS VIOLATION','ROBBERY','BURGLARY']
# .copy() detaches the filtered rows from the loaded frame, so the column
# assignments below write to an independent DataFrame instead of a slice
# (this is what triggers pandas' SettingWithCopyWarning otherwise).
BaseDat = BaseDat[BaseDat['primary_type'].isin(KeepList)].copy()
#- integer-encode the two text columns through pandas category codes -#
# NOTE(review): cat.codes yields -1 for missing values; confirm that
# location_description has no NaN left at this point.
BaseDat['Location_Desc'] = BaseDat['location_description'].astype("category").cat.codes
BaseDat['Type'] = BaseDat['primary_type'].astype("category").cat.codes
#- this dataframe holds the class descriptions: one row per crime type with its code -#
# (every primary_type maps to a single code, so max() simply picks that code)
TypesCodes = pd.DataFrame(BaseDat.groupby('primary_type')['Type'].max())
#- drop the raw text columns (now encoded) and the Date column -#
BaseDat = BaseDat.drop(columns=['primary_type','location_description','Date'])
BaseDat.head(3)

In [None]:
#- relative frequency of every crime class, in the (unsorted) order value_counts returns them -#
Weights = BaseDat['Type'].value_counts(normalize=True, sort=False).to_numpy(copy=True)
#- same values as a float32 tensor -#
Tens_Weights = torch.from_numpy(Weights).float()
# NOTE(review): WeightedRandomSampler treats `weights` as one weight per dataset
# sample. Passing one weight per class means only indices 0..len(Tens_Weights)-1
# can ever be drawn, and num_samples limits each pass to that many draws.
# The values are also class frequencies rather than inverse frequencies — confirm intent.
weighted_sampler = WeightedRandomSampler(
    weights=Tens_Weights,
    num_samples=len(Tens_Weights),
    replacement=True,
)

In [None]:
#- show the per-class relative frequencies stored in Weights -#
print(Weights)

## Deep Neural Network (DNN) with PyTorch

### Standardization and splitting

In [None]:
#- target: the encoded crime type, selected with a column mask so y stays 2-D, shape (n, 1) -#
y = BaseDat.loc[:, BaseDat.columns == 'Type']
#- features: every remaining column -#
X = BaseDat.drop(['Type'],axis=1)
#- convert to arrays -#
X = np.asarray(X)
y = np.asarray(y)
print(X.shape)
#- split first, so the scaler below never sees the held-out rows -#
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=0)
#- min-max scale the features with the min/max of the TRAINING rows only; fitting on -#
#- the full matrix would leak test-set statistics into training                      -#
scaler = preprocessing.MinMaxScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
#- X keeps holding the scaled full feature matrix, now scaled with the train-fitted scaler -#
X = scaler.transform(X)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

In [None]:
class Data(Dataset):
    """Map-style dataset pairing a feature container with its label container.

    Subclasses ``torch.utils.data.Dataset`` so it is a regular dataset for
    ``DataLoader`` and the other torch data utilities.

    Args:
        x_tensor: indexable features, one entry per sample.
        y_tensor: indexable labels, aligned with ``x_tensor``.
    """
    # Constructor
    def __init__(self, x_tensor, y_tensor):
        self.y = y_tensor
        self.x = x_tensor
    # Getter
    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return (self.x[index], self.y[index])
    # Get Length
    def __len__(self):
        """Return the number of samples, taken from the feature container."""
        return len(self.x)

In [None]:
class NeuralNet(nn.Module):
    """Feed-forward classifier: Linear -> ReLU -> Linear, returning raw class scores."""

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        # Sub-module names are kept as-is: they are the keys of the state_dict.
        self.layer1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        self.layer2 = nn.Linear(in_features=hidden_size, out_features=num_classes)

    def forward(self, x):
        """Map a batch of feature rows to one score per class (no softmax applied)."""
        hidden = self.relu(self.layer1(x))
        return self.layer2(hidden)

In [None]:
# Define the accuracy function
def accuracy(model, data_set):
    """Return the fraction of samples in ``data_set`` that ``model`` classifies correctly.

    Args:
        model: callable mapping ``data_set.x`` to per-class scores of shape (n, classes).
        data_set: object exposing the features as ``.x`` and the labels as ``.y``.
            Labels may be class indices shaped (n,) or (n, 1), or one-hot rows
            shaped (n, classes).

    Returns:
        Mean of the element-wise prediction/label matches (numpy float in [0, 1]).
    """
    # Inference only: no autograd graph is needed for the forward pass.
    with torch.no_grad():
        _, yhat = torch.max(model(data_set.x), 1)
    targets = data_set.y
    if targets.dim() > 1:
        # An (n, 1) label column would broadcast against the (n,) predictions into an
        # (n, n) matrix, so labels are reduced to one class index per sample first.
        targets = torch.max(targets, 1)[1] if targets.size(1) > 1 else targets.reshape(-1)
    return (yhat == targets).numpy().mean()

In [None]:
# Define the training function of the model
def train(data_set, model, criterion, train_loader, optimizer, epochs=200):
    """Train ``model`` on mini-batches from ``train_loader`` and return the loss history.

    Args:
        data_set: unused; kept so existing calls keep working.
        model: network to optimise, called as ``model(x)``.
        criterion: loss, called as ``criterion(scores, class_indices)``.
        train_loader: iterable yielding ``(x, y)`` batches. ``y`` may hold class
            indices shaped (batch,) or (batch, 1), or one-hot rows shaped
            (batch, classes).
        optimizer: optimizer already bound to the model parameters.
        epochs: number of passes over ``train_loader``.

    Returns:
        list[float]: the loss of every processed batch, in processing order.
    """
    LOSS = []
    for epoch in range(epochs):
        print('Epoch num:', epoch)
        last_loss = None
        for x, y in train_loader:
            if y.dim() > 1 and y.size(1) > 1:
                # one-hot (or score) rows -> index of the largest entry
                target = torch.max(y, 1)[1]
            else:
                # class indices stored as (batch,) or (batch, 1); taking the argmax of
                # a single column would turn every label into 0, so just flatten
                target = y.reshape(-1)
            optimizer.zero_grad()
            yhat = model(x)
            loss = criterion(yhat, target)
            loss.backward()
            optimizer.step()
            last_loss = loss.item()
            LOSS.append(last_loss)
        # nothing to report when the loader produced no batch in this epoch
        if last_loss is not None:
            print('Loss:', last_loss)
    return LOSS

In [None]:
#- prepare the tensors for input
x_train_tensor = torch.from_numpy(X_train).type(torch.FloatTensor)
y_train_tensor = torch.from_numpy(y_train).type(torch.LongTensor)
data_set = Data(x_train_tensor, y_train_tensor)
#- class-balanced sampling over the TRAINING rows -#
# WeightedRandomSampler needs one weight per dataset sample. A sampler built from
# one weight per class can only ever draw the first few row indices, so the
# per-sample weights are derived here from the training labels themselves.
train_labels = y_train_tensor.reshape(-1)
class_counts = torch.bincount(train_labels)
# inverse class frequency: rows of rare classes are drawn more often
sample_weights = (1.0 / class_counts.float())[train_labels]
train_sampler = WeightedRandomSampler(weights=sample_weights,
                                      num_samples=len(sample_weights),
                                      replacement=True)
train_loader = DataLoader(dataset=data_set, batch_size=1000, sampler=train_sampler)

In [None]:
# set the parameters (1 hidden layer with 100 neurons)
# fixed seed so weight initialisation (and later sampling) is repeatable across runs
torch.manual_seed(0)
# one input unit per feature column, read from the data instead of hard-coding it
input_size = x_train_tensor.shape[1]
hidden_size = 100
# one output score per crime class kept for prediction
num_classes = 10
model = NeuralNet(input_size, hidden_size, num_classes)
learning_rate = 0.0001
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
# CrossEntropyLoss takes raw scores and integer class indices
criterion = nn.CrossEntropyLoss()

In [None]:
# Train the model for 200 epochs
LOSS = train(
    data_set=data_set,
    model=model,
    criterion=criterion,
    train_loader=train_loader,
    optimizer=optimizer,
    epochs=200,
)

## Accuracy on Test Data

In [None]:
#- held-out features and labels as tensors -#
x_test_tensor = torch.from_numpy(X_test).type(torch.FloatTensor)
y_test_tensor = torch.from_numpy(y_test).type(torch.LongTensor)
#- inference only: no_grad avoids building an autograd graph over the whole test set -#
with torch.no_grad():
    predicted = model(x_test_tensor)
#- predicted class = index of the largest score in each row -#
_, predicted_Class = torch.max(predicted, dim=1)

In [None]:
# print(y_test_tensor[:2])
# print(predicted[:2])
# print(predicted_Class[:2])

In [None]:
#- labels are flattened to (n,) so they compare element-wise with the (n,) predictions; -#
#- an (n, 1) column would broadcast against them into an (n, n) comparison             -#
data_set = Data(x_test_tensor, y_test_tensor.reshape(-1))
Accu = accuracy(model, data_set)
print('Accuracy of DL Model:', Accu)
#- hand sklearn plain 1-D numpy arrays for both the true and the predicted labels -#
y_true = y_test.ravel()
y_pred = predicted_Class.detach().cpu().numpy()
print(classification_report(y_true, y_pred))
#- confusion matrix drawn on the axes created here (rows: actual, columns: predicted) -#
fig, ax = plt.subplots(figsize=(15, 8))
ax = sns.heatmap(confusion_matrix(y_true, y_pred), annot=True, fmt="d", ax=ax)
ax.set_title('Confusion matrix on test data', fontsize = 15)
ax.set_xlabel('Predicted', fontsize = 15)
ax.set_ylabel('Actual', fontsize = 15)
plt.show()