In [13]:
import math
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report, mean_absolute_error, mean_squared_error

from mutil import *

In [2]:
# Load the two raw tables (incidents and cameras). `load_data` is not defined in this
# notebook — presumably it comes from the `mutil` star import (TODO confirm).
df_delitos, df_camaras = load_data()
# Preprocess the incidents with the camera table as a second input. The preview of the
# result shows an `id_camara` column, so this step presumably assigns each incident to a
# camera (verify in mutil). Note that `df_delitos` is rebound: the raw frame is not kept.
df_delitos = preprocess_data(df_delitos, df_camaras)

In [3]:
# Preview the first rows of the preprocessed incidents table.
df_delitos.head()

Unnamed: 0,fecha_creacion,hora_creacion,incidente_c4,colonia,delegacion_inicio,sector_inicio,latitud,longitud,año_creacion,mes_creacion,dia_creacion,dia_semana,semana_creacion,id_camara
0,2022-04-13,20:00,Agresión-Persona,CENTRO (AREA 4),CUAUHTEMOC,ALAMEDA,19.435991,-99.147213,2022,4,13,2,15,1
1,2022-04-11,10:00,Denuncia-Persona Sospechosa,CENTRO (AREA 5),CUAUHTEMOC,ALAMEDA,19.435149,-99.147325,2022,4,11,0,15,1
2,2022-04-06,16:00,Agresión-Persona,CENTRO (AREA 5),CUAUHTEMOC,ALAMEDA,19.434847,-99.146744,2022,4,6,2,14,1
3,2022-04-06,11:00,Denuncia-Persona Sospechosa,CENTRO (AREA 5),CUAUHTEMOC,ALAMEDA,19.435283,-99.147152,2022,4,6,2,14,1
4,2022-04-12,08:00,Agresión-Persona,CENTRO (AREA 5),CUAUHTEMOC,ALAMEDA,19.435283,-99.147152,2022,4,12,1,15,1


In [4]:
# Rank the incident types by how often they occur, most frequent first.
delitos = df_delitos['incidente_c4'].value_counts().sort_values(ascending=False)

# The head of that ranking is the incident type the rest of the notebook focuses on.
most_common_crime = delito = delitos.index[0]
print('Most common crime: ', most_common_crime)

# Keep only the rows of that incident type and bucket them per camera.
# NOTE: from here on `df_delitos` is a GroupBy object, not a DataFrame, so this cell
# cannot be re-run without first re-running the load/preprocess cell.
is_most_common = df_delitos['incidente_c4'].eq(most_common_crime)
df_delitos = df_delitos.loc[is_most_common].groupby('id_camara')

Most common crime:  Agresión-Persona


In [41]:
# Build (or reload) the daily incident count for every camera.
#
# Result: `df_all_count`, one row per (camera, calendar day) with columns
# `fecha_creacion`, `id_camara` and `count`; days without incidents get count 0.
#
# NOTE: the cache file name does not encode which incident type was selected upstream;
# delete the pickle whenever the upstream filtering changes, otherwise stale counts are
# reloaded. Only unpickle files you produced yourself (pickle can execute code).
CACHE_PATH = 'delitos_all_count.pkl'

if os.path.exists(CACHE_PATH):
    df_all_count = pd.read_pickle(CACHE_PATH)
else:
    # Loop invariants: the calendar every camera is expanded to, and the progress total.
    full_date_range = pd.date_range(start='2022-01-01', end='2023-01-31')
    n_groups = len(df_delitos.groups)

    # Collect the per-camera frames and concatenate once at the end; concatenating
    # inside the loop re-copies the accumulated frame on every iteration (quadratic).
    per_camera_counts = []
    for i, (id_camara, df) in enumerate(df_delitos, start=1):
        printstr = f'Processing {i}/{n_groups} ID:{id_camara}...'
        print(f'{printstr:40s}', end='\r')

        # Incidents per day for this camera.
        df_delitos_count = df.groupby(['id_camara', 'fecha_creacion']).size().reset_index(name='count')
        # Expand to the full calendar; days with no incidents appear as NaN rows.
        # (Relies on `fecha_creacion` being datetime-like so it aligns with the range.)
        df_delitos_count = (
            df_delitos_count
            .set_index('fecha_creacion')
            .reindex(full_date_range)
            .reset_index()
            .rename(columns={'index': 'fecha_creacion'})
        )
        # Missing days mean "no incident"; the camera id is constant within the group.
        df_delitos_count['count'] = df_delitos_count['count'].fillna(0)
        df_delitos_count['id_camara'] = df_delitos_count['id_camara'].fillna(id_camara)
        per_camera_counts.append(df_delitos_count)

    # pd.concat raises on an empty list, so fall back to an empty frame when there are no groups.
    df_all_count = pd.concat(per_camera_counts) if per_camera_counts else pd.DataFrame()

    # Persist the result so later runs can skip the loop.
    df_all_count.to_pickle(CACHE_PATH)

In [44]:
""" create next new dataframe
Name   Type   ID
Book1  ebook  1
Book2  paper  2
Book3  paper  3
Book1  ebook  1
Book2  paper  2
"""
# Toy frame with repeated rows, built row by row, used to try out duplicate counting.
df_test = pd.DataFrame(
    [
        ('Book1', 'ebook', 1),
        ('Book2', 'paper', 2),
        ('Book3', 'paper', 3),
        ('Book1', 'ebook', 1),
        ('Book2', 'paper', 2),
    ],
    columns=['Name', 'Type', 'ID'],
)

df_test.head()

Unnamed: 0,Name,Type,ID
0,Book1,ebook,1
1,Book2,paper,2
2,Book3,paper,3
3,Book1,ebook,1
4,Book2,paper,2


In [55]:
# Column labels of the toy frame as a plain Python list.
df_test.columns.tolist()

['Name', 'Type', 'ID']

In [58]:
# Count how many times each distinct row occurs: group on every column, take the group
# sizes, and flatten the result back into a frame with a `Count` column.
df_test.groupby(df_test.columns.tolist()).size().rename('Count').reset_index()

Unnamed: 0,Name,Type,ID,Count
0,Book1,ebook,1,2
1,Book2,paper,2,2
2,Book3,paper,3,1


In [42]:
# Preview the per-camera daily counts (loaded from the pickle cache or rebuilt).
df_all_count.head()


Unnamed: 0,fecha_creacion,id_camara,count
0,2022-01-01,1,0.0
1,2022-01-02,1,0.0
2,2022-01-03,1,1.0
3,2022-01-04,1,0.0
4,2022-01-05,1,0.0


In [None]:
# create a neural network with pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

# Dataset wrapper around the incidents table
class DelitosDataset(Dataset):
    """Map-style dataset with one sample per incident row.

    Features are the columns listed in ``FEATURE_COLUMNS``. The target of a row is
    the total number of incidents recorded for that row's camera, so ``X`` and ``y``
    always have the same length and ``dataset[i]`` is valid for every
    ``0 <= i < len(dataset)``.

    Args:
        df: DataFrame holding at least the ``FEATURE_COLUMNS``. ``hora_creacion``
            may be numeric or a clock string such as ``"20:00"``; all other
            feature columns must be numeric.

    Attributes:
        df: the frame passed in (kept by reference, not modified).
        X: ``float32`` tensor of shape ``(len(df), 8)``.
        y: ``float32`` tensor of shape ``(len(df), 1)``.
    """

    FEATURE_COLUMNS = ["id_camara", 'latitud', 'longitud', "año_creacion",
                       "mes_creacion", "dia_creacion", "dia_semana", "hora_creacion"]

    def __init__(self, df):
        self.df = df

        # Work on a copy so the caller's frame is left untouched.
        features = df[self.FEATURE_COLUMNS].copy()
        # Clock strings would turn the whole matrix into an object array, which
        # torch.from_numpy rejects; reduce them to the numeric hour first.
        features["hora_creacion"] = self._hour_of_day(features["hora_creacion"])

        # Target: number of incidents per camera, broadcast back onto every row of
        # that camera so targets stay aligned one-to-one with the feature rows.
        incidents_per_camera = df["id_camara"].map(df["id_camara"].value_counts())

        self.X = torch.from_numpy(features.to_numpy(dtype=np.float32))
        self.y = torch.from_numpy(
            incidents_per_camera.to_numpy(dtype=np.float32).reshape(-1, 1)
        )

    @staticmethod
    def _hour_of_day(column):
        """Return the hour of ``column`` as float32 (accepts numbers or "HH:MM" text)."""
        if pd.api.types.is_numeric_dtype(column):
            return column.astype("float32")
        # Keep only the text before the first ':' ("20:00" -> "20").
        hour_text = column.astype(str).str.split(":", n=1).str[0]
        return pd.to_numeric(hour_text).astype("float32")

    def __len__(self):
        # Number of samples == number of feature rows (== number of targets).
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

# Small fully connected regressor
class DelitosNet(nn.Module):
    """Three-layer MLP mapping a feature vector to one regression output.

    Layout: ``in_features -> 10 -> 20 -> 1`` with ReLU after the two hidden layers
    and no activation on the output.

    Args:
        in_features: width of the input vector. Defaults to 8, the number of
            feature columns the dataset in this notebook selects.
    """

    def __init__(self, in_features: int = 8):
        super().__init__()
        self.fc1 = nn.Linear(in_features, 10)
        self.fc2 = nn.Linear(10, 20)
        self.fc3 = nn.Linear(20, 1)

    def forward(self, x):
        """Return predictions of shape ``(batch, 1)`` for ``x`` of shape ``(batch, in_features)``."""
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # Raw linear output: this is a regression head, so no final activation.
        return self.fc3(x)

# Train the network on the incidents table.

# Fix the RNG so weight initialisation and batch shuffling are repeatable across runs.
SEED = 42
torch.manual_seed(SEED)

# Model
net = DelitosNet()
print(net)

# Mean-squared-error regression objective, optimised with SGD + momentum.
criterion = nn.MSELoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

# An earlier cell rebinds `df_delitos` to a per-camera GroupBy, but the dataset needs a
# flat DataFrame: use it directly if it still is one, otherwise stitch the groups back together.
if isinstance(df_delitos, pd.DataFrame):
    df_train = df_delitos
else:
    df_train = pd.concat([group for _, group in df_delitos])
dataset = DelitosDataset(df_train)

# num_workers=0 loads batches in the main process. Worker processes cannot import a
# Dataset class defined inside a notebook on platforms that spawn workers (e.g. Windows).
dataloader = DataLoader(dataset, batch_size=4, shuffle=True, num_workers=0)

# Report the average loss once every LOG_EVERY mini-batches.
LOG_EVERY = 2000

for epoch in range(2):  # passes over the dataset
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(dataloader):
        # Gradients accumulate by default, so clear them before each step.
        optimizer.zero_grad()

        # forward + backward + optimise
        outputs = net(inputs.float())
        loss = criterion(outputs, labels.float())
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % LOG_EVERY == LOG_EVERY - 1:
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / LOG_EVERY))
            running_loss = 0.0

print('Finished Training')

,

ImportError: DLL load failed while importing _C: No se puede encontrar el módulo especificado.