# Descubrimientos de patrones de accidentes de tránsito en la CDMX

### Creación del Dataset Final para generar el modelo de clasificación 

In [1]:
from math import radians, cos, sin, asin, sqrt, atan2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
from pylab import rcParams
import seaborn as sns
import folium
import networkx as nx
import osmnx as ox
import geopandas as gpd
from shapely.geometry import Point, Polygon, LineString, MultiLineString
import shapefile as shp
import warnings
# Silence every warning category for the rest of the session.
warnings.filterwarnings(action='ignore')
# Never truncate the column listing when a DataFrame is displayed.
pd.options.display.max_columns = None

In [2]:
# Seaborn theme first, then the matplotlib figure defaults (size in inches, resolution in dpi).
sns.set(context='paper', style='ticks', palette='inferno')
mpl.rc("figure", figsize=(25, 21), dpi=200)

In [3]:
# Parsing options shared by every static-feature CSV. `index_col=False` keeps the
# first column as ordinary data, which is why 'Unnamed: 0' shows up as a column.
static_read_opts = dict(sep=',', encoding='utf-8', header=0, index_col=False)

# One static-feature table per file (50 m, 100 m, 200 m, 300 m and 500 m variants).
ind_feat50 = pd.read_csv(
    '/Users/daniel.rodriguez/Documents/ACC/ACC_PROOF/ACC1//results_static//csv_ind_50m.csv',
    **static_read_opts,
)
ind_feat100 = pd.read_csv(
    '/Users/daniel.rodriguez/Documents/ACC/ACC_PROOF/ACC1//results_static//csv_ind_100m.csv',
    **static_read_opts,
)
ind_feat200 = pd.read_csv(
    '/Users/daniel.rodriguez/Documents/ACC/ACC_PROOF/ACC1//results_static//csv_ind_200m.csv',
    **static_read_opts,
)
ind_feat300 = pd.read_csv(
    '/Users/daniel.rodriguez/Documents/ACC/ACC_PROOF/ACC1//results_static//csv_ind_300m.csv',
    **static_read_opts,
)
ind_feat500 = pd.read_csv(
    '/Users/daniel.rodriguez/Documents/ACC/ACC_PROOF/ACC1//results_static//csv_ind_500m.csv',
    **static_read_opts,
)



In [4]:
# Print the schema (column names, dtypes, non-null counts) of the table loaded from csv_ind_50m.csv.
ind_feat50.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 263405 entries, 0 to 263404
Data columns (total 44 columns):
Unnamed: 0             263405 non-null int64
X                      263405 non-null float64
Y                      263405 non-null float64
attraction             263405 non-null int64
bank                   263405 non-null int64
bar                    263405 non-null int64
bar_300                263405 non-null int64
cetram                 263405 non-null int64
cinema                 263405 non-null int64
college                263405 non-null int64
crossing               263405 non-null int64
crossing_300           263405 non-null int64
cruce_peligroso        263405 non-null int64
hospital               263405 non-null int64
hospital_300           263405 non-null int64
hotel                  263405 non-null int64
hotel_300              263405 non-null int64
interseccion           263405 non-null int64
kindergarten           263405 non-null int64
mall                   263405 

In [5]:
# Parsing options shared by every training dynamic-feature CSV.
train_read_opts = dict(sep=',', encoding='utf-8', header=0, index_col=False)

# One training table per geo_<N>_train_data.csv file (N = 50, 100, 200, 300, 500).
train_dir_feat50 = pd.read_csv(
    '/Users/daniel.rodriguez/Documents/ACC/ACC_PROOF/ACC1/results_dynamic/geo_50_train_data.csv',
    **train_read_opts,
)
train_dir_feat100 = pd.read_csv(
    '/Users/daniel.rodriguez/Documents/ACC/ACC_PROOF/ACC1/results_dynamic/geo_100_train_data.csv',
    **train_read_opts,
)
train_dir_feat200 = pd.read_csv(
    '/Users/daniel.rodriguez/Documents/ACC/ACC_PROOF/ACC1/results_dynamic/geo_200_train_data.csv',
    **train_read_opts,
)
train_dir_feat300 = pd.read_csv(
    '/Users/daniel.rodriguez/Documents/ACC/ACC_PROOF/ACC1/results_dynamic/geo_300_train_data.csv',
    **train_read_opts,
)
train_dir_feat500 = pd.read_csv(
    '/Users/daniel.rodriguez/Documents/ACC/ACC_PROOF/ACC1/results_dynamic/geo_500_train_data.csv',
    **train_read_opts,
)


In [6]:
# Parsing options shared by every test dynamic-feature CSV.
test_read_opts = dict(sep=',', encoding='utf-8', header=0, index_col=False)

# One test table per geo_<N>_test_data.csv file (N = 50, 100, 200, 300, 500).
test_dir_feat50 = pd.read_csv(
    '/Users/daniel.rodriguez/Documents/ACC/ACC_PROOF/ACC1/results_dynamic/geo_50_test_data.csv',
    **test_read_opts,
)
test_dir_feat100 = pd.read_csv(
    '/Users/daniel.rodriguez/Documents/ACC/ACC_PROOF/ACC1/results_dynamic/geo_100_test_data.csv',
    **test_read_opts,
)
test_dir_feat200 = pd.read_csv(
    '/Users/daniel.rodriguez/Documents/ACC/ACC_PROOF/ACC1/results_dynamic/geo_200_test_data.csv',
    **test_read_opts,
)
test_dir_feat300 = pd.read_csv(
    '/Users/daniel.rodriguez/Documents/ACC/ACC_PROOF/ACC1/results_dynamic/geo_300_test_data.csv',
    **test_read_opts,
)
test_dir_feat500 = pd.read_csv(
    '/Users/daniel.rodriguez/Documents/ACC/ACC_PROOF/ACC1/results_dynamic/geo_500_test_data.csv',
    **test_read_opts,
)

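Generamos nuestra variable objetivo `SINIESTRO`: vale 0 cuando la suma de todos los conteos de incidentes de la fila es cero y 1 en caso contrario.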

In [7]:
def create_dep_var(row):
    """Derive the binary target for one record.

    Parameters
    ----------
    row : mapping-like
        Anything indexable by column name (e.g. a DataFrame row passed by
        ``DataFrame.apply(..., axis=1)``) that holds the thirteen incident
        count columns listed below.

    Returns
    -------
    int
        0 when the incident counts add up to exactly zero, 1 otherwise.
    """
    incident_columns = (
        'PER_ATRAPADO_DESBARRANCADO',
        'VEH_ATRAPADO',
        'VEH_DESBARRANCADO',
        'VOLCADURA',
        'ACC_FATAL',
        'ATROPELLADO_FATAL',
        'ATROPELLADO',
        'ACC_C_LESION',
        'FERROVIARIO',
        'MOTOCICLISTA',
        'OTROS',
        'ACC_PRENSADO',
        'ACC_S_LESION',
    )
    # Accumulate in the listed order so the total matches a plain chained addition.
    incident_total = sum(row[column] for column in incident_columns)
    return 0 if incident_total == 0 else 1

In [8]:
# Add the binary 'SINIESTRO' target to each train/test frame.
# Column assignment mutates the frame in place, so the notebook-level names
# keep pointing at the updated tables.
for dynamic_frame in (
    train_dir_feat50, test_dir_feat50,
    train_dir_feat100, test_dir_feat100,
    train_dir_feat200, test_dir_feat200,
    train_dir_feat300, test_dir_feat300,
    train_dir_feat500, test_dir_feat500,
):
    dynamic_frame['SINIESTRO'] = dynamic_frame.apply(create_dep_var, axis=1)

In [9]:
# Columns kept from every training table: the '_100' and '_200' variants of
# each incident type, plus the 'SINIESTRO' target.
# NOTE(review): the same '_100'/'_200' columns are selected for all five
# tables regardless of which file they came from - confirm this is intended.
train_keep_cols = [
    'ACC_C_LESION_100',
    'ACC_C_LESION_200',
    'ACC_FATAL_100',
    'ACC_FATAL_200',
    'ACC_PRENSADO_100',
    'ACC_PRENSADO_200',
    'ACC_S_LESION_100',
    'ACC_S_LESION_200',
    'ATROPELLADO_100',
    'ATROPELLADO_200',
    'ATROPELLADO_FATAL_100',
    'ATROPELLADO_FATAL_200',
    'FERROVIARIO_100',
    'FERROVIARIO_200',
    'MOTOCICLISTA_100',
    'MOTOCICLISTA_200',
    'OTROS_100',
    'OTROS_200',
    'PER_ATRAPADO_DESBARRANCADO_100',
    'PER_ATRAPADO_DESBARRANCADO_200',
    'VEH_ATRAPADO_100',
    'VEH_ATRAPADO_200',
    'VEH_DESBARRANCADO_100',
    'VEH_DESBARRANCADO_200',
    'VOLCADURA_100',
    'VOLCADURA_200',
    'SINIESTRO',
]

# Rebind each name to its column-restricted version (same order as the list above).
(
    train_dir_feat50,
    train_dir_feat100,
    train_dir_feat200,
    train_dir_feat300,
    train_dir_feat500,
) = (
    frame[train_keep_cols]
    for frame in (
        train_dir_feat50,
        train_dir_feat100,
        train_dir_feat200,
        train_dir_feat300,
        train_dir_feat500,
    )
)

In [10]:
# Columns kept from every test table: the '_100' and '_200' variants of each
# incident type, plus the 'SINIESTRO' target.
# NOTE(review): the same '_100'/'_200' columns are selected for all five
# tables regardless of which file they came from - confirm this is intended.
test_keep_cols = [
    'ACC_C_LESION_100',
    'ACC_C_LESION_200',
    'ACC_FATAL_100',
    'ACC_FATAL_200',
    'ACC_PRENSADO_100',
    'ACC_PRENSADO_200',
    'ACC_S_LESION_100',
    'ACC_S_LESION_200',
    'ATROPELLADO_100',
    'ATROPELLADO_200',
    'ATROPELLADO_FATAL_100',
    'ATROPELLADO_FATAL_200',
    'FERROVIARIO_100',
    'FERROVIARIO_200',
    'MOTOCICLISTA_100',
    'MOTOCICLISTA_200',
    'OTROS_100',
    'OTROS_200',
    'PER_ATRAPADO_DESBARRANCADO_100',
    'PER_ATRAPADO_DESBARRANCADO_200',
    'VEH_ATRAPADO_100',
    'VEH_ATRAPADO_200',
    'VEH_DESBARRANCADO_100',
    'VEH_DESBARRANCADO_200',
    'VOLCADURA_100',
    'VOLCADURA_200',
    'SINIESTRO',
]

# Rebind each name to its column-restricted version (same order as the list above).
(
    test_dir_feat50,
    test_dir_feat100,
    test_dir_feat200,
    test_dir_feat300,
    test_dir_feat500,
) = (
    frame[test_keep_cols]
    for frame in (
        test_dir_feat50,
        test_dir_feat100,
        test_dir_feat200,
        test_dir_feat300,
        test_dir_feat500,
    )
)

In [11]:
# Static-feature tables, in the same order as the dynamic tables they are paired with below.
static_frames = (ind_feat50, ind_feat100, ind_feat200, ind_feat300, ind_feat500)

# Place static and dynamic features side by side. pd.concat(axis=1) aligns on
# the row index, so each pair is presumed to share index and row order - TODO confirm.
train50, train100, train200, train300, train500 = (
    pd.concat([static, dynamic], axis=1)
    for static, dynamic in zip(
        static_frames,
        (train_dir_feat50, train_dir_feat100, train_dir_feat200,
         train_dir_feat300, train_dir_feat500),
    )
)

# The test sets reuse the same static features with the test dynamic tables.
test50, test100, test200, test300, test500 = (
    pd.concat([static, dynamic], axis=1)
    for static, dynamic in zip(
        static_frames,
        (test_dir_feat50, test_dir_feat100, test_dir_feat200,
         test_dir_feat300, test_dir_feat500),
    )
)

In [12]:
# Remove the leftover 'Unnamed: 0' column from every merged training table.
# The drop is in place, so re-running this cell raises KeyError once the column is gone.
for merged_train in (train50, train100, train200, train300, train500):
    merged_train.drop('Unnamed: 0', axis=1, inplace=True)

In [13]:
# Remove the leftover 'Unnamed: 0' column from every merged test table.
# The drop is in place, so re-running this cell raises KeyError once the column is gone.
for merged_test in (test50, test100, test200, test300, test500):
    merged_test.drop('Unnamed: 0', axis=1, inplace=True)

In [15]:
# Print the schema (column names, dtypes, non-null counts) of the merged train500 table.
train500.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5501 entries, 0 to 5500
Data columns (total 70 columns):
X                                 5501 non-null float64
Y                                 5501 non-null float64
attraction                        5501 non-null int64
bank                              5501 non-null int64
bar                               5501 non-null int64
bar_300                           5501 non-null int64
cetram                            5501 non-null int64
cinema                            5501 non-null int64
college                           5501 non-null int64
crossing                          5501 non-null int64
crossing_300                      5501 non-null int64
cruce_peligroso                   5501 non-null int64
hospital                          5501 non-null int64
hospital_300                      5501 non-null int64
hotel                             5501 non-null int64
hotel_300                         5501 non-null int64
interseccion             

In [14]:
# Print the schema (column names, dtypes, non-null counts) of the merged test500 table.
test500.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5501 entries, 0 to 5500
Data columns (total 70 columns):
X                                 5501 non-null float64
Y                                 5501 non-null float64
attraction                        5501 non-null int64
bank                              5501 non-null int64
bar                               5501 non-null int64
bar_300                           5501 non-null int64
cetram                            5501 non-null int64
cinema                            5501 non-null int64
college                           5501 non-null int64
crossing                          5501 non-null int64
crossing_300                      5501 non-null int64
cruce_peligroso                   5501 non-null int64
hospital                          5501 non-null int64
hospital_300                      5501 non-null int64
hotel                             5501 non-null int64
hotel_300                         5501 non-null int64
interseccion             

In [25]:
# (frame, destination) pairs for the final datasets, training files first.
final_outputs = (
    (train50, '/Users/daniel.rodriguez/Documents/ACC/ACC_PROOF/ACC1/final_datasets/train50_fin.csv'),
    (train100, '/Users/daniel.rodriguez/Documents/ACC/ACC_PROOF/ACC1/final_datasets/train100_fin.csv'),
    (train200, '/Users/daniel.rodriguez/Documents/ACC/ACC_PROOF/ACC1/final_datasets/train200_fin.csv'),
    (train300, '/Users/daniel.rodriguez/Documents/ACC/ACC_PROOF/ACC1/final_datasets/train300_fin.csv'),
    (train500, '/Users/daniel.rodriguez/Documents/ACC/ACC_PROOF/ACC1/final_datasets/train500_fin.csv'),
    (test50, '/Users/daniel.rodriguez/Documents/ACC/ACC_PROOF/ACC1/final_datasets/test50_fin.csv'),
    (test100, '/Users/daniel.rodriguez/Documents/ACC/ACC_PROOF/ACC1/final_datasets/test100_fin.csv'),
    (test200, '/Users/daniel.rodriguez/Documents/ACC/ACC_PROOF/ACC1/final_datasets/test200_fin.csv'),
    (test300, '/Users/daniel.rodriguez/Documents/ACC/ACC_PROOF/ACC1/final_datasets/test300_fin.csv'),
    (test500, '/Users/daniel.rodriguez/Documents/ACC/ACC_PROOF/ACC1/final_datasets/test500_fin.csv'),
)

# Write each table as UTF-8 CSV. The row index is written too (pandas default),
# so the files carry an unnamed leading column.
for final_frame, destination in final_outputs:
    final_frame.to_csv(destination, encoding='utf-8')