In [29]:
# Dependencies and set up
import pandas as pd
import numpy as np
import json

import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, func

# City of Chicago -- Crimes - from 2001 to 2021

In [30]:
# Raw crime table
table = pd.read_csv('Crimes_-_2001_to_Present.csv') 
table.head()

Unnamed: 0,ID,Case Number,Date,Block,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,...,Longitude,Location,Historical Wards 2003-2015,Zip Codes,Community Areas,Census Tracts,Wards,Boundaries - ZIP Codes,Police Districts,Police Beats
0,10224738,HY411648,09/05/2015 01:30:00 PM,043XX S WOOD ST,486,BATTERY,DOMESTIC BATTERY SIMPLE,RESIDENCE,False,True,...,-87.67,"(41.815117282, -87.669999562)",29.0,14924.0,59.0,706.0,3.0,37.0,23.0,108.0
1,10224739,HY411615,09/04/2015 11:30:00 AM,008XX N CENTRAL AVE,870,THEFT,POCKET-PICKING,CTA BUS,False,False,...,-87.7654,"(41.895080471, -87.765400451)",4.0,4299.0,26.0,562.0,45.0,5.0,25.0,67.0
2,11646166,JC213529,09/01/2018 12:01:00 AM,082XX S INGLESIDE AVE,810,THEFT,OVER $500,RESIDENCE,False,True,...,,,,,,,,,,
3,10224740,HY411595,09/05/2015 12:45:00 PM,035XX W BARRY AVE,2023,NARCOTICS,POSS: HEROIN(BRN/TAN),SIDEWALK,True,False,...,-87.71665,"(41.937405765, -87.716649687)",15.0,21538.0,22.0,216.0,12.0,39.0,7.0,168.0
4,10224741,HY411610,09/05/2015 01:00:00 PM,0000X N LARAMIE AVE,560,ASSAULT,SIMPLE,APARTMENT,False,True,...,-87.755121,"(41.881903443, -87.755121152)",11.0,22216.0,26.0,696.0,23.0,32.0,25.0,81.0


In [31]:
table.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7374374 entries, 0 to 7374373
Data columns (total 30 columns):
 #   Column                      Dtype  
---  ------                      -----  
 0   ID                          int64  
 1   Case Number                 object 
 2   Date                        object 
 3   Block                       object 
 4   IUCR                        object 
 5   Primary Type                object 
 6   Description                 object 
 7   Location Description        object 
 8   Arrest                      bool   
 9   Domestic                    bool   
 10  Beat                        int64  
 11  District                    float64
 12  Ward                        float64
 13  Community Area              float64
 14  FBI Code                    object 
 15  X Coordinate                float64
 16  Y Coordinate                float64
 17  Year                        int64  
 18  Updated On                  object 
 19  Latitude             

In [4]:
# Groupby Primary type
df = table.groupby(['Primary Type']).count()['Description']
df

Primary Type
ARSON                                  12457
ASSAULT                               471163
BATTERY                              1351966
BURGLARY                              411125
CONCEALED CARRY LICENSE VIOLATION        768
CRIM SEXUAL ASSAULT                    27865
CRIMINAL DAMAGE                       840522
CRIMINAL SEXUAL ASSAULT                 3511
CRIMINAL TRESPASS                     207085
DECEPTIVE PRACTICE                    315525
DOMESTIC VIOLENCE                          1
GAMBLING                               14602
HOMICIDE                               11281
HUMAN TRAFFICKING                         73
INTERFERENCE WITH PUBLIC OFFICER       17682
INTIMIDATION                            4351
KIDNAPPING                              7032
LIQUOR LAW VIOLATION                   14531
MOTOR VEHICLE THEFT                   339823
NARCOTICS                             738661
NON - CRIMINAL                            38
NON-CRIMINAL                             1

In [3]:
# Dropping table
table = table.drop(columns=['Case Number', 'Location', 'Updated On', 'Block', 'Community Area', 'Beat', 'District', 'Ward', 'FBI Code', 'X Coordinate', 'Y Coordinate' ])
table

Unnamed: 0,ID,Date,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,Year,Latitude,Longitude,Historical Wards 2003-2015,Zip Codes,Community Areas,Census Tracts,Wards,Boundaries - ZIP Codes,Police Districts,Police Beats
0,10224738,09/05/2015 01:30:00 PM,0486,BATTERY,DOMESTIC BATTERY SIMPLE,RESIDENCE,False,True,2015,41.815117,-87.670000,29.0,14924.0,59.0,706.0,3.0,37.0,23.0,108.0
1,10224739,09/04/2015 11:30:00 AM,0870,THEFT,POCKET-PICKING,CTA BUS,False,False,2015,41.895080,-87.765400,4.0,4299.0,26.0,562.0,45.0,5.0,25.0,67.0
2,11646166,09/01/2018 12:01:00 AM,0810,THEFT,OVER $500,RESIDENCE,False,True,2018,,,,,,,,,,
3,10224740,09/05/2015 12:45:00 PM,2023,NARCOTICS,POSS: HEROIN(BRN/TAN),SIDEWALK,True,False,2015,41.937406,-87.716650,15.0,21538.0,22.0,216.0,12.0,39.0,7.0,168.0
4,10224741,09/05/2015 01:00:00 PM,0560,ASSAULT,SIMPLE,APARTMENT,False,True,2015,41.881903,-87.755121,11.0,22216.0,26.0,696.0,23.0,32.0,25.0,81.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,
7374369,12440325,07/30/2021 12:30:00 PM,0620,BURGLARY,UNLAWFUL ENTRY,False,False,2021,41.877460,-87.763258,52.0,22216.0,26.0,68.0,7.0,32.0,25.0,97.0,
7374370,12439124,07/30/2021 08:15:00 AM,0910,MOTOR VEHICLE THEFT,AUTOMOBILE,False,False,2021,42.017938,-87.681998,3.0,22528.0,10.0,52.0,5.0,12.0,11.0,33.0,
7374371,12443102,02/02/2021 12:00:00 PM,1153,DECEPTIVE PRACTICE,FINANCIAL IDENTITY THEFT OVER $ 300,False,False,2021,41.872815,-87.702147,11.0,21184.0,28.0,36.0,23.0,28.0,16.0,140.0,
7374372,12438706,07/30/2021 07:25:00 PM,1152,DECEPTIVE PRACTICE,ILLEGAL USE CASH CARD,False,False,2021,41.775303,-87.762954,23.0,22268.0,62.0,266.0,15.0,7.0,13.0,272.0,


In [4]:
# Dropping NAN from latitude
table = table[table['Latitude'].notna()]
table

Unnamed: 0,ID,Date,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,Year,Latitude,Longitude,Historical Wards 2003-2015,Zip Codes,Community Areas,Census Tracts,Wards,Boundaries - ZIP Codes,Police Districts,Police Beats
0,10224738,09/05/2015 01:30:00 PM,0486,BATTERY,DOMESTIC BATTERY SIMPLE,RESIDENCE,False,True,2015,41.815117,-87.670000,29.0,14924.0,59.0,706.0,3.0,37.0,23.0,108.0
1,10224739,09/04/2015 11:30:00 AM,0870,THEFT,POCKET-PICKING,CTA BUS,False,False,2015,41.895080,-87.765400,4.0,4299.0,26.0,562.0,45.0,5.0,25.0,67.0
3,10224740,09/05/2015 12:45:00 PM,2023,NARCOTICS,POSS: HEROIN(BRN/TAN),SIDEWALK,True,False,2015,41.937406,-87.716650,15.0,21538.0,22.0,216.0,12.0,39.0,7.0,168.0
4,10224741,09/05/2015 01:00:00 PM,0560,ASSAULT,SIMPLE,APARTMENT,False,True,2015,41.881903,-87.755121,11.0,22216.0,26.0,696.0,23.0,32.0,25.0,81.0
5,10224742,09/05/2015 10:55:00 AM,0610,BURGLARY,FORCIBLE ENTRY,RESIDENCE,False,False,2015,41.744379,-87.658431,18.0,21554.0,70.0,575.0,13.0,59.0,20.0,237.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,
7374369,12440325,07/30/2021 12:30:00 PM,0620,BURGLARY,UNLAWFUL ENTRY,False,False,2021,41.877460,-87.763258,52.0,22216.0,26.0,68.0,7.0,32.0,25.0,97.0,
7374370,12439124,07/30/2021 08:15:00 AM,0910,MOTOR VEHICLE THEFT,AUTOMOBILE,False,False,2021,42.017938,-87.681998,3.0,22528.0,10.0,52.0,5.0,12.0,11.0,33.0,
7374371,12443102,02/02/2021 12:00:00 PM,1153,DECEPTIVE PRACTICE,FINANCIAL IDENTITY THEFT OVER $ 300,False,False,2021,41.872815,-87.702147,11.0,21184.0,28.0,36.0,23.0,28.0,16.0,140.0,
7374372,12438706,07/30/2021 07:25:00 PM,1152,DECEPTIVE PRACTICE,ILLEGAL USE CASH CARD,False,False,2021,41.775303,-87.762954,23.0,22268.0,62.0,266.0,15.0,7.0,13.0,272.0,


In [5]:
table['Month'] = table['Date'].str[0:2].astype(int)
table.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  table['Month'] = table['Date'].str[0:2].astype(int)


Unnamed: 0,ID,Date,IUCR,Primary Type,Description,Arrest,Domestic,Year,Latitude,Longitude,Historical Wards 2003-2015,Zip Codes,Community Areas,Census Tracts,Wards,Boundaries - ZIP Codes,Police Districts,Police Beats,Month
0,10224738,09/05/2015 01:30:00 PM,486,BATTERY,DOMESTIC BATTERY SIMPLE,False,True,2015,41.815117,-87.67,29.0,14924.0,59.0,706.0,3.0,37.0,23.0,108.0,9
1,10224739,09/04/2015 11:30:00 AM,870,THEFT,POCKET-PICKING,False,False,2015,41.89508,-87.7654,4.0,4299.0,26.0,562.0,45.0,5.0,25.0,67.0,9
3,10224740,09/05/2015 12:45:00 PM,2023,NARCOTICS,POSS: HEROIN(BRN/TAN),True,False,2015,41.937406,-87.71665,15.0,21538.0,22.0,216.0,12.0,39.0,7.0,168.0,9
4,10224741,09/05/2015 01:00:00 PM,560,ASSAULT,SIMPLE,False,True,2015,41.881903,-87.755121,11.0,22216.0,26.0,696.0,23.0,32.0,25.0,81.0,9
5,10224742,09/05/2015 10:55:00 AM,610,BURGLARY,FORCIBLE ENTRY,False,False,2015,41.744379,-87.658431,18.0,21554.0,70.0,575.0,13.0,59.0,20.0,237.0,9


In [8]:
# Table types
print (table.dtypes)

ID                              int64
Date                           object
IUCR                           object
Primary Type                   object
Description                    object
Arrest                           bool
Domestic                         bool
Year                            int64
Latitude                      float64
Longitude                     float64
Historical Wards 2003-2015    float64
Zip Codes                     float64
Community Areas               float64
Census Tracts                 float64
Wards                         float64
Boundaries - ZIP Codes        float64
Police Districts              float64
Police Beats                  float64
Month                           int32
dtype: object


# Postgres connection - Chicago crime table

In [None]:
rds_connection_string = "postgres:postgres@localhost:5432/Project_03"
engine = create_engine(f'postgresql://{rds_connection_string}')

In [None]:
# reflect an existing database into a new model
Base = automap_base()
# reflect the tables
Base.prepare(engine,reflect=True)
Base

In [11]:
engine.table_names()

['Chicago_crime']

In [28]:

table.to_sql(name='Chicago_crime', con=engine, if_exists='append', index=False)

KeyboardInterrupt: 

In [None]:
from sqlalchemy import inspect

In [None]:
inspector = inspect(engine)
inspector.get_columns('Chicago_crime')