In [65]:
# Dependencies and set up
import json
import os
from urllib.parse import quote_plus

import numpy as np
import pandas as pd

import sqlalchemy
from sqlalchemy import create_engine, func
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session

In [66]:
# Read the raw crimes CSV export into a DataFrame.
# The relative path is resolved against the current working directory.
table = pd.read_csv(
    filepath_or_buffer='../../Crimes_-_2001_to_Present.csv',
)
# Show the loaded frame as the cell output (pandas abbreviates long frames).
table

Unnamed: 0,ID,Case Number,Date,Block,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,...,Ward,Community Area,FBI Code,X Coordinate,Y Coordinate,Year,Updated On,Latitude,Longitude,Location
0,10224738,HY411648,09/05/2015 01:30:00 PM,043XX S WOOD ST,0486,BATTERY,DOMESTIC BATTERY SIMPLE,RESIDENCE,False,True,...,12.0,61.0,08B,1165074.0,1875917.0,2015,02/10/2018 03:50:01 PM,41.815117,-87.670000,"(41.815117282, -87.669999562)"
1,10224739,HY411615,09/04/2015 11:30:00 AM,008XX N CENTRAL AVE,0870,THEFT,POCKET-PICKING,CTA BUS,False,False,...,29.0,25.0,06,1138875.0,1904869.0,2015,02/10/2018 03:50:01 PM,41.895080,-87.765400,"(41.895080471, -87.765400451)"
2,11646166,JC213529,09/01/2018 12:01:00 AM,082XX S INGLESIDE AVE,0810,THEFT,OVER $500,RESIDENCE,False,True,...,8.0,44.0,06,,,2018,04/06/2019 04:04:43 PM,,,
3,10224740,HY411595,09/05/2015 12:45:00 PM,035XX W BARRY AVE,2023,NARCOTICS,POSS: HEROIN(BRN/TAN),SIDEWALK,True,False,...,35.0,21.0,18,1152037.0,1920384.0,2015,02/10/2018 03:50:01 PM,41.937406,-87.716650,"(41.937405765, -87.716649687)"
4,10224741,HY411610,09/05/2015 01:00:00 PM,0000X N LARAMIE AVE,0560,ASSAULT,SIMPLE,APARTMENT,False,True,...,28.0,25.0,08A,1141706.0,1900086.0,2015,02/10/2018 03:50:01 PM,41.881903,-87.755121,"(41.881903443, -87.755121152)"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7373711,12438635,JE319437,07/29/2021 11:45:00 PM,003XX E RANDOLPH ST,0870,THEFT,POCKET-PICKING,PARK PROPERTY,False,False,...,42.0,32.0,06,1178911.0,1901260.0,2021,08/05/2021 05:07:13 PM,41.884356,-87.618470,"(41.884355847, -87.618469791)"
7373712,12437903,JE319203,07/29/2021 10:39:00 PM,046XX N MARINE DR,0486,BATTERY,DOMESTIC BATTERY SIMPLE,PARKING LOT / GARAGE (NON RESIDENTIAL),False,True,...,46.0,3.0,08B,1170399.0,1931093.0,2021,08/05/2021 05:07:13 PM,41.966409,-87.648852,"(41.966409353, -87.648852157)"
7373713,12443891,JE300684,07/13/2021 11:40:00 PM,058XX S THROOP ST,031A,ROBBERY,ARMED - HANDGUN,SIDEWALK,False,False,...,16.0,67.0,03,,,2021,08/05/2021 05:07:13 PM,,,
7373714,12437612,JE318886,07/29/2021 05:13:00 PM,106XX S CHAMPLAIN AVE,0560,ASSAULT,SIMPLE,APARTMENT,False,False,...,9.0,50.0,08A,1182494.0,1834426.0,2021,08/05/2021 05:07:13 PM,41.700875,-87.607385,"(41.700874918, -87.607385165)"


In [67]:
# Number of non-null 'Description' values for each 'Primary Type'.
# The column is selected before aggregating so pandas counts only that one
# column instead of counting every column and discarding the rest.
df = table.groupby(['Primary Type'])['Description'].count()
df

Primary Type
ARSON                                  12455
ASSAULT                               471100
BATTERY                              1351865
BURGLARY                              411104
CONCEALED CARRY LICENSE VIOLATION        767
CRIM SEXUAL ASSAULT                    27865
CRIMINAL DAMAGE                       840459
CRIMINAL SEXUAL ASSAULT                 3502
CRIMINAL TRESPASS                     207073
DECEPTIVE PRACTICE                    315462
DOMESTIC VIOLENCE                          1
GAMBLING                               14602
HOMICIDE                               11281
HUMAN TRAFFICKING                         73
INTERFERENCE WITH PUBLIC OFFICER       17681
INTIMIDATION                            4351
KIDNAPPING                              7031
LIQUOR LAW VIOLATION                   14530
MOTOR VEHICLE THEFT                   339804
NARCOTICS                             738658
NON - CRIMINAL                            38
NON-CRIMINAL                             1

In [68]:
# Number of non-null 'ID' values for each (Year, Primary Type, Arrest) group.
# Selecting 'ID' before count() avoids aggregating all the other columns.
df = table.groupby(['Year', 'Primary Type', 'Arrest'])['ID'].count()
df

Year  Primary Type       Arrest
2001  ARSON              False       829
                         True        181
      ASSAULT            False     24234
                         True       7150
      BATTERY            False     72986
                                   ...  
2021  STALKING           True         11
      THEFT              False     19305
                         True        842
      WEAPONS VIOLATION  False      1844
                         True       3420
Name: ID, Length: 1244, dtype: int64

In [69]:
# Group by year, primary type and arrest flag, counting by case number:
# number of non-null 'Case Number' values in each group.
# Selecting the column before count() avoids aggregating all the other columns.
df = table.groupby(['Year', 'Primary Type', 'Arrest'])['Case Number'].count()
df

Year  Primary Type       Arrest
2001  ARSON              False       829
                         True        181
      ASSAULT            False     24234
                         True       7150
      BATTERY            False     72986
                                   ...  
2021  STALKING           True         11
      THEFT              False     19305
                         True        842
      WEAPONS VIOLATION  False      1844
                         True       3420
Name: Case Number, Length: 1244, dtype: int64

In [70]:
# Group by primary type and arrest flag (all years together) and count
# the non-null 'ID' values in each group.
# Selecting 'ID' before count() avoids aggregating all the other columns.
df = table.groupby(['Primary Type', 'Arrest'])['ID'].count()
df

Primary Type       Arrest
ARSON              False       10885
                   True         1570
ASSAULT            False      366741
                   True       104359
BATTERY            False     1049158
                              ...   
STALKING           True          611
THEFT              False     1371119
                   True       180184
WEAPONS VIOLATION  False       21590
                   True        69827
Name: ID, Length: 71, dtype: int64

In [71]:
# Number of entries currently held in `df`
df.shape[0]

71

In [72]:
# Inspect the dtype pandas assigned to the 'Date' column
table['Date'].dtype

dtype('O')

In [73]:
# Column dtypes of the crime table.
# Left as the cell's last expression (rather than print) so the notebook
# records it as the cell output.
table.dtypes

ID                        int64
Case Number              object
Date                     object
Block                    object
IUCR                     object
Primary Type             object
Description              object
Location Description     object
Arrest                     bool
Domestic                   bool
Beat                      int64
District                float64
Ward                    float64
Community Area          float64
FBI Code                 object
X Coordinate            float64
Y Coordinate            float64
Year                      int64
Updated On               object
Latitude                float64
Longitude               float64
Location                 object
dtype: object


In [74]:
# Drop eleven columns from the crime table and keep the trimmed frame
# under the same name; the remaining columns are shown as the cell output.
table = table.drop(
    labels=[
        'Case Number', 'Location', 'Updated On', 'Block',
        'Community Area', 'Beat', 'District', 'Ward',
        'FBI Code', 'X Coordinate', 'Y Coordinate',
    ],
    axis='columns',
)
table

Unnamed: 0,ID,Date,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,Year,Latitude,Longitude
0,10224738,09/05/2015 01:30:00 PM,0486,BATTERY,DOMESTIC BATTERY SIMPLE,RESIDENCE,False,True,2015,41.815117,-87.670000
1,10224739,09/04/2015 11:30:00 AM,0870,THEFT,POCKET-PICKING,CTA BUS,False,False,2015,41.895080,-87.765400
2,11646166,09/01/2018 12:01:00 AM,0810,THEFT,OVER $500,RESIDENCE,False,True,2018,,
3,10224740,09/05/2015 12:45:00 PM,2023,NARCOTICS,POSS: HEROIN(BRN/TAN),SIDEWALK,True,False,2015,41.937406,-87.716650
4,10224741,09/05/2015 01:00:00 PM,0560,ASSAULT,SIMPLE,APARTMENT,False,True,2015,41.881903,-87.755121
...,...,...,...,...,...,...,...,...,...,...,...
7373711,12438635,07/29/2021 11:45:00 PM,0870,THEFT,POCKET-PICKING,PARK PROPERTY,False,False,2021,41.884356,-87.618470
7373712,12437903,07/29/2021 10:39:00 PM,0486,BATTERY,DOMESTIC BATTERY SIMPLE,PARKING LOT / GARAGE (NON RESIDENTIAL),False,True,2021,41.966409,-87.648852
7373713,12443891,07/13/2021 11:40:00 PM,031A,ROBBERY,ARMED - HANDGUN,SIDEWALK,False,False,2021,,
7373714,12437612,07/29/2021 05:13:00 PM,0560,ASSAULT,SIMPLE,APARTMENT,False,False,2021,41.700875,-87.607385


In [75]:
# Keep only the rows that carry a latitude value
table = table.dropna(subset=['Latitude'])
table

Unnamed: 0,ID,Date,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,Year,Latitude,Longitude
0,10224738,09/05/2015 01:30:00 PM,0486,BATTERY,DOMESTIC BATTERY SIMPLE,RESIDENCE,False,True,2015,41.815117,-87.670000
1,10224739,09/04/2015 11:30:00 AM,0870,THEFT,POCKET-PICKING,CTA BUS,False,False,2015,41.895080,-87.765400
3,10224740,09/05/2015 12:45:00 PM,2023,NARCOTICS,POSS: HEROIN(BRN/TAN),SIDEWALK,True,False,2015,41.937406,-87.716650
4,10224741,09/05/2015 01:00:00 PM,0560,ASSAULT,SIMPLE,APARTMENT,False,True,2015,41.881903,-87.755121
5,10224742,09/05/2015 10:55:00 AM,0610,BURGLARY,FORCIBLE ENTRY,RESIDENCE,False,False,2015,41.744379,-87.658431
...,...,...,...,...,...,...,...,...,...,...,...
7373710,12442317,07/29/2021 08:00:00 PM,0870,THEFT,POCKET-PICKING,PARK PROPERTY,False,False,2021,41.884335,-87.619164
7373711,12438635,07/29/2021 11:45:00 PM,0870,THEFT,POCKET-PICKING,PARK PROPERTY,False,False,2021,41.884356,-87.618470
7373712,12437903,07/29/2021 10:39:00 PM,0486,BATTERY,DOMESTIC BATTERY SIMPLE,PARKING LOT / GARAGE (NON RESIDENTIAL),False,True,2021,41.966409,-87.648852
7373714,12437612,07/29/2021 05:13:00 PM,0560,ASSAULT,SIMPLE,APARTMENT,False,False,2021,41.700875,-87.607385


In [76]:
# Renumber the rows 0..n-1; filtering out rows left gaps in the index.
# reset_index returns a new frame, so the result must be assigned back --
# otherwise `table` silently keeps the old, gapped index.
table = table.reset_index(drop=True)
table

Unnamed: 0,ID,Date,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,Year,Latitude,Longitude
0,10224738,09/05/2015 01:30:00 PM,0486,BATTERY,DOMESTIC BATTERY SIMPLE,RESIDENCE,False,True,2015,41.815117,-87.670000
1,10224739,09/04/2015 11:30:00 AM,0870,THEFT,POCKET-PICKING,CTA BUS,False,False,2015,41.895080,-87.765400
2,10224740,09/05/2015 12:45:00 PM,2023,NARCOTICS,POSS: HEROIN(BRN/TAN),SIDEWALK,True,False,2015,41.937406,-87.716650
3,10224741,09/05/2015 01:00:00 PM,0560,ASSAULT,SIMPLE,APARTMENT,False,True,2015,41.881903,-87.755121
4,10224742,09/05/2015 10:55:00 AM,0610,BURGLARY,FORCIBLE ENTRY,RESIDENCE,False,False,2015,41.744379,-87.658431
...,...,...,...,...,...,...,...,...,...,...,...
7300097,12442317,07/29/2021 08:00:00 PM,0870,THEFT,POCKET-PICKING,PARK PROPERTY,False,False,2021,41.884335,-87.619164
7300098,12438635,07/29/2021 11:45:00 PM,0870,THEFT,POCKET-PICKING,PARK PROPERTY,False,False,2021,41.884356,-87.618470
7300099,12437903,07/29/2021 10:39:00 PM,0486,BATTERY,DOMESTIC BATTERY SIMPLE,PARKING LOT / GARAGE (NON RESIDENTIAL),False,True,2021,41.966409,-87.648852
7300100,12437612,07/29/2021 05:13:00 PM,0560,ASSAULT,SIMPLE,APARTMENT,False,False,2021,41.700875,-87.607385


In [77]:
# Group the current table by year, primary type and arrest flag and count
# the non-null 'ID' values in each group.
# Selecting 'ID' before count() avoids aggregating all the other columns.
table_2 = table.groupby(['Year', 'Primary Type', 'Arrest'])['ID'].count()
table_2

Year  Primary Type       Arrest
2001  ARSON              False       824
                         True        180
      ASSAULT            False     24142
                         True       7118
      BATTERY            False     72665
                                   ...  
2021  STALKING           True         11
      THEFT              False     18797
                         True        842
      WEAPONS VIOLATION  False      1844
                         True       3419
Name: ID, Length: 1239, dtype: int64

In [78]:
# Column dtypes of the table in its current state.
# Left as the cell's last expression (rather than print) so the notebook
# records it as the cell output.
table.dtypes

ID                        int64
Date                     object
IUCR                     object
Primary Type             object
Description              object
Location Description     object
Arrest                     bool
Domestic                   bool
Year                      int64
Latitude                float64
Longitude               float64
dtype: object


In [79]:
# Build the PostgreSQL connection string from the environment instead of
# hard-coding credentials in the notebook. Each setting falls back to the
# value this notebook used originally, so nothing changes when the
# variables are unset.
db_user = os.environ.get('PGUSER', 'postgres')
db_password = os.environ.get('PGPASSWORD', 'postgres')
db_host = os.environ.get('PGHOST', 'localhost')
db_port = os.environ.get('PGPORT', '5432')
db_name = os.environ.get('PGDATABASE', 'Project_03')

# User and password are URL-escaped so characters such as '@' or '/' in
# them cannot corrupt the connection URL.
rds_connection_string = (
    f'{quote_plus(db_user)}:{quote_plus(db_password)}'
    f'@{db_host}:{db_port}/{db_name}'
)
engine = create_engine(f'postgresql://{rds_connection_string}')

In [80]:
# Create an automap base class that will hold the model reflected from the
# existing database
Base = automap_base()
# Reflect the tables reachable through `engine`. This step connects to the
# database, so it raises OperationalError when the target database does not
# exist.
# NOTE(review): the `reflect=True` form looks tied to older SQLAlchemy
# releases -- confirm against the installed version before upgrading.
Base.prepare(engine,reflect=True)
# Show the prepared base class as the cell output
Base

OperationalError: (psycopg2.OperationalError) FATAL:  database "Project_03" does not exist

(Background on this error at: http://sqlalche.me/e/13/e3q8)

In [152]:
# List the table names visible through `engine`.
# The Inspector API is used because Engine.table_names() is deprecated in
# newer SQLAlchemy releases; the inspector form works on old and new versions.
sqlalchemy.inspect(engine).get_table_names()

OperationalError: (psycopg2.OperationalError) FATAL:  database "Project_2" does not exist

(Background on this error at: http://sqlalche.me/e/13/e3q8)