# Austin Crime Data - Exploratory Data Analysis 

In [1]:
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
import config
import plotly.express as px

In [2]:
#setup postgres database connection
database = f"postgres://{config.db_user}:{config.db_password}@localhost:5432/austin_crime"
engine = create_engine(database)

In [4]:
# create function to streamline querying into dataframe
def db_frame(query):
    return pd.read_sql_query(query, con=engine)

## Data Overview 

In [199]:
# create summary dataframe and add data 
incident_summary = db_frame('''SELECT COUNT(*) FROM crime_incidents''')
# rename column
incident_summary.rename({'count':'total_incidents'},axis=1,inplace=True)

# count of incidents by year
incident_summary['2020_incidents'] = db_frame('''SELECT COUNT(*)
                                                 FROM crime_incidents 
                                                 WHERE occurred_date BETWEEN '2020/01/01' AND '2020/12/31';''')
# count of 2020 incidents 
incident_summary['2019_incidents'] = db_frame('''SELECT COUNT(*)
                                                 FROM crime_incidents 
                                                 WHERE occurred_date BETWEEN '2019/01/01' AND '2019/12/31';''')
# count of 2020 incidents 
incident_summary['2018_incidents'] = db_frame('''SELECT COUNT(*)
                                                 FROM crime_incidents 
                                                 WHERE occurred_date BETWEEN '2018/01/01' AND '2018/12/31';''')
#rename column
incident_summary.rename({0:'count'}, axis=0, inplace=True)

In [None]:
incident_percent = []
#extract values fro summary 
for i in incident_summary.values[0][1:4]: 
    #calculate percentages and add to list 
    incident_percent.append(round((i/incident_summary.values[0][0])*100,2))
    
#create percantage row and add values 
incident_summary = incident_summary.append(
    pd.Series({'total_incidents':100.00,
               '2020_incidents':incident_percent[0],
               '2019_incidents':incident_percent[1],
               '2018_incidents':incident_percent[2]}, 
                  name='percentage'))


### Incidents by year 

In [201]:
incident_summary

Unnamed: 0,total_incidents,2020_incidents,2019_incidents,2018_incidents
count,303939.0,99347.0,104789.0,99062.0
percentage,100.0,32.69,34.48,32.59


### Unique Offense Types

In [210]:
unique_offenses = db_frame('''SELECT COUNT(*)
                             FROM offense_type;''') 
unique_offenses.rename({'count':'Number of Unique Offenses'})

Unnamed: 0,count
0,301


### Top Offense Type by year

In [254]:
 db_frame('''SELECT of.offense_type,
                                            COUNT(cr.incident_report_number) as incident_count                                              
                            FROM crime_incidents as cr 
                            LEFT JOIN offense_type as of
                            ON (cr.offense_code = of.offense_code)
                            WHERE EXTRACT(year FROM occurred_date)='2020'
                            GROUP BY of.offense_type
                            ORDER BY incident_count DESC 
                            LIMIT 5
                            ;''')

Unnamed: 0,offense_type,incident_count
0,BURGLARY OF VEHICLE,10958
1,FAMILY DISTURBANCE,10181
2,THEFT,7445
3,CRIMINAL MISCHIEF,5890
4,ASSAULT W/INJURY-FAM/DATE VIOL,5610


In [244]:
# 2020 
top_2020_category = db_frame('''SELECT of.offense_type,
                                            COUNT(cr.incident_report_number) as incident_count                                              
                            FROM crime_incidents as cr 
                            LEFT JOIN offense_type as of
                            ON (cr.offense_code = of.offense_code)
                            WHERE occurred_date BETWEEN '2020/01/01' AND '2020/12/31'
                            GROUP BY of.offense_type
                            ORDER BY incident_count DESC 
                            LIMIT 5
                            ;''')

In [245]:
top_2020_category

Unnamed: 0,offense_type,incident_count
0,BURGLARY OF VEHICLE,10939
1,FAMILY DISTURBANCE,10147
2,THEFT,7433
3,CRIMINAL MISCHIEF,5874
4,ASSAULT W/INJURY-FAM/DATE VIOL,5598


In [255]:
# create fucntion to return top categories based on year input 
def query_top_crime(year): 
     return  db_frame(f'''SELECT of.offense_type,
                                            COUNT(cr.incident_report_number) as incident_count                                              
                            FROM crime_incidents as cr 
                            LEFT JOIN offense_type as of
                            ON (cr.offense_code = of.offense_code)
                            WHERE EXTRACT(year FROM occurred_date)={year}
                            GROUP BY of.offense_type
                            ORDER BY incident_count DESC 
                            LIMIT 5
                            ;''')
    
#query top crime categories with funciton   
top_2018_category = query_top_crime('2018')
top_2019_category = query_top_crime('2019')
top_2020_category = query_top_crime('2020')

In [258]:
print('2018 Categories')
print('2019 Categories')
print('2018 Categories')

In [259]:
top_2020_category
top_2019_category
top_2018_category

Unnamed: 0,offense_type,incident_count
0,BURGLARY OF VEHICLE,10958
1,FAMILY DISTURBANCE,10181
2,THEFT,7445
3,CRIMINAL MISCHIEF,5890
4,ASSAULT W/INJURY-FAM/DATE VIOL,5610


Unnamed: 0,offense_type,incident_count
0,BURGLARY OF VEHICLE,11753
1,FAMILY DISTURBANCE,10356
2,THEFT,9603
3,ASSAULT W/INJURY-FAM/DATE VIOL,6128
4,CRIMINAL MISCHIEF,5530


Unnamed: 0,offense_type,incident_count
0,FAMILY DISTURBANCE,10505
1,BURGLARY OF VEHICLE,10500
2,THEFT,9109
3,ASSAULT W/INJURY-FAM/DATE VIOL,5701
4,CRIMINAL MISCHIEF,4632


In [None]:
[('2020/01/01', '2020/12/31')
('2019/01/01', '2019/12/31')
('2018/01/01', '2018/12/31')]

In [249]:
# create list of tuple pairs to iterate through 
year_list  = ['2020','2019','2018']
counter = 0
for i in year_date: 
    table_title = ['2020',]