# Austin Crime Data - Exploratory Data Analysis 

In [1]:
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
import config
import plotly.express as px

In [2]:
#setup postgres database connection
database = f"postgres://{config.db_user}:{config.db_password}@localhost:5432/austin_crime"
engine = create_engine(database)

In [4]:
# create function to streamline querying into dataframe
def db_frame(query):
    return pd.read_sql_query(query, con=engine)

## Data Overview 

In [199]:
# create summary dataframe and add data 
incident_summary = db_frame('''SELECT COUNT(*) FROM crime_incidents''')
# rename column
incident_summary.rename({'count':'total_incidents'},axis=1,inplace=True)

# count of incidents by year
incident_summary['2020_incidents'] = db_frame('''SELECT COUNT(*)
                                                 FROM crime_incidents 
                                                 WHERE occurred_date BETWEEN '2020/01/01' AND '2020/12/31';''')
# count of 2020 incidents 
incident_summary['2019_incidents'] = db_frame('''SELECT COUNT(*)
                                                 FROM crime_incidents 
                                                 WHERE occurred_date BETWEEN '2019/01/01' AND '2019/12/31';''')
# count of 2020 incidents 
incident_summary['2018_incidents'] = db_frame('''SELECT COUNT(*)
                                                 FROM crime_incidents 
                                                 WHERE occurred_date BETWEEN '2018/01/01' AND '2018/12/31';''')
#rename column
incident_summary.rename({0:'count'}, axis=0, inplace=True)

In [None]:
incident_percent = []
#extract values fro summary 
for i in incident_summary.values[0][1:4]: 
    #calculate percentages and add to list 
    incident_percent.append(round((i/incident_summary.values[0][0])*100,2))
    
#create percantage row and add values 
incident_summary = incident_summary.append(
    pd.Series({'total_incidents':100.00,
               '2020_incidents':incident_percent[0],
               '2019_incidents':incident_percent[1],
               '2018_incidents':incident_percent[2]}, 
                  name='percentage'))


### Incidents by year 

In [201]:
incident_summary

Unnamed: 0,total_incidents,2020_incidents,2019_incidents,2018_incidents
count,303939.0,99347.0,104789.0,99062.0
percentage,100.0,32.69,34.48,32.59


### Unique Offense Types

In [210]:
unique_offenses = db_frame('''SELECT COUNT(*)
                             FROM offense_type;''') 
unique_offenses.rename({'count':'Number of Unique Offenses'})

Unnamed: 0,count
0,301


### Top crime categories by year 

In [206]:
# 2020 
top_2020_cat = db_frame('''SELECT COUNT(cr.incident_report_number), 
                                   
                                        of.offense_type
                            FROM crime_incidents as cr 
                            LEFT JOIN offense_type as of
                            ON (cr.offense_code = of.offense_code)
                            GROUP BY of.offense_type
                            ORDER BY 
                            ;''')

In [207]:
top_2020_cat

Unnamed: 0,count,offense_type
0,69,ABUSE OF 911
1,2,ABUSE OF CORPSE
2,165,AGG ASLT ENHANC STRANGL/SUFFOC
3,1764,AGG ASLT STRANGLE/SUFFOCATE
4,3833,AGG ASSAULT FAM/DATE VIOLENCE
...,...,...
296,48,VOCO - ALCOHOL CONSUMPTION
297,2,VOCO AMPLIFIED MUSIC/VEHICLE
298,2,VOCO SOLICITATION PROHIBIT
299,2067,WARRANT ARREST NON TRAFFIC
