# Load Library

In [None]:
from google.cloud import bigquery
import pandas as pd
import numpy as np
import plotly.express as px
import os

# Connect To BigQuery

In [None]:
# Tell the Google client libraries where the service-account key lives.
# NOTE: replace the placeholder below with the path to your personal JSON API key file.
os.environ.update(GOOGLE_APPLICATION_CREDENTIALS='JSON APi File Here')

# BigQuery client object used by the query cells.
Client = bigquery.Client()

In [None]:
# Qeury Chicago Crime Data Before Covid-19 Pandemic
Before_Covid = """
    SELECT *
    FROM `bigquery-public-data.chicago_crime.crime`
    WHERE date BETWEEN TIMESTAMP("2018-02-20") AND TIMESTAMP("2020-01-24")
    ORDER BY date
"""

# Qeury Chicago Crime Data After Covid-19 Pandemic
After_Covid = """
    SELECT *
    FROM `bigquery-public-data.chicago_crime.crime`
    WHERE date BETWEEN TIMESTAMP("2020-01-24") AND TIMESTAMP("2021-03-16")
    ORDER BY date 
"""

In [None]:
# Submit both queries through the BigQuery client (post-Covid query first).
query_job_after_covid, query_job_before_covid = (
    Client.query(sql_text) for sql_text in (After_Covid, Before_Covid)
)

In [None]:
# Fetch the result of each submitted query job.
df_after, df_before = (
    job.result() for job in (query_job_after_covid, query_job_before_covid)
)

In [None]:
# Convert both query results into pandas DataFrames.
data_after, data_before = (
    rows.to_dataframe() for rows in (df_after, df_before)
)

In [None]:
# Work on copies so the downloaded frames (data_after / data_before) stay untouched.
after_copy, before_copy = (
    downloaded.copy() for downloaded in (data_after, data_before)
)

# Explore Data

In [None]:
# Load the column metadata from the local file 'Meta-Data.csv' and display it.
Meta_Data = pd.read_csv(filepath_or_buffer='Meta-Data.csv', encoding='unicode_escape')
Meta_Data

In [None]:
# Print the data types and shape summary of each dataset (pre-Covid first).
for crime_frame in (before_copy, after_copy):
    crime_frame.info()

### Check For Duplicates

In [None]:
# Duplicate check on the identifier columns (unique_key and case_number).
# According to the metadata these columns should not contain repeated values.
# Pre-Covid data, case_number: True = repeated row, False = first occurrence.
before_copy.duplicated(subset='case_number').value_counts()

In [None]:
# Post-Covid data: count rows whose case_number repeats an earlier row.
after_copy.duplicated(subset='case_number').value_counts()

In [None]:
# Pre-Covid data: count rows whose unique_key repeats an earlier row.
before_copy.duplicated(subset='unique_key').value_counts()

In [None]:
# Post-Covid data: count rows whose unique_key repeats an earlier row.
after_copy.duplicated(subset='unique_key').value_counts()

### Check For NULL Values

In [None]:
# Pre-Covid data: how many primary_type values are missing (True) vs present (False).
before_copy['primary_type'].isna().value_counts()

In [None]:
# Post-Covid data: how many primary_type values are missing (True) vs present (False).
after_copy['primary_type'].isna().value_counts()

In [None]:
# Pre-Covid data: how many location_description values are missing (True) vs present (False).
before_copy['location_description'].isna().value_counts()

In [None]:
# Post-Covid data: how many location_description values are missing (True) vs present (False).
after_copy['location_description'].isna().value_counts()

# Clean & Transform Data

### Remove Duplicates

In [None]:
# case_number is unique to an incident according to the metadata, so a repeated
# value is a duplicate record: keep only the first row for each case number.
before_copy = before_copy.drop_duplicates(subset=['case_number'], keep='first')
after_copy = after_copy.drop_duplicates(subset=['case_number'], keep='first')

### Extract Time & Date

In [None]:
# Add a 'Day_Name' column holding the weekday name of each incident's date.
for crime_frame in (before_copy, after_copy):
    crime_frame['Day_Name'] = crime_frame['date'].dt.day_name()

In [None]:
# Add a 'Day' column holding the day of the month of each incident's date.
for crime_frame in (before_copy, after_copy):
    crime_frame['Day'] = crime_frame['date'].dt.day

In [None]:
# Add a 'Month' column holding the month number of each incident's date.
for crime_frame in (before_copy, after_copy):
    crime_frame['Month'] = crime_frame['date'].dt.month

In [None]:
# Add a 'Year' column holding the calendar year of each incident's date.
for crime_frame in (before_copy, after_copy):
    crime_frame['Year'] = crime_frame['date'].dt.year

In [None]:
# Add a 'Time' column holding the hour of each incident as a 12-hour clock
# label with AM/PM, e.g. "02 PM".
# '%I' is the 12-hour field that belongs with '%p'; the previous '%H' is the
# 24-hour field and produced contradictory labels such as "14 PM" and "00 AM".
before_copy['Time'] = before_copy['date'].dt.strftime('%I %p')
after_copy['Time'] = after_copy['date'].dt.strftime('%I %p')

### Change False & True To No & Yes

In [None]:
# Relabel the boolean arrest flag as 'Yes' / 'No'; the column then holds
# strings (object dtype) instead of booleans.
arrest_labels = {True: 'Yes', False: 'No'}
for crime_frame in (before_copy, after_copy):
    crime_frame['arrest'] = crime_frame['arrest'].replace(arrest_labels)

In [None]:
# Relabel the boolean domestic flag as 'Yes' / 'No'; the column then holds
# strings (object dtype) instead of booleans.
domestic_labels = {True: 'Yes', False: 'No'}
for crime_frame in (before_copy, after_copy):
    crime_frame['domestic'] = crime_frame['domestic'].replace(domestic_labels)

### Drop Unneeded Columns

In [None]:
# Remove the columns that the analysis below does not use (pre-Covid data).
before_copy = before_copy.drop(
    columns=[
        'case_number', 'description', 'district', 'beat',
        'x_coordinate', 'y_coordinate', 'block', 'iucr',
        'year', 'community_area', 'fbi_code', 'ward', 'updated_on',
    ]
)

In [None]:
# Remove the columns that the analysis below does not use (post-Covid data).
after_copy = after_copy.drop(
    columns=[
        'case_number', 'description', 'district', 'beat',
        'x_coordinate', 'y_coordinate', 'block', 'iucr',
        'year', 'community_area', 'fbi_code', 'ward', 'updated_on',
    ]
)

# Descriptive analysis With Visualization

### Compare the number of crimes before & after the pandemic by crime type

In [None]:
# Count crimes per primary type over the whole of each period.
# NOTE(review): the two query windows differ in length, so the raw counts (and
# the percentage below) are not normalised per day -- confirm this is intended.
before_primary_type_counts = before_copy.groupby(['primary_type'])[['unique_key']].count()
after_primary_type_counts = after_copy.groupby(['primary_type'])[['unique_key']].count()

# Ten most frequent crime types of each period.
after_primary_type_groupby = after_primary_type_counts.nlargest(10, 'unique_key')
before_primary_type_groupby = before_primary_type_counts.nlargest(10, 'unique_key')

# Types to report: the pre-pandemic top 10, followed by any type that only
# appears in the post-pandemic top 10.
top_primary_types = list(before_primary_type_groupby.index) + [
    crime_type for crime_type in after_primary_type_groupby.index
    if crime_type not in before_primary_type_groupby.index
]

# Put the FULL per-period counts side by side and only then keep the reported
# types. Joining the two top-10 tables directly left NaN wherever a type was in
# one period's top 10 but not in the other's, even though it had a real count.
before_after_primary_type = pd.concat([before_primary_type_counts, after_primary_type_counts], axis=1, ignore_index=False)

# Rename columns
before_after_primary_type.columns = ['Before Pendemic', 'After Pendemic']
before_after_primary_type = before_after_primary_type.loc[top_primary_types]

# Percentage change of each type relative to its pre-pandemic count
before_after_primary_type['% Of Change'] = ((before_after_primary_type['After Pendemic'] - before_after_primary_type['Before Pendemic']) / before_after_primary_type['Before Pendemic'] * 100)

# Show the table
before_after_primary_type.reset_index()

In [None]:
# Grouped bar chart of the crime-type comparison table.
# NOTE(review): the whole table is handed to the chart, presumably including the
# '% Of Change' column next to the counts -- confirm that is intended.
Primary_Type_Viz = px.bar(
    data_frame=before_after_primary_type,
    title="Primary Type Before And After Covid",
    barmode='group',
    width=800,
    height=800,
)
Primary_Type_Viz.show()

### Compare the number of crimes before & after the pandemic by location description

In [None]:
# Count crimes per location description over the whole of each period.
before_location_description_counts = before_copy.groupby(['location_description'])[['unique_key']].count()
after_location_description_counts = after_copy.groupby(['location_description'])[['unique_key']].count()

# Ten most frequent location descriptions of each period.
after_location_description_groupby = after_location_description_counts.nlargest(10, 'unique_key')
before_location_description_groupby = before_location_description_counts.nlargest(10, 'unique_key')

# Locations to report: the pre-pandemic top 10, followed by any location that
# only appears in the post-pandemic top 10.
top_location_descriptions = list(before_location_description_groupby.index) + [
    location for location in after_location_description_groupby.index
    if location not in before_location_description_groupby.index
]

# Put the FULL per-period counts side by side and only then keep the reported
# locations. Joining the two top-10 tables directly left NaN wherever a location
# was in one period's top 10 but not in the other's, despite having a real count.
before_after_location_description = pd.concat([before_location_description_counts, after_location_description_counts], axis=1, ignore_index=False)

# Rename columns
before_after_location_description.columns = ['Before Pendemic', 'After Pendemic']
before_after_location_description = before_after_location_description.loc[top_location_descriptions]

# Percentage change of each location relative to its pre-pandemic count
before_after_location_description['% Of Change'] = ((before_after_location_description['After Pendemic'] - before_after_location_description['Before Pendemic']) / before_after_location_description['Before Pendemic'] * 100)

# Show table
before_after_location_description.reset_index()

In [None]:

# Grouped bar chart of the location-description comparison table.
Location_Description_Viz = px.bar(
    data_frame=before_after_location_description,
    title="Location Description Before And After Covid",
    barmode='group',
    width=720,
    height=920,
)

Location_Description_Viz.show()

### Compare the number of arrests & domestic incidents before & after the pandemic

##### Arrest

In [None]:
# Number of incidents per arrest label ('Yes' / 'No') after the pandemic started
after_arrest_groupby = after_copy.groupby(by=['arrest'])[['unique_key']].count()

# Number of incidents per arrest label ('Yes' / 'No') before the pandemic
before_arrest_groupby = before_copy.groupby(by=['arrest'])[['unique_key']].count()

# Place the two count tables side by side, aligned on the arrest label
before_after_arrest = pd.concat(
    [before_arrest_groupby, after_arrest_groupby],
    axis='columns',
)

# Give the two count columns descriptive names
before_after_arrest.columns = ['Arrest Before Pendemic', 'Arrest After Pendemic']

# Percentage change relative to the pre-pandemic count: (after - before) / before * 100
before_after_arrest['% Of Change'] = (
    before_after_arrest['Arrest After Pendemic']
    .sub(before_after_arrest['Arrest Before Pendemic'])
    .div(before_after_arrest['Arrest Before Pendemic'])
    .mul(100)
)

# Display the table with the arrest label as a regular column
before_after_arrest.reset_index()

In [None]:
# Grouped bar chart of the arrest comparison table.
Arrest_Viz = px.bar(
    data_frame=before_after_arrest,
    title="Arrest Before And After Covid",
    barmode='group',
    width=720,
    height=920,
)
Arrest_Viz.show()

#### Domestic

In [None]:
# Number of incidents per domestic label ('Yes' / 'No') after the pandemic started
after_domestic_groupby = after_copy.groupby(by=['domestic'])[['unique_key']].count()

# Number of incidents per domestic label ('Yes' / 'No') before the pandemic
before_domestic_groupby = before_copy.groupby(by=['domestic'])[['unique_key']].count()

# Place the two count tables side by side, aligned on the domestic label
before_after_domestic = pd.concat(
    [before_domestic_groupby, after_domestic_groupby],
    axis='columns',
)

# Give the two count columns descriptive names
before_after_domestic.columns = ['Number Of Domestic Before Pendemic', 'Number Of Domestic After Pendemic']

# Percentage change relative to the pre-pandemic count: (after - before) / before * 100
before_after_domestic['% Of Change'] = (
    before_after_domestic['Number Of Domestic After Pendemic']
    .sub(before_after_domestic['Number Of Domestic Before Pendemic'])
    .div(before_after_domestic['Number Of Domestic Before Pendemic'])
    .mul(100)
)

# Display the table with the domestic label as a regular column
before_after_domestic.reset_index()

In [None]:
# Grouped bar chart of the domestic comparison table.
# Stored under its own name: this cell previously reused Arrest_Viz (copy-paste
# slip), which overwrote the arrest figure with the domestic one.
Domestic_Viz = px.bar(before_after_domestic, title="Domestic Before And After Covid",
                        height=920,
                        width=720,
                        barmode='group',
)
Domestic_Viz.show()