# Import libraries

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import plotly.express as px

# Import file

In [2]:
# Load the cleaned SFPD stop records from the CSV file located next to the notebook.
SFPD = pd.read_csv(filepath_or_buffer='SFPD_stop_cleaned_data.csv')

  SFPD = pd.read_csv('SFPD_stop_cleaned_data.csv')


# Adding new columns

In [11]:
# Build the working frame from the raw load: every column whose name contains
# "code" is dropped, district labels are normalised, and calendar features are
# derived from the stop timestamp.
df = SFPD.drop(columns=SFPD.filter(like="code", axis=1).columns)

# Merge both "unknown / outside SF" spellings and missing values into a single
# label, then upper-case everything. The result is assigned back to the column:
# calling fillna(..., inplace=True) on a column selection is a chained
# assignment that does not update the frame under pandas Copy-on-Write, which
# would leave NaN to be stringified as "NAN" by the upper-casing step.
df['district'] = (
    df['district']
    .replace({'OUT OF SF / UNK': 'Unknown', 'UNK/OUT OF SF': 'Unknown'})
    .fillna('Unknown')
    .astype(str)
    .str.upper()
)

# errors='coerce' turns unparseable timestamps into NaT, so the derived
# features below are missing for those rows.
df['stop_datetime'] = pd.to_datetime(df['stop_datetime'], errors='coerce')
df['stop_year'] = df['stop_datetime'].dt.year
df['stop_month'] = df['stop_datetime'].dt.month
df['stop_weekday'] = df['stop_datetime'].dt.dayofweek
df['stop_hour'] = df['stop_datetime'].dt.hour
df['stop_year_month'] = df['stop_datetime'].dt.to_period("M")
def type_of_result(x):
    """Map a stop-result code to a binary "action taken" flag.

    Codes "1", "2" and "4" (selected by the author as "No action", "Warning"
    and "on site & release") map to 0; every other value maps to 1.

    Parameters
    ----------
    x : str
        Result code as text. Surrounding whitespace and a trailing ".0" are
        ignored, because a numeric column that contains NaN is read as float
        and stringifies as "1.0" rather than "1".

    Returns
    -------
    int
        0 when the code is one of "1", "2", "4", otherwise 1.
    """
    code = str(x).strip()
    # Normalise float-formatted codes ("1.0" -> "1") so they are not
    # silently classified as "action taken".
    if code.endswith(".0"):
        code = code[:-2]
    return 0 if code in {"1", "2", "4"} else 1
# The code columns were dropped from df, so the result code is read from the
# raw SFPD frame and converted to the binary flag.
result_codes = SFPD["results_of_stop_code"].astype(str)
df["type_of_results"] = result_codes.map(type_of_result)
# Keep only the rows that have a recorded result.
df = df[df['results_of_stop'].notna()]
def race(x):
    """Collapse a perceived race/ethnicity label into a coarse group.

    Returns one of "Black", "White", "Asian", "Hispanic" or "Mixed" for the
    labels listed in the lookup table, and "Other" for any other value.
    """
    groups = {
        "Black/African American": "Black",
        "White": "White",
        "Asian": "Asian",
        "Middle Eastern or South Asian": "Asian",
        "Hispanic/Latino(a)": "Hispanic",
        "Multi-racial": "Mixed",
    }
    return groups.get(x, "Other")
# Coarse ethnicity group derived from the perceived race/ethnicity label.
df["ethnicity_group"] = df["perceived_race_ethnicity"].astype(str).map(race)
def race_in_pop(x):
    """Return the population share associated with an ethnicity group.

    The shares are the constants hard-coded below; any group that is not in
    the table gets 0.01.
    """
    shares = {
        "Black": 0.054,
        "White": 0.412,
        "Asian": 0.339,
        "Hispanic": 0.147,
        "Mixed": 0.039,
    }
    return shares.get(x, 0.01)
# Attach to every stop the population share of its ethnicity group.
df["race_in_pop"] = df["ethnicity_group"].astype(str).map(race_in_pop)

# EDA

## Impact of chronology (Year_Month) (Covid)

In [4]:
# Monthly aggregates, all keyed on the precomputed stop_year_month period so
# the three series share one grouping (previously result_prob recomputed the
# period from stop_datetime).
monthly = df.groupby('stop_year_month')
stop_counts = monthly['doj_record_id'].count()      # number of stops per month
duration = monthly['duration_of_stop'].mean()       # mean stop duration per month
result_prob = monthly['type_of_results'].mean()     # share of stops flagged as "action taken"

### Stop counts

In [35]:
# Line chart of the monthly stop counts; periods are rendered as strings for the x axis.
fig = px.line(x=list(map(str, stop_counts.index)), y=stop_counts.values)
fig.update_layout(
    height=600,
    width=1000,
    title={'text': 'Number of stop along the years', 'font_family': 'Arial Black'},
)
fig.update_xaxes(title_text='Month')
fig.update_yaxes(title_text='Stop counts')

In [6]:
# Visible decrease in SFPD stops around March 2020 ==> Covid impact

### Duration of stop

In [7]:
# Line chart of the mean stop duration per month.
fig = px.line(x=list(map(str, duration.index)), y=duration.values)
fig.update_layout(
    height=600,
    width=1000,
    title={'text': 'Average duration of stop along the years', 'font_family': 'Arial Black'},
)
fig.update_xaxes(title_text='Month')
fig.update_yaxes(title_text='Average duration of stop')

### Probability of action taken

In [8]:
# Line chart of the monthly mean of type_of_results (share of stops with an action taken).
fig = px.line(x=list(map(str, result_prob.index)), y=result_prob.values)
fig.update_layout(
    height=600,
    width=1000,
    title={'text': 'Probability of actions taken from SFPD along the years', 'font_family': 'Arial Black'},
)
fig.update_xaxes(title_text='Month of Year')
fig.update_yaxes(title_text='Probability of action taken')

## Impact of hours of the day

### Stop count

In [9]:
# Number of stops recorded for each hour of the day.
h = df.groupby('stop_hour')['doj_record_id'].count()

# Bars are coloured by their own height.
fig = px.bar(x=list(map(str, h.index)), y=h.values, color=h.values)
fig.update_layout(
    height=600,
    width=1000,
    title={'text': 'Number of SFPD stops according to the time of day', 'font_family': 'Arial Black'},
)
fig.update_xaxes(title_text='Hour in the day')
fig.update_yaxes(title_text='Stops count')

### Average duration

In [10]:
# Mean stop duration for each hour of the day.
h2 = df.groupby('stop_hour')['duration_of_stop'].mean()

fig = px.bar(x=list(map(str, h2.index)), y=h2.values, color=h2.values)
fig.update_layout(
    height=600,
    width=1000,
    title={'text': 'Average duration of SFPD stop according to the time of day', 'font_family': 'Arial Black'},
)
fig.update_xaxes(title_text='Hour in the day')
fig.update_yaxes(title_text='Average duration of stop')

### With standard deviations

In [11]:
# Standard deviation of the stop duration for each hour of the day.
h_ = df.groupby('stop_hour')['duration_of_stop'].std()

# Same bars as the mean-duration chart (h2 comes from the previous cell),
# with the per-hour standard deviation drawn as error bars.
fig = px.bar(
    x=list(map(str, h2.index)),
    y=h2.values,
    color=h2.values,
    error_y=h_,
)
fig.update_layout(
    height=600,
    width=1000,
    title={'text': 'Duration of SFPD stop according to the time of day', 'font_family': 'Arial Black'},
)
fig.update_xaxes(title_text='Hour in the day')
fig.update_yaxes(title_text='Average duration of stop')

### Probability of action taken

In [12]:
# Mean of type_of_results per hour, i.e. the share of stops with an action taken.
h1 = df.groupby('stop_hour')['type_of_results'].mean()

fig = px.bar(x=list(map(str, h1.index)), y=h1.values, color=h1.values)
fig.update_layout(
    height=600,
    width=1000,
    title={'text': 'Probability of action taken by SFPD according to the time of day', 'font_family': 'Arial Black'},
)
fig.update_xaxes(title_text='Hour in the day')
fig.update_yaxes(title_text='Probability of action taken')

## Impact of ethnicity

### Stop counts

In [13]:
# Share of all stops that falls in each ethnicity group.
j = df.groupby('ethnicity_group')['doj_record_id'].count() / len(df['doj_record_id'])
# Population share per group (race_in_pop is constant within a group, so the mean returns it).
jj = df.groupby('ethnicity_group')['race_in_pop'].mean()

# Line: population share; bars: share of stops.
fig = px.line(x=[str(i) for i in jj.index], y=jj.values, color=px.Constant("Proportion of SF population"))
# The bars count stops, not arrests, so the legend label says "stops" to match the title.
fig.add_bar(x=[str(i) for i in j.index], y=j.values, name="Proportion of stops")
fig.update_layout(height=600, width=1000, title=dict(text='Proportion of SFPD stops according to ethnicity', font_family='Arial Black'))
fig.update_xaxes(title_text='Ethnicity')
fig.update_yaxes(title_text='Proportion of total stops')

### Duration of stop

In [37]:
# Median stop duration for each perceived race/ethnicity label.
j2 = df.groupby('perceived_race_ethnicity')['duration_of_stop'].median()

# The statistic plotted is a median, so the title and axis label say "Median"
# (they previously said "Average", which misdescribed the data).
fig = px.bar(x=[str(i) for i in j2.index], y=j2.values, color=j2.values)
fig.update_layout(height=600, width=1000, title=dict(text='Median duration of SFPD stops according to ethnicity', font_family='Arial Black'))
fig.update_xaxes(title_text='Ethnicity')
fig.update_yaxes(title_text='Median stop duration')

### Probability of action taken

In [38]:
# Mean of type_of_results for each perceived race/ethnicity label.
j1 = df.groupby('perceived_race_ethnicity')['type_of_results'].mean()

fig = px.bar(x=list(map(str, j1.index)), y=j1.values, color=j1.values)
fig.update_layout(
    height=600,
    width=1000,
    title={'text': 'Probability of action taken by SFPD depending on ethnicity', 'font_family': 'Arial Black'},
)
fig.update_xaxes(title_text='Ethnicity')
fig.update_yaxes(title_text='Probability of action taken')

## Impact of district

In [16]:
# Share of all stops per district, largest first.
k = (
    df.groupby('district')['doj_record_id'].count() / len(df['doj_record_id'])
).sort_values(ascending=False)

fig = px.bar(x=list(map(str, k.index)), y=k.values, color=k.values)
fig.update_layout(
    height=600,
    width=1000,
    title={'text': 'Proportion of SFPD stops according to district', 'font_family': 'Arial Black'},
)
fig.update_xaxes(title_text='District')
fig.update_yaxes(title_text='Proportion of total stops')

In [17]:
# Mean stop duration per district, longest first.
k2 = df.groupby('district')['duration_of_stop'].mean().sort_values(ascending=False)

fig = px.bar(x=list(map(str, k2.index)), y=k2.values, color=k2.values)
fig.update_layout(
    height=600,
    width=1000,
    title={'text': 'Average stop duration depending on district', 'font_family': 'Arial Black'},
)
fig.update_xaxes(title_text='District')
fig.update_yaxes(title_text='Average stop duration')

In [18]:
# Mean of type_of_results per district, highest first.
k1 = df.groupby('district')['type_of_results'].mean().sort_values(ascending=False)

fig = px.bar(x=list(map(str, k1.index)), y=k1.values, color=k1.values)
fig.update_layout(
    height=600,
    width=1000,
    title={'text': 'Probability of action taken by SFPD depending on district', 'font_family': 'Arial Black'},
)
fig.update_xaxes(title_text='District')
fig.update_yaxes(title_text='Probability of action taken')

## Impact of person number

In [40]:
# Mean of type_of_results for each value of person_number.
l = df.groupby('person_number')['type_of_results'].mean()

fig = px.bar(x=list(map(str, l.index)), y=l.values, color=l.values)
fig.update_layout(
    height=600,
    width=1000,
    title={
        'text': 'Probability of action taken by SFPD depending on the number of person involved in the stop',
        'font_family': 'Arial Black',
    },
)
fig.update_xaxes(title_text='Number of person involved in the stop')
fig.update_yaxes(title_text='Probability of action taken')