# Analiza śmiertelnych interwencji policji w USA

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import urllib.request
import os
import warnings

# Keep notebook output free of warning noise.
# NOTE: this silences every warning category for the whole session,
# including pandas and matplotlib warnings.
warnings.simplefilter('ignore')

## 1. Pobieranie i wczytywanie danych z dysku

In [None]:
url = 'https://uploads.kodilla.com/bootcamp/pro-data-visualization/files/fatal-police-shootings-data.csv'
filename = 'fatal-police-shootings-data.csv'

# Download only when no local copy exists. The file is first written under a
# temporary name and moved into place on success, so an interrupted download
# cannot leave a truncated CSV that later runs would mistake for a valid cache.
if not os.path.exists(filename):
    partial_path = filename + '.part'
    try:
        urllib.request.urlretrieve(url, partial_path)
        os.replace(partial_path, filename)
    finally:
        # After a successful move the temporary file is gone; after a failed
        # download this removes the leftover fragment.
        if os.path.exists(partial_path):
            os.remove(partial_path)

# Load the cached CSV into the DataFrame used by every later cell.
df = pd.read_csv(filename)

## 2. Zestawienie rasy i oznak choroby psychicznej

In [None]:
# Cross-tabulate race (rows) against the signs_of_mental_illness flag (columns);
# margins=True adds the totals row and column.
pivot_table = pd.crosstab(
    index=df['race'],
    columns=df['signs_of_mental_illness'],
    margins=True,
)
# Bare expression so the notebook renders the table as the cell output.
pivot_table

## 3. Obliczanie odsetka przy użyciu `apply`

In [None]:
def _mental_illness_share(row):
    """Return the percentage of a row's cases flagged with signs of mental illness.

    `row` is one row of the race x signs_of_mental_illness count table, with
    the counts stored under the labels True and False. Returns 0 when the row
    has no cases at all, so the division is never attempted on an empty row.
    """
    flagged = row[True]
    total = flagged + row[False]
    if total > 0:
        return (flagged / total) * 100
    return 0


# Same cross-tabulation as before but without margins, so the totals row and
# column do not take part in the percentage calculation.
pivot_clean = pd.crosstab(df['race'], df['signs_of_mental_illness'])
pivot_clean['Odsetek_z_choroba (%)'] = pivot_clean.apply(_mental_illness_share, axis=1)

# Highest share first; later cells read the top row as the "leading" race.
pivot_clean = pivot_clean.sort_values(by='Odsetek_z_choroba (%)', ascending=False)
pivot_clean

## 4. Analiza interwencji według dni tygodnia

In [None]:
# Parse the date column into datetimes, then label every incident with the
# name of its weekday.
incident_dates = pd.to_datetime(df['date'])
df['date'] = incident_dates
df['day_of_week'] = incident_dates.dt.day_name()

# Calendar order (Monday first) for the chart; value_counts() on its own
# orders the result by frequency.
days_order = [
    'Monday',
    'Tuesday',
    'Wednesday',
    'Thursday',
    'Friday',
    'Saturday',
    'Sunday',
]
weekday_tally = df['day_of_week'].value_counts()
day_counts = weekday_tally.reindex(days_order)

# One bar per weekday, in calendar order.
plt.figure(figsize=(10, 5))
plt.bar(x=days_order, height=day_counts.values, color='steelblue')
plt.title('Liczba interwencji w zależności od dnia tygodnia')
plt.show()

## 5. Pobieranie danych o populacji i łączenie baz

In [None]:
url_pop = 'https://simple.wikipedia.org/wiki/List_of_U.S._states_by_population'
url_abbr = 'https://en.wikipedia.org/wiki/List_of_U.S._state_abbreviations'

# First HTML table on the population page; the columns at positions 2 and 3 are
# used as state name and population.
# NOTE(review): these positional indices depend on the current page layout - confirm.
pop_df = pd.read_html(url_pop)[0]
# .copy() gives an independent frame, so the column assignment below does not
# write into a slice (which triggers pandas' SettingWithCopyWarning - a warning
# the notebook's global warning filter would otherwise hide).
pop_df = pop_df.iloc[:, [2, 3]].copy()
pop_df.columns = ['State', 'Population']
# Strip thousands separators as a literal (non-regex) replacement; values that
# still fail to parse become NaN because of errors='coerce'.
pop_df['Population'] = pd.to_numeric(
    pop_df['Population'].astype(str).str.replace(',', '', regex=False),
    errors='coerce',
)

# First HTML table on the abbreviations page; rows from position 8 onward and
# the columns at positions 0 and 5 are used as state name and abbreviation.
# NOTE(review): presumably the skipped rows are header/non-state entries - confirm.
abbr_df = pd.read_html(url_abbr)[0]
abbr_df = abbr_df.iloc[8:, [0, 5]]
abbr_df.columns = ['State', 'Abbreviation']

# Inner joins: states without a match on either side are dropped.
states_info = pd.merge(pop_df, abbr_df, on='State')
incident_counts = df['state'].value_counts().reset_index()
incident_counts.columns = ['Abbreviation', 'Incidents']

final_df = pd.merge(incident_counts, states_info, on='Abbreviation')
final_df['per_1000'] = (final_df['Incidents'] / final_df['Population']) * 1000
final_df = final_df.sort_values('per_1000', ascending=False)

# Plot the 15 states with the highest rate; the y-axis is inverted so the
# highest rate appears at the top of the horizontal bar chart.
top_states = final_df.head(15)
plt.figure(figsize=(12, 6))
plt.barh(top_states['Abbreviation'], top_states['per_1000'], color='darkred')
plt.gca().invert_yaxis()
plt.xlabel('Liczba incydentów na 1000 mieszkańców')
plt.title('Top 15 stanów - wskaźnik interwencji')
plt.show()

## 6. Podsumowanie

In [None]:
# Report header and overall incident count.
print("PODSUMOWANIE ANALIZY")
print(f"Całkowita liczba incydentów: {len(df)}")

# pivot_clean is sorted by percentage in descending order, so its first row
# is the race with the highest share.
top_race = pivot_clean.index[0]
top_race_share = pivot_clean.iloc[0]['Odsetek_z_choroba (%)']
print(f"Rasa z największym odsetkiem choroby psychicznej: {top_race} ({top_race_share:.2f}%)")

# Weekday whose incident count is the largest.
busiest_day = day_counts.idxmax()
print(f"Dzień tygodnia z największą liczbą incydentów: {busiest_day}")

# final_df is sorted by per_1000 in descending order, so its first row is the
# state with the highest rate.
top_state = final_df.iloc[0]
print(f"Stan z największym wskaźnikiem na 1000 osób: {top_state['State']} ({top_state['per_1000']:.4f})")