In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import urllib.request
import os
import warnings

warnings.filterwarnings('ignore')

In [None]:
# Source CSV and the local file name it is cached under.
url = 'https://uploads.kodilla.com/bootcamp/pro-data-visualization/files/fatal-police-shootings-data.csv'
filename = 'fatal-police-shootings-data.csv'

# Download only when no local copy exists. The transfer goes to a temporary
# name first and is moved into place only after it completes, so an
# interrupted or short download can never leave a truncated file that the
# existence check above would accept on the next run.
if not os.path.exists(filename):
    partial_path = filename + '.part'
    try:
        urllib.request.urlretrieve(url, partial_path)
        os.replace(partial_path, filename)
    finally:
        # After a successful move this is a no-op; after a failure it cleans up.
        if os.path.exists(partial_path):
            os.remove(partial_path)

df = pd.read_csv(filename)
df.head()

In [None]:
# Cross-tabulate race (rows) against the mental-illness flag (columns),
# with margins enabled so the row/column totals are included.
pivot_table = pd.crosstab(
    index=df['race'],
    columns=df['signs_of_mental_illness'],
    margins=True,
)
pivot_table

In [None]:
# Same race x mental-illness counts, this time without the margin totals.
pivot_clean = pd.crosstab(index=df['race'], columns=df['signs_of_mental_illness'])

# Share (in percent) of the True column within each row; a row whose total
# is zero gets 0 instead of a division result.
row_totals = pivot_clean[True] + pivot_clean[False]
ill_share = (pivot_clean[True] / row_totals) * 100
pivot_clean['Odsetek_z_choroba (%)'] = ill_share.where(row_totals > 0, 0)

# Highest share first.
pivot_clean = pivot_clean.sort_values(by='Odsetek_z_choroba (%)', ascending=False)
pivot_clean

In [None]:
# Parse the date column and derive the weekday name of every record.
df['date'] = pd.to_datetime(df['date'])
df['day_of_week'] = df['date'].dt.day_name()

# Count records per weekday, arranged Monday through Sunday.
days_order = 'Monday Tuesday Wednesday Thursday Friday Saturday Sunday'.split()
day_counts = df['day_of_week'].value_counts().reindex(days_order)

# Bar chart of the per-weekday counts.
fig, ax = plt.subplots(figsize=(10, 5))
ax.bar(days_order, day_counts.values, color='steelblue')
ax.set_title('Liczba interwencji wg dnia tygodnia')
plt.show()

In [None]:
url_pop = "https://en.wikipedia.org/wiki/List_of_states_and_territories_of_the_United_States_by_population"
url_abbr = "https://en.wikipedia.org/wiki/List_of_U.S._state_abbreviations"

# Parse every HTML table on both pages. A browser-like User-Agent is sent with
# each request (presumably because the default client is rejected - TODO confirm).
tables_pop = pd.read_html(url_pop, storage_options={"User-Agent": "Mozilla/5.0"})
tables_abbr = pd.read_html(url_abbr, storage_options={"User-Agent": "Mozilla/5.0"})

# --- Fetch population ---
# Take the first table that has a column whose name contains the marker.
# The search result is checked explicitly so that a page-layout change raises
# here, instead of leaving `pop_df` undefined or - on a re-run of this cell -
# silently reusing a stale `pop_df` from an earlier execution.
POP_MARKER = "2020 pop."
pop_df = None
for candidate in tables_pop:
    marked = [c for c in candidate.columns if POP_MARKER in str(c)]
    if marked:
        # Prefer the exact header; otherwise fall back to the first header
        # that merely contains the marker, so the rename below always hits
        # the same column the search matched.
        pop_col = POP_MARKER if POP_MARKER in marked else marked[0]
        pop_df = candidate.rename(
            columns={candidate.columns[0]: "State", pop_col: "Population"}
        )
        break
if pop_df is None:
    raise ValueError(f"No table with a '{POP_MARKER}' column found at {url_pop}")

# Strip thousands separators, convert to numbers, and drop rows whose
# population could not be parsed.
pop_df["Population"] = pop_df["Population"].astype(str).str.replace(",", "")
pop_df["Population"] = pd.to_numeric(pop_df["Population"], errors="coerce")
pop_df = pop_df.dropna(subset=["Population"])

# --- Pick the state-abbreviation table (table 1, MultiIndex columns) ---
# Work on a copy so the parsed table list itself is left untouched.
abbr_table = tables_abbr[1].copy()

# Flatten the two header levels into single "<top>_<bottom>" strings.
abbr_table.columns = [f"{a}_{b}" for a, b in abbr_table.columns]

abbr_df = abbr_table[["Name_Name", "USPS (& ANSI)_Unnamed: 4_level_1"]].copy()
abbr_df.columns = ["State", "Abbreviation"]

# --- Merge the data ---
# Inner joins: rows without a counterpart on the other side are dropped.
states_info = pd.merge(pop_df, abbr_df, on="State")

# Number of records per state code found in the main dataset.
incident_counts = df["state"].value_counts().reset_index()
incident_counts.columns = ["Abbreviation", "Incidents"]

final_df = pd.merge(incident_counts, states_info, on="Abbreviation")
final_df["per_1000"] = (final_df["Incidents"] / final_df["Population"]) * 1000
final_df = final_df.sort_values("per_1000", ascending=False)

final_df.head()

In [None]:
# Horizontal bar chart of the first 15 rows of final_df.
top_states = final_df.head(15)

fig, ax = plt.subplots(figsize=(12, 6))
ax.barh(top_states['Abbreviation'], top_states['per_1000'], color='darkred')
ax.invert_yaxis()  # flip the axis so the first row is drawn at the top
ax.set_title('Top 15 stanów wg liczby incydentów na 1000 mieszkańców')
plt.show()

In [None]:
# Headline figures collected from the tables built earlier in the notebook,
# printed one per line as "<label> <value>".
summary_lines = [
    ("Całkowita liczba incydentów:", len(df)),
    ("Najwyższy odsetek chorób psychicznych:", pivot_clean['Odsetek_z_choroba (%)'].max()),
    ("Najczęstszy dzień tygodnia:", day_counts.idxmax()),
    ("Stan z najwyższym wskaźnikiem per 1000:", final_df['State'].iloc[0]),
]
for label, value in summary_lines:
    print(label, value)