In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from constants.file_paths import FilePaths

In [None]:
# Read the crime records from the project's parquet file and preview the
# first five rows.
crime_sample_df = pd.read_parquet(path=FilePaths.crime_parquet_columns_of_interest)
crime_sample_df.head(n=5)

In [5]:
# Drop rows that are not informative for the plots below:
#   - relationship is "Relationship Unknown" or "Victim Was Stranger"
#   - offense_category is "None/Unknown"
# All three conditions are combined into one mask so the frame is filtered once.
crime_sample_df = crime_sample_df[
    (crime_sample_df["relationship"] != "Relationship Unknown")
    & (crime_sample_df["relationship"] != "Victim Was Stranger")
    & (crime_sample_df["offense_category"] != "None/Unknown")
]

# Optional (disabled): keep only the 7 most frequent offense categories.
# top_7_offense_category = crime_sample_df["offense_category"].value_counts().nlargest(7).index
# crime_sample_df = crime_sample_df[crime_sample_df["offense_category"].isin(top_7_offense_category)]

In [None]:
def move_legend_outside_plot(ax):
    """Place the legend of ``ax`` to the right of the plotting area.

    Calls ``ax.legend`` with ``loc='upper left'`` and
    ``bbox_to_anchor=(1.05, 1)``, i.e. the legend's upper-left corner is
    pinned slightly past the right edge so it does not cover the bars.

    Args:
        ax: Object exposing a matplotlib-style ``legend(**kwargs)`` method.
    """
    legend_options = {"loc": "upper left", "bbox_to_anchor": (1.05, 1)}
    ax.legend(**legend_options)

def add_relationship_labels(ax, title="Proportion of Criminal Acts for each Relationship Type"):
    """Set the title and axis labels used by the relationship proportion charts.

    The x axis is labelled "Proportion" and the y axis "Relationship Type".

    Args:
        ax: Object exposing matplotlib-style ``set_title``, ``set_xlabel`` and
            ``set_ylabel`` methods.
        title: Chart title. Defaults to the offense-category wording so that
            existing ``add_relationship_labels(ax)`` calls behave as before;
            pass a different string to reuse the helper for other charts.
    """
    ax.set_title(title)
    ax.set_xlabel("Proportion")
    ax.set_ylabel("Relationship Type")

# Stacked horizontal bars: for each relationship type, the share of each
# offense category (value_counts(normalize=True) makes every bar sum to 1).
fig, ax = plt.subplots()
(
    crime_sample_df
    .groupby("relationship")["offense_category"]
    .value_counts(normalize=True)
    .unstack()
    .plot(kind="barh", stacked=True, ax=ax)
)
# Title and axis labels so the figure can be read on its own.
add_relationship_labels(ax)
move_legend_outside_plot(ax)
plt.show()

# Exclude "Assault Offenses" from the plot.
# NOTE: this reassigns crime_sample_df, so the cells that follow also see the
# frame without assault rows, and re-running this cell without re-running the
# filtering cell first will draw the chart above without them as well.
crime_sample_df = crime_sample_df[crime_sample_df["offense_category"] != "Assault Offenses"]

fig, ax = plt.subplots()
(
    crime_sample_df
    .groupby("relationship")["offense_category"]
    .value_counts(normalize=True)
    .unstack()
    .plot(kind="barh", stacked=True, ax=ax)
)
add_relationship_labels(ax)
# Override the shared title so the two charts can be told apart.
ax.set_title("Proportion of Criminal Acts for each Relationship Type (excluding Assault Offenses)")
move_legend_outside_plot(ax)
plt.show()

In [None]:
# When the offense category is "Larceny/Theft Offenses", draw the same kind of
# stacked proportion plot between relationship and "property_description".

# Keep only larceny/theft rows so the chart shows what its title says; without
# this filter the proportions would also count every other offense category
# that happens to share one of the top property descriptions.
# "Other" property descriptions are excluded as they are not informative for
# the plot. The result is stored under its own name so crime_sample_df is not
# overwritten by this cell.
larceny_df = crime_sample_df[
    (crime_sample_df["offense_category"] == "Larceny/Theft Offenses")
    & (crime_sample_df["property_description"] != "Other")
]

# The 10 most frequent property descriptions among the larceny/theft rows.
top_10_property_description = larceny_df["property_description"].value_counts().nlargest(10).index
larceny_top_df = larceny_df[larceny_df["property_description"].isin(top_10_property_description)]

fig, ax = plt.subplots()
(
    larceny_top_df
    .groupby("relationship")["property_description"]
    .value_counts(normalize=True)
    .unstack()
    .plot(kind="barh", stacked=True, ax=ax)
)

move_legend_outside_plot(ax)
ax.set_title("Proportion of Larceny/Theft Offenses for each Relationship Type")
ax.set_xlabel("Proportion")
ax.set_ylabel("Relationship Type")
plt.show()

In [None]:
# Start again from the parquet file: earlier cells narrowed crime_sample_df
# (e.g. dropped "Assault Offenses"), and this table needs every offense
# category. Only the uninformative relationship / offense values are removed.
crime_sample_df = pd.read_parquet(FilePaths.crime_parquet_columns_of_interest)
crime_sample_df = crime_sample_df[
    (crime_sample_df["relationship"] != "Relationship Unknown")
    & (crime_sample_df["relationship"] != "Victim Was Stranger")
    & (crime_sample_df["offense_category"] != "None/Unknown")
]

# Share of each offense category within every relationship type; combinations
# that never occur come out of unstack() as NaN and are reported as 0.
out = (
    crime_sample_df
    .groupby("relationship")["offense_category"]
    .value_counts(normalize=True)
    .unstack()
    .fillna(0)
)
out.to_dict()