In [162]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from handlers.database import get_crime_relationship_statistics_from_mongo, get_hate_crime_statistics_from_mongo, get_property_statistics_from_mongo

In [163]:
# Fetch the relationship statistics document and keep only its "statistics" payload.
relationship_statistics = get_crime_relationship_statistics_from_mongo()["statistics"]

# Pull out the four variants stored under the payload (raw / normalized,
# with / without assaults), in the order they are plotted later.
(
    relationships,
    relationships_normalized,
    relationships_without_assaults,
    relationships_without_assaults_normalized,
) = (
    relationship_statistics[key]
    for key in (
        "assaults",
        "assaults_normalized",
        "without_assaults",
        "without_assaults_normalized",
    )
)

# Wrap every variant in a DataFrame so it can be handed to the plotting helpers.
(
    relationship_df,
    relationship_df_normalized,
    relationship_df_without_assaults,
    relationship_df_without_assaults_normalized,
) = map(
    pd.DataFrame,
    (
        relationships,
        relationships_normalized,
        relationships_without_assaults,
        relationships_without_assaults_normalized,
    ),
)

In [164]:
def move_legend_outside_plot(ax):
    """
    Re-create the legend of ``ax`` so it is anchored outside the plotting area.

    Parameters:
    - ax: the axes object whose ``legend`` method is called

    The legend's upper-left corner is anchored at (1.05, 1).
    """
    legend_options = {"bbox_to_anchor": (1.05, 1), "loc": "upper left"}
    ax.legend(**legend_options)

def add_relationship_labels(ax):
    """
    Apply the fixed title and axis labels used by the relationship charts.

    Parameters:
    - ax: the axes object to label (title, x label and y label are set, in that order)
    """
    labelled_setters = (
        (ax.set_title, "Proportion of Criminal Acts for each Relationship Type"),
        (ax.set_xlabel, "Proportion"),
        (ax.set_ylabel, "Relationship Type"),
    )
    for apply_label, text in labelled_setters:
        apply_label(text)

In [165]:
def plot_swagger_df(df, title, normalized=False, orientation='h'):
    """
    Create a sleek stacked bar chart using seaborn and matplotlib.

    One bar is drawn per row of ``df``; every column contributes one stacked,
    individually coloured segment to each bar.

    Parameters:
    - df: pd.DataFrame, the data to plot (columns as categories, rows as index).
      Missing cells are treated as 0 so they cannot break the stacking offsets.
    - title: str, the title of the plot
    - normalized: bool, whether to normalize rows to sum to 1. Rows whose total
      is 0 are left as all-zero bars instead of turning into NaN.
    - orientation: str, 'h' (default) for horizontal bars or 'v' for vertical
      bars; seaborn's 'y' / 'x' spellings are accepted as well.

    The figure is shown with ``plt.show()``; nothing is returned.
    """
    vertical = orientation in ('v', 'x')

    # A NaN segment would propagate into every later stacking offset of its row,
    # so missing values are plotted as zero-length segments instead.
    df = df.fillna(0)

    # Normalize rows if needed
    if normalized:
        row_totals = df.sum(axis=1)
        # Divide empty rows by 1 rather than 0 to avoid 0/0 -> NaN.
        df = df.div(row_totals.where(row_totals != 0, 1), axis=0)

    # Set a sleek seaborn style
    sns.set_theme(style="whitegrid", palette="pastel")

    # Create the figure and axis
    fig, ax = plt.subplots(figsize=(12, 7))

    # Get a vibrant color palette for the bars
    colors = sns.color_palette("husl", n_colors=df.shape[1])

    # Plot each column as one stacked segment. `offset` is where the segment
    # starts: the running sum of all previously drawn columns (None for the first).
    offset = None
    for color, column in zip(colors, df.columns):
        segment = df[column]
        if vertical:
            # Vertical bars: categories on x, values on y, stacked via `bottom`.
            placement = dict(x=df.index, y=segment, bottom=offset)
        else:
            # Horizontal bars: values on x, categories on y, stacked via `left`.
            placement = dict(x=segment, y=df.index, left=offset)
        sns.barplot(
            color=color,
            label=column,
            orient=orientation,
            ax=ax,
            **placement
        )
        offset = segment if offset is None else offset + segment

    # Add title with a larger, bold font
    ax.set_title(title, fontsize=18, weight='bold', pad=20)

    # Move the legend outside the plot
    ax.legend(title="Categories", bbox_to_anchor=(1.05, 1), loc='upper left', frameon=True)

    # Add gridlines and make them more subtle
    ax.grid(color='gray', linestyle='--', linewidth=0.5, alpha=0.6)

    # Label the value axis; the category axis stays unlabeled
    value_label = "Proportion" if normalized else "Value"
    if vertical:
        ax.set_xlabel("", fontsize=14)
        ax.set_ylabel(value_label, fontsize=14)
    else:
        ax.set_ylabel("", fontsize=14)
        ax.set_xlabel(value_label, fontsize=14)

    # Tighten layout and show the plot
    plt.tight_layout()
    plt.show()

In [166]:
def plot_swagger_dict(data, title):
    """
    Draw a horizontal bar chart of a dictionary, largest value first.

    Parameters:
    - data: dict, the data to plot (keys as categories, values as counts)
    - title: str, the title of the plot

    Every bar is annotated with its value converted via ``int``. The figure is
    shown with ``plt.show()``; nothing is returned.
    """
    # Rank the categories from the largest to the smallest value
    ranked = sorted(data.items(), key=lambda pair: pair[1], reverse=True)
    labels = [category for category, _ in ranked]
    heights = [count for _, count in ranked]

    # One bar per category, coloured individually from the "husl" palette
    sns.set_theme(style="whitegrid", palette="pastel")
    fig, ax = plt.subplots(figsize=(12, 7))
    sns.barplot(x=heights, y=labels, palette="husl", ax=ax, hue=labels)

    # Title and axis labels (the category axis stays unlabeled)
    ax.set_title(title, fontsize=18, weight="bold", pad=20)
    ax.set_xlabel("Count", fontsize=14)
    ax.set_ylabel("")

    # Annotate each bar just past its end; the gap is 1% of the longest bar
    gap = max(heights) * 0.01 if heights else 0
    text_style = dict(va="center", ha="left", fontsize=10, color="black", weight="bold")
    for row, count in enumerate(heights):
        ax.text(count + gap, row, f"{int(count)}", **text_style)

    plt.tight_layout()
    plt.show()


In [None]:
# Draw the four relationship charts: with and without assaults, each once as
# given and once with normalized=True (which makes plot_swagger_df rescale rows).
relationship_charts = (
    (relationship_df, "Proportion of Criminal Acts for each Relationship Type", False),
    (relationship_df_normalized, "Proportion of Criminal Acts for each Relationship Type (Normalized)", True),
    (relationship_df_without_assaults, "Proportion of Criminal Acts for each Relationship Type (Without Assaults)", False),
    (relationship_df_without_assaults_normalized, "Proportion of Criminal Acts for each Relationship Type (Without Assaults, Normalized)", True),
)
for chart_df, chart_title, chart_normalized in relationship_charts:
    plot_swagger_df(chart_df, chart_title, normalized=chart_normalized)

In [None]:
# Load the hate-crime statistics document and pick out the two tables plotted below.
hate_crime_statistics = get_hate_crime_statistics_from_mongo()
# NOTE(review): `offender_counts` is read from the "offense_counts" key but is later
# charted as "Offender Race" — confirm against the stored document that this is the
# intended key.
offender_counts, motive_counts = (
    hate_crime_statistics[key] for key in ("offense_counts", "motive_counts")
)

In [None]:
# One sorted bar chart per hate-crime table.
for chart_counts, chart_title in (
    (offender_counts, "Offender Race in Hate Crimes"),
    (motive_counts, "Motive in Hate Crimes"),
):
    plot_swagger_dict(chart_counts, chart_title)

In [None]:
def plot_grouped_stats(stats_dict, title):
    """
    Plot mean and median values as grouped horizontal bars, one group per category.

    Parameters:
    - stats_dict: dict with a 'mean' and a 'median' entry, each a dict mapping
      category -> numeric value
    - title: str, the title of the plot

    The categories and their order are taken from the 'mean' entry. Medians are
    looked up by category key, so a differently ordered 'median' dict cannot
    attach a median to the wrong label; a category without a median gets NaN
    (no bar is drawn for it).

    The figure is shown with ``plt.show()``; nothing is returned.
    """
    # Extract means and medians from the dictionary
    mean_values = stats_dict['mean']
    median_values = stats_dict['median']

    # Align both series on the categories of the mean dict
    categories = list(mean_values.keys())
    means = [mean_values[category] for category in categories]
    medians = [median_values.get(category, np.nan) for category in categories]

    # Set up the plot
    fig, ax = plt.subplots(figsize=(10, 6))

    # Positions for bars: one slot per category, mean and median offset by +/-0.2
    positions = np.arange(len(categories))

    # Create bars for mean and median
    ax.barh(positions - 0.2, means, height=0.4, label='Mean', color='skyblue')
    ax.barh(positions + 0.2, medians, height=0.4, label='Median', color='salmon')

    # Set labels
    ax.set_yticks(positions)
    ax.set_yticklabels(categories)
    ax.set_xlabel("Value (in USD)", fontsize=12)
    ax.set_title(title, fontsize=14, weight="bold")

    # Add legend
    ax.legend()

    # Show plot
    plt.tight_layout()
    plt.show()

In [None]:
# Load the property statistics document and split it into the two parts used
# below: the grouped statistics and the list of most expensive crimes.
property_statistics_dict = get_property_statistics_from_mongo()
property_statistics, most_expensive_crimes = (
    property_statistics_dict[key]
    for key in ("property_statistics", "most_expensive_crimes")
)


In [None]:
# Grouped mean/median bars for the property statistics loaded above.
plot_grouped_stats(
    stats_dict=property_statistics,
    title="Mean vs. Median Damage per Property Damage",
)

In [None]:
from html import escape

from IPython.display import display, HTML

# Render the most expensive crimes as an HTML table. Every cell is HTML-escaped:
# the text comes from the database, and characters such as "<" or "&" would
# otherwise be interpreted as markup and break (or inject into) the table.
table_rows = []
for rank, crime in enumerate(most_expensive_crimes, start=1):
    # Property value is shown rounded to whole millions of dollars
    property_value = f"${round(crime['property_value'] / 1_000_000)}M"
    description = f"{crime['property_description']} - {crime['property_loss_description']}"
    cells = (rank, property_value, description, crime['crime_against'], crime['location'])
    table_rows.append(
        "<tr>" + "".join(f"<td>{escape(str(cell))}</td>" for cell in cells) + "</tr>"
    )

html_output = (
    "<h3>Top 5 Most Expensive Crimes</h3>"
    "<table><tr><th>#</th><th>Property Value</th><th>Description</th><th>Crime Against</th><th>Location</th></tr>"
    + "".join(table_rows)
    + "</table>"
)
display(HTML(html_output))
