In [25]:
import pandas as pd

ht_data = pd.read_csv('HT_2013_2023.csv')
ct_data = pd.read_csv('CT_2013_2023.csv')
leoka_data = pd.read_csv('LEOKA_ASSAULT_TIME_WEAPON_INJURY_1995_2022.csv')

ht_filtered = ht_data[ht_data['COUNTY_NAME'].notna()] if 'COUNTY_NAME' in ht_data.columns else pd.DataFrame()
ct_filtered = ct_data[ct_data['county_name'].notna()] if 'county_name' in ct_data.columns else pd.DataFrame()
leoka_filtered = leoka_data[leoka_data['COUNTY_NAME'].notna()] if 'COUNTY_NAME' in leoka_data.columns else pd.DataFrame()

print(f"HT Dataset: {ht_filtered.shape[0]} rows, {ht_filtered.shape[1]} columns")
print(f"CT Dataset: {ct_filtered.shape[0]} rows, {ct_filtered.shape[1]} columns")
print(f"LEOKA Dataset: {leoka_filtered.shape[0]} rows, {leoka_filtered.shape[1]} columns")

HT Dataset: 5231 rows, 19 columns
CT Dataset: 228305 rows, 31 columns
LEOKA Dataset: 320295 rows, 31 columns


In [26]:
import pandas as pd

ct_data = pd.read_csv('CT_2013_2023.csv')

if 'offense_name' in ct_data.columns:
    offense_types = ct_data['offense_name'].unique()
    print(offense_types)
else:
    print("Column 'offense_name' not found in CT dataset.")

['All Other Larceny' 'Theft From Motor Vehicle' 'Motor Vehicle Theft'
 'Burglary/Breaking & Entering' 'Theft From Building' 'Embezzlement'
 'Shoplifting' 'Theft of Motor Vehicle Parts or Accessories' 'Robbery'
 'Drug Equipment Violations' 'Drug/Narcotic Violations'
 'False Pretenses/Swindle/Confidence Game' 'Rape' 'Kidnapping/Abduction'
 'Weapon Law Violations' 'Destruction/Damage/Vandalism of Property'
 'Credit Card/Automated Teller Machine Fraud' 'Aggravated Assault'
 'Stolen Property Offenses' 'Wire Fraud' 'Simple Assault'
 'Counterfeiting/Forgery' 'Identity Theft' 'Impersonation'
 'Hacking/Computer Invasion' 'Intimidation'
 'Theft From Coin-Operated Machine or Device' 'Pocket-picking'
 'Purse-snatching' 'Extortion/Blackmail' 'Betting/Wagering' 'Arson'
 'Criminal Sexual Contact' 'Murder and Nonnegligent Manslaughter'
 'Pornography/Obscene Material' 'Welfare Fraud' 'Bribery' 'Animal Cruelty'
 'Sodomy' 'Human Trafficking, Involuntary Servitude'
 'Negligent Manslaughter' 'Statutory Rap

In [33]:
crime_categories = {
    "Violent Crimes": [
        "Murder and Nonnegligent Manslaughter", "Negligent Manslaughter", "Rape", 
        "Statutory Rape", "Sexual Assault With An Object", "Sodomy", 
        "Kidnapping/Abduction", "Aggravated Assault", "Simple Assault", 
        "Intimidation", "Human Trafficking, Involuntary Servitude"
    ],
    "Property Crimes": [
        "Burglary/Breaking & Entering", "Motor Vehicle Theft", "Stolen Property Offenses",
        "Arson", "Destruction/Damage/Vandalism of Property", "Theft From Motor Vehicle",
        "Theft of Motor Vehicle Parts or Accessories", "Theft From Building",
        "Theft From Coin-Operated Machine or Device", "All Other Larceny", "Shoplifting",
        "Robbery"
    ],
    "Financial & Fraud Crimes": [
        "Embezzlement", "False Pretenses/Swindle/Confidence Game",
        "Credit Card/Automated Teller Machine Fraud", "Wire Fraud", "Identity Theft",
        "Impersonation", "Hacking/Computer Invasion", "Counterfeiting/Forgery",
        "Extortion/Blackmail", "Bribery", "Welfare Fraud"
    ],
    "Drug & Weapon-Related Crimes": [
        "Drug/Narcotic Violations", "Drug Equipment Violations", "Weapon Law Violations"
    ],
    "Other Crimes": [
        "Pornography/Obscene Material", "Betting/Wagering", "Animal Cruelty",
        "Criminal Sexual Contact", "Purse-snatching", "Pocket-picking"
    ]
}

def categorize_crime(offense):
    for category, crimes in crime_categories.items():
        if offense in crimes:
            return category
    return "Uncategorized"

if "offense_name" in ct_filtered.columns:
    ct_filtered["Crime_Category"] = ct_filtered["offense_name"].apply(categorize_crime)
    print("Crime categories assigned successfully.")
else:
    print("Error: 'offense_name' column not found in ct_filtered.")


Crime categories assigned successfully.


In [39]:
import geopandas as gpd
import pandas as pd
import folium
from folium.plugins import HeatMap

us_counties = gpd.read_file("https://www2.census.gov/geo/tiger/GENZ2020/shp/cb_2020_us_county_5m.zip")

us_counties["coords"] = us_counties["geometry"].centroid
us_counties["lat"] = us_counties["coords"].apply(lambda x: x.y)
us_counties["lon"] = us_counties["coords"].apply(lambda x: x.x)
us_counties["County"] = us_counties["NAME"].str.title()
ct_filtered["County"] = ct_filtered["County"].str.title()

if "Crime_Category" in ct_filtered.columns and "County" in ct_filtered.columns:
    county_counts = ct_filtered.groupby("County").size().reset_index(name="Count")
    ct_filtered = ct_filtered.merge(county_counts, on="County", how="left")
    print("Count column created successfully.")
else:
    print("Error: 'Crime_Category' or 'County' column is missing.")

if "population" in ct_filtered.columns:
    ct_filtered["Normalized_Count"] = ct_filtered["Count"] / ct_filtered["population"]
else:
    ct_filtered["Normalized_Count"] = ct_filtered["Count"]

crime_categories = ct_filtered["Crime_Category"].unique()

for category in crime_categories:
    category_data = ct_filtered[ct_filtered["Crime_Category"] == category]

    county_counts = category_data.groupby("County")["Normalized_Count"].sum().reset_index()
    county_counts.columns = ["County", "Count"]
    merged_data = us_counties.merge(county_counts, on="County", how="left").fillna(0)
    county_latlon = merged_data[["lat", "lon", "Count"]].dropna()
    heat_data = list(zip(county_latlon["lat"], county_latlon["lon"], county_latlon["Count"]))
    m = folium.Map(location=[37.8, -96], zoom_start=5)
    HeatMap(heat_data).add_to(m)

    display(m)
    print(f"Showing normalized heatmap for: {category}\n")


  us_counties["coords"] = us_counties["geometry"].centroid


Count column created successfully.


Showing normalized heatmap for: Property Crimes



Showing normalized heatmap for: Financial & Fraud Crimes



Showing normalized heatmap for: Drug & Weapon-Related Crimes



Showing normalized heatmap for: Violent Crimes



Showing normalized heatmap for: Other Crimes

