In [None]:
import sqlite3
import pandas as pd
from scipy.stats import pearsonr

# Correlate the monthly burglary count with the monthly count of every other
# crime type inside a single LSOA, then list the types from the strongest to
# the weakest relationship.

# Choose an LSOA (replace with one from your data)
target_lsoa = "E01024001"

# Load only the rows for the chosen LSOA. The filter is passed to SQLite as a
# bound parameter, so the full crime table is never pulled into memory, and
# the connection is released even when the query raises.
conn = sqlite3.connect("crime_data.db")
try:
    df = pd.read_sql_query(
        "SELECT Month, Type, LSOA_code FROM crime WHERE LSOA_code = ?",
        conn,
        params=(target_lsoa,),
    )
finally:
    conn.close()

# Group by Month and Crime Type → count occurrences
# (rows = months, columns = crime types, missing combinations become 0)
pivot_df = df.pivot_table(index="Month", columns="Type", aggfunc="size", fill_value=0)

# Extract burglary counts
if "Burglary" not in pivot_df.columns:
    raise ValueError(f"No 'Burglary' records found in LSOA {target_lsoa}")
burglary = pivot_df["Burglary"]

other_types = [crime_type for crime_type in pivot_df.columns if crime_type != "Burglary"]

# pearsonr needs at least two observations; fail with a message that names
# the actual problem instead of scipy's generic length error.
if other_types and len(pivot_df) < 2:
    raise ValueError(
        f"Need at least two months of data to compute correlations; "
        f"LSOA {target_lsoa} has {len(pivot_df)}"
    )

# Correlate all other crime types with burglary
correlations = {}
for crime_type in other_types:
    corr, _ = pearsonr(burglary, pivot_df[crime_type])
    correlations[crime_type] = corr


def _strength_key(item):
    """Sort key for (crime, r) pairs: largest |r| first, undefined r last.

    pearsonr yields NaN when one of the series is constant. NaN compares
    false against everything, so sorting on abs(r) alone would leave the
    list in an unpredictable order.
    """
    value = item[1]
    return (1, 0.0) if pd.isna(value) else (0, -abs(value))


# Sort and print
sorted_corr = sorted(correlations.items(), key=_strength_key)

print(f"\nCorrelations with 'Burglary' in LSOA: {target_lsoa}")
for crime, corr in sorted_corr:
    print(f"{crime:35s}: {corr:.3f}")


In [2]:
import sqlite3
import pandas as pd
from scipy.stats import pearsonr

# Pull the two columns needed for a monthly tally over all LSOAs combined
conn = sqlite3.connect("crime_data.db")
df = pd.read_sql_query("SELECT Month, Type FROM crime", conn)
conn.close()

# One row per month, one column per crime type; each cell is an incident count
monthly_counts = df.pivot_table(
    index="Month",
    columns="Type",
    aggfunc="size",
    fill_value=0,
)

burglary = monthly_counts["Burglary"]

# Pearson r between the burglary series and each remaining crime-type series
correlations = {
    crime_type: pearsonr(burglary, monthly_counts[crime_type])[0]
    for crime_type in monthly_counts.columns
    if crime_type != "Burglary"
}

# Order by strength of the relationship, ignoring its sign
sorted_corr = list(correlations.items())
sorted_corr.sort(key=lambda pair: abs(pair[1]), reverse=True)

print("Correlation of each crime type with 'Burglary' (monthly, all LSOAs combined):\n")
for crime, corr in sorted_corr:
    print(f"{crime:35s}: {corr:.3f}")


Correlation of each crime type with 'Burglary' (monthly, all LSOAs combined):

Other theft                        : 0.468
Vehicle crime                      : 0.420
Public order                       : -0.247
Bicycle theft                      : -0.242
Other crime                        : -0.216
Shoplifting                        : -0.211
Robbery                            : 0.186
Anti-social behaviour              : -0.103
Violence and sexual offences       : -0.095
Theft from the person              : 0.079
Possession of weapons              : -0.041
Drugs                              : -0.016
Criminal damage and arson          : 0.006


In [None]:
import sqlite3
import pandas as pd
from scipy.stats import pearsonr

# For every month, correlate burglary counts with each other crime type
# across LSOAs, then summarise how stable each correlation is over time.

# Load data; release the connection even when the query raises
conn = sqlite3.connect("crime_data.db")
try:
    df = pd.read_sql_query("SELECT Month, Type, LSOA_code FROM crime", conn)
finally:
    conn.close()

# Get list of months and types
months = df["Month"].unique()
crime_types = df["Type"].unique()  # not used below; kept so the name stays defined

correlation_records = []

for month in months:
    month_df = df[df["Month"] == month]

    # Pivot table: rows = LSOAs, cols = crime types
    pivot = month_df.pivot_table(index="LSOA_code", columns="Type", aggfunc="size", fill_value=0)

    # Skip if burglary is missing, or if there are fewer than two LSOAs
    # (pearsonr raises on fewer than two observations)
    if "Burglary" not in pivot.columns or len(pivot) < 2:
        continue

    burglary = pivot["Burglary"]

    for crime in pivot.columns:
        # Compare burglary against the *other* crime types only; correlating
        # it with itself always gives 1.0 and says nothing.
        if crime == "Burglary":
            continue
        corr, _ = pearsonr(burglary, pivot[crime])
        correlation_records.append({
            "Month": month,
            "Crime": crime,
            "CorrelationWithBurglary": corr
        })

# Create DataFrame (explicit columns keep the groupby below valid even when
# no month produced any record)
corr_df = pd.DataFrame(correlation_records, columns=["Month", "Crime", "CorrelationWithBurglary"])

# Get mean, standard deviation and variance per crime type
agg_stats = corr_df.groupby("Crime")["CorrelationWithBurglary"].agg(["mean", "std", "var"]).sort_values(by="std", ascending=False)

print("Std and Variance of Correlation with Burglary (per Crime Type across LSOAs and Months):\n")
print(agg_stats)


Std and Variance of Correlation with Burglary (per Crime Type across LSOAs and Months):

                                  mean       std       var
Crime                                                     
Theft from the person         0.342326  0.070057  0.004908
Other theft                   0.379932  0.066376  0.004406
Robbery                       0.365823  0.052314  0.002737
Shoplifting                   0.302507  0.040787  0.001664
Drugs                         0.264709  0.034044  0.001159
Bicycle theft                 0.294387  0.032341  0.001046
Vehicle crime                 0.272894  0.032120  0.001032
Anti-social behaviour         0.367711  0.031494  0.000992
Violence and sexual offences  0.372273  0.031097  0.000967
Public order                  0.333823  0.031050  0.000964
Possession of weapons         0.137609  0.030500  0.000930
Criminal damage and arson     0.275797  0.026935  0.000725
Other crime                   0.041197  0.017246  0.000297


In [2]:
import sqlite3
import pandas as pd
from scipy.stats import pearsonr

# Correlate burglary counts with each other crime type using (Month, LSOA)
# pairs in which BOTH crimes occurred, first pooled over all months and then
# month by month to measure how much the correlation varies.


def _pearson_or_nan(x, y):
    """Return Pearson's r between two Series, or NaN when r is undefined.

    r is undefined with fewer than two observations (pearsonr raises) or
    when either series is constant (pearsonr warns and returns NaN). Both
    cases are mapped to NaN so callers can filter them uniformly.
    """
    if len(x) < 2 or x.nunique() < 2 or y.nunique() < 2:
        return float("nan")
    r, _ = pearsonr(x, y)
    return float(r)


# Step 1: Load data (the connection is released even when the query raises)
conn = sqlite3.connect("crime_data.db")
try:
    df = pd.read_sql_query("SELECT Month, LSOA_code, Type FROM crime", conn)
finally:
    conn.close()

# Step 2: Count incidents per (Month, LSOA, Type). Only combinations that
# actually occur appear here, so every Count is at least 1.
grouped = df.groupby(["Month", "LSOA_code", "Type"]).size().reset_index(name="Count")

# Step 3: Pair every non-burglary count with the burglary count of the same
# (Month, LSOA). The inner merge keeps only pairs where both counts are
# non-zero, which is the same selection a row-by-row scan of a wide pivot
# would make, without iterating over every cell in Python.
is_burglary = grouped["Type"] == "Burglary"
burglary_counts = grouped.loc[is_burglary, ["Month", "LSOA_code", "Count"]].rename(
    columns={"Count": "Burglary_Count"}
)

# Step 4: Long-format input for the correlations
df_corr_input = (
    grouped.loc[~is_burglary]
    .merge(burglary_counts, on=["Month", "LSOA_code"], how="inner")
    .rename(columns={"LSOA_code": "LSOA", "Type": "Crime", "Count": "OtherCrime_Count"})
    [["Month", "LSOA", "Crime", "Burglary_Count", "OtherCrime_Count"]]
)

# Step 5: For each crime type, correlate Burglary vs OtherCrime across all (Month, LSOA) points
correlation_stats = []
for crime, subset in df_corr_input.groupby("Crime", sort=False):
    corr = _pearson_or_nan(subset["Burglary_Count"], subset["OtherCrime_Count"])
    if pd.isna(corr):
        # Not enough variation to define a correlation for this crime type
        continue
    correlation_stats.append({
        "Crime": crime,
        "CorrelationWithBurglary": corr
    })

correlation_df = pd.DataFrame(correlation_stats, columns=["Crime", "CorrelationWithBurglary"])

# Step 6: Optional — variance and std by Crime across months and LSOAs
# One correlation per (Month, Crime). Iterating over the groups explicitly
# avoids groupby.apply touching the grouping columns, and lets degenerate
# groups (a single LSOA, or constant counts) yield NaN instead of raising.
monthly_records = [
    {
        "Month": month,
        "Crime": crime,
        "Correlation": _pearson_or_nan(group["Burglary_Count"], group["OtherCrime_Count"]),
    }
    for (month, crime), group in df_corr_input.groupby(["Month", "Crime"])
]
monthly_corrs = pd.DataFrame(monthly_records, columns=["Month", "Crime", "Correlation"])

# Now compute std and var (NaN correlations are skipped by the aggregations)
stats_by_crime = monthly_corrs.groupby("Crime")["Correlation"].agg(["std", "var"]).sort_values(by="std", ascending=False)

print("STD and VAR of correlation with Burglary by Crime (across months & LSOAs):\n")
print(stats_by_crime)


STD and VAR of correlation with Burglary by Crime (across months & LSOAs):

                                   std       var
Crime                                           
Possession of weapons         0.140478  0.019734
Theft from the person         0.096333  0.009280
Other crime                   0.091152  0.008309
Other theft                   0.089460  0.008003
Robbery                       0.078383  0.006144
Shoplifting                   0.071888  0.005168
Vehicle crime                 0.064013  0.004098
Bicycle theft                 0.058735  0.003450
Drugs                         0.052122  0.002717
Criminal damage and arson     0.049903  0.002490
Public order                  0.048250  0.002328
Violence and sexual offences  0.040708  0.001657
Anti-social behaviour         0.038180  0.001458


  monthly_corrs = df_corr_input.groupby(["Month", "Crime"]).apply(


In [3]:
import sqlite3
import pandas as pd

# Show burglary as a percentage of all recorded crimes for each calendar
# month (1-12), pooling the same month across every year in the data.

# Connect and load relevant columns; release the connection even on failure
conn = sqlite3.connect("crime_data.db")
try:
    df = pd.read_sql_query("SELECT Month, Type FROM crime", conn)
finally:
    conn.close()

# Extract month of year (1–12)
df["MonthOfYear"] = pd.to_datetime(df["Month"]).dt.month

# Total crimes per month-of-year
total_crimes_by_month = df["MonthOfYear"].value_counts().sort_index()

# Burglary crimes per month-of-year, aligned to the months that have any
# crime at all. A month with crimes but no burglary must count as 0;
# otherwise the division below produces NaN for it and "nan%" is printed.
burglary_by_month = (
    df.loc[df["Type"] == "Burglary", "MonthOfYear"]
    .value_counts()
    .reindex(total_crimes_by_month.index, fill_value=0)
)

# Compute percentage of burglary out of all crimes per month
burglary_percentage = (burglary_by_month / total_crimes_by_month * 100).round(2)

# Display (a month-of-year absent from the data entirely is shown as 0.00%)
print("Burglary as % of all crimes per month-of-year:\n")
for month in range(1, 13):
    pct = burglary_percentage.get(month, 0.0)
    print(f"Month {month:2d}: {pct:5.2f}%")


Burglary as % of all crimes per month-of-year:

Month  1:  5.59%
Month  2:  4.99%
Month  3:  4.91%
Month  4:  4.81%
Month  5:  4.59%
Month  6:  4.36%
Month  7:  4.30%
Month  8:  4.66%
Month  9:  4.84%
Month 10:  4.57%
Month 11:  5.05%
Month 12:  5.43%
