# Crime Analysis Project

In [None]:

import numpy as np
import pandas as pd
import statsmodels.api as sm
from sklearn.preprocessing import StandardScaler
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats as stats
from statsmodels.stats.diagnostic import het_breuschpagan

# Step 1: Data Collection
# Load the crime dataset into a DataFrame. Replace the path below with the
# location of your own copy of the CSV file.
df = pd.read_csv('/content/drive/MyDrive/CSV files/final_crime_data.csv')

# Initial Data Exploration
# Show the first rows, then the column dtypes and non-null counts.
print(df.head())
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would append a stray "None" line.
df.info()

# Step 2: Data Cleaning
# Remove exact duplicate rows, report how many values are missing in each
# column, then discard every row that still contains a missing value.
df = df.drop_duplicates()
print("Missing Values:\n", df.isna().sum())
df = df.dropna()

# GDP can contain numbers written with thousands separators (for example
# '56,495.85'), which prevents a direct float conversion. Strip the commas from
# every value instead of patching one specific literal, then convert the whole
# column to float. Rows with missing GDP were already dropped above, so the
# string conversion cannot turn NaN into the text 'nan'.
df['GDP'] = (
    df['GDP']
    .astype(str)
    .str.replace(',', '', regex=False)
    .astype(float)
)

# Step 3: Data Transformation and Correlation Analysis
# Keep only the float64/int64 columns and leave out 'Weapon rate' when it is
# present (errors='ignore' makes the drop a no-op if the column is absent).
numeric_only = df.select_dtypes(include=['float64', 'int64'])
df_numeric = numeric_only.drop(columns='Weapon rate', errors='ignore')

# Annotated heatmap of the pairwise correlations between the numeric columns.
corr_matrix = df_numeric.corr()
heatmap_ax = sns.heatmap(corr_matrix, annot=True)
heatmap_ax.set_title('Correlation Heatmap')
plt.show()

# Step 4: Skewness Checking and Transformation
# Report the skewness of every numeric feature.
skewness = df_numeric.skew()
print("Skewness of features:\n", skewness)

# Replace the selected columns of df with log1p(x) = log(1 + x). log1p is only
# defined for x > -1; smaller values would become NaN or -inf.
for col in ['GDP', 'Unemployment rate', 'Urban land area', 'Population density']:
    df[col] = np.log1p(df[col])  # Apply log transformation

# Step 5: Exploratory Data Analysis
# df_numeric was extracted from df before the log transformation in Step 4,
# and that transformation assigns into df only, so df_numeric still holds the
# untransformed values. Plot the same set of columns taken from df so the pair
# plot reflects the data that is actually passed to the regression below.
sns.pairplot(df[df_numeric.columns])
plt.show()

# Step 6: Regression Analysis
# Ordinary least squares fit of 'Crime Rate' on the predictors listed below,
# with an intercept column added by add_constant.
predictors = [
    'GDP',
    'Unemployment rate',
    'Population density',
    'Primary completion rate',
    'Sex ratio at birth',
    'GPI',
]
X = sm.add_constant(df[predictors])
y = df['Crime Rate']

ols_model = sm.OLS(y, X)
model = ols_model.fit()
print(model.summary())

# Residual Analysis
# Histogram of the fitted model's residuals with a kernel density overlay.
residuals = model.resid
resid_ax = sns.histplot(residuals, kde=True)
resid_ax.set_title('Histogram of Residuals')
plt.show()

# Breusch-Pagan Test for Heteroscedasticity
# Run the test on the residuals against the regression design matrix and
# print the four returned statistics paired with their labels, in order.
bp_labels = ['LM Statistic', 'p-value', 'f-value', 'f p-value']
bp_test = het_breuschpagan(residuals, X)
bp_results = {label: value for label, value in zip(bp_labels, bp_test)}
print("Breusch-Pagan Test Results:", bp_results)
