In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_absolute_error
from sklearn.model_selection import train_test_split

In [None]:
# Location of the raw CSV. Set the CRIME_DATA_PATH environment variable to point
# at your own copy of the dataset; the original author's local path is kept as
# the fallback so existing setups keep working unchanged.
DATA_PATH = os.environ.get(
    "CRIME_DATA_PATH",
    r'C:\Users\Lenovo\Downloads\Crime_Rate_Dataset.csv',
)
# NOTE(review): 'unicode_escape' also interprets backslash sequences inside the
# data; presumably it was chosen to get past a UnicodeDecodeError — confirm
# whether a plain single-byte codec such as 'latin-1' was intended.
df = pd.read_csv(DATA_PATH, encoding='unicode_escape')

In [None]:
# Preview the first rows to sanity-check that the CSV parsed into sensible columns.
df.head()

In [None]:
# Print pandas' structural summary of the frame (columns, dtypes, non-null counts).
df.info()

In [None]:
# Keep only the numeric columns and compute their pairwise correlations.
numeric_df = df.select_dtypes(include=[np.number])
corr_matrix = numeric_df.corr()

# Draw the annotated heatmap on an explicitly created axes.
fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(corr_matrix, annot=True, cmap="coolwarm", ax=ax)
ax.set_title("Correlation Heatmap (Numeric Features)")
plt.show()

In [None]:
# Count of records per year; skipped entirely when the dataset has no "Year" column.
if "Year" in df.columns:
    fig, ax = plt.subplots(figsize=(8, 5))
    sns.countplot(data=df, x="Year", ax=ax)
    ax.set_title("Crime Trend Over Years")
    plt.show()

In [None]:
# Bar chart of how often each crime type occurs; skipped when the column is absent.
if "Crime_Type" in df.columns:
    crime_type_counts = df["Crime_Type"].value_counts()
    ax = crime_type_counts.plot.bar(figsize=(8, 5))
    ax.set_title("Crime Type Distribution")
    plt.show()

In [None]:
# Name of the column the regression is trained to predict.
target = "Cases_Reported"
# Separate the label series from the remaining feature columns.
y = df[target]
X = df.drop(target, axis=1)

In [None]:
from sklearn.preprocessing import StandardScaler, LabelEncoder

In [None]:
# Replace every object-dtype feature column in X with integer codes, using a
# fresh LabelEncoder per column.
object_columns = X.select_dtypes(include=['object']).columns
for col in object_columns:
    le = LabelEncoder().fit(X[col])
    X[col] = le.transform(X[col])

In [None]:
# Hold out 20% of the rows for evaluation. A fixed random_state makes the split,
# and therefore the fitted model and the reported metrics, reproducible across
# kernel restarts instead of changing on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [None]:
# Fit the scaler on the training rows only, then apply that same fitted scaler
# to both splits.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [None]:
# Ordinary least-squares regression on the scaled training features.
model = LinearRegression()
model.fit(X=X_train, y=y_train)

In [None]:
# Predict on the held-out split (X_test was scaled with the training-fitted scaler above).
y_pred = model.predict(X_test)

In [None]:
from sklearn.metrics import mean_squared_error, r2_score 

In [None]:
# Score the held-out predictions: R² plus RMSE, which is derived from the MSE.
r2 = r2_score(y_true=y_test, y_pred=y_pred)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
rmse = np.sqrt(mse)

In [None]:
# Emit the three report lines with a single print call, one item per line.
print(
    "Model Performance:",
    f"RMSE: {rmse:.2f}",
    f"R² Score: {r2:.2f}",
    sep="\n",
)

In [None]:
def predict_cases(new_data: pd.DataFrame):
    """Predict ``Cases_Reported`` for new crime records.

    Object-dtype columns that have an entry in the module-level ``encoders``
    mapping are label-encoded, the frame is scaled with the module-level
    ``scaler``, and the scaled rows are passed to the module-level ``model``.

    Parameters
    ----------
    new_data : pd.DataFrame
        Feature rows to score. The frame is not modified.

    Returns
    -------
    The value returned by ``model.predict`` for the scaled rows.
    """
    # Work on a copy: previously the encoded integers were written back into
    # the caller's frame, silently replacing their original string values.
    frame = new_data.copy()
    for col in frame.select_dtypes(include=['object']).columns:
        if col in encoders:
            frame[col] = encoders[col].transform(frame[col])

    # When the scaler recorded the column names it was fitted on, put the
    # columns into that order so callers need not replicate the training
    # layout. If the column sets differ, the frame is passed through unchanged
    # and the scaler reports the mismatch itself.
    expected = getattr(scaler, "feature_names_in_", None)
    if expected is not None and set(expected) == set(frame.columns):
        frame = frame[list(expected)]

    return model.predict(scaler.transform(frame))

In [None]:
import joblib

In [None]:
# Fit one LabelEncoder per categorical feature so predict_cases() can encode
# new rows at inference time. The encoders are fitted on the raw columns
# without writing the codes back into df: overwriting df made this cell
# non-idempotent (a second run found no object columns and left `encoders`
# empty).
encoders = {}
features_raw = df.drop(columns=["Cases_Reported"])
for col in features_raw.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    le.fit(features_raw[col])
    encoders[col] = le

# Deliberately do NOT create and fit a new StandardScaler here. `model` was
# trained on features scaled by the scaler fitted on X_train; rebinding
# `scaler` to one fitted on the full dataset would make predict_cases() and the
# saved scaler.pkl use statistics the model never saw.
df_scaled = scaler.transform(X)

In [None]:
# Persist the three inference artifacts into the current working directory.
joblib.dump(value=encoders, filename="encoders.pkl")
joblib.dump(value=scaler, filename="scaler.pkl")
joblib.dump(value=model, filename="crime_model.pkl")

In [None]:
def _ask(prompt, cast=str):
    """Prompt the user with ``prompt`` and return the reply converted by ``cast``."""
    return cast(input(prompt))


# Collect one record's worth of feature values, in the same order as before.
city = _ask("Enter City name: ")
crime_type = _ask("Enter Crime Type: ")
year = _ask("Enter Year: ", int)
arrests = _ask("Enter Arrests: ", int)
latitude = _ask("Enter Latitude: ", float)
longitude = _ask("Enter Longitude: ", float)
age = _ask("Enter Age: ", int)
gender = _ask("Enter Gender: ")

In [None]:
# Assemble the single-row feature frame from the values prompted for above;
# previously `new_data` was never defined, so this cell raised NameError.
# NOTE(review): apart from "Year" and "Crime_Type" (which the plotting cells
# reference), these column names are assumed to match the CSV header — confirm.
new_data = pd.DataFrame([{
    "City": city,
    "Crime_Type": crime_type,
    "Year": year,
    "Arrests": arrests,
    "Latitude": latitude,
    "Longitude": longitude,
    "Age": age,
    "Gender": gender,
}])
# Put the columns in the training order. Selecting by X.columns raises a
# KeyError naming any training column that the assumed names above do not cover.
new_data = new_data[list(X.columns)]

predicted_value = predict_cases(new_data)
print(" Predicted Cases_Reported:", predicted_value[0])