<a href="https://colab.research.google.com/github/Sankalp0208/Crime_RatePrediction/blob/main/Crime_Rate.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import StackingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [5]:
df = pd.read_csv("/content/01_District_wise_crimes_committed_IPC_2001_2012.csv")

In [6]:
df.head() # year is from 2001 to 2012

Unnamed: 0,index,STATE/UT,DISTRICT,YEAR,MURDER,ATTEMPT TO MURDER,CULPABLE HOMICIDE NOT AMOUNTING TO MURDER,RAPE,CUSTODIAL RAPE,OTHER RAPE,...,ARSON,HURT/GREVIOUS HURT,DOWRY DEATHS,ASSAULT ON WOMEN WITH INTENT TO OUTRAGE HER MODESTY,INSULT TO MODESTY OF WOMEN,CRUELTY BY HUSBAND OR HIS RELATIVES,IMPORTATION OF GIRLS FROM FOREIGN COUNTRIES,CAUSING DEATH BY NEGLIGENCE,OTHER IPC CRIMES,TOTAL IPC CRIMES
0,0,ANDHRA PRADESH,ADILABAD,2001,101,60,17,50,0,50,...,30,1131,16,149,34,175,0,181,1518,4154
1,1,ANDHRA PRADESH,ANANTAPUR,2001,151,125,1,23,0,23,...,69,1543,7,118,24,154,0,270,754,4125
2,2,ANDHRA PRADESH,CHITTOOR,2001,101,57,2,27,0,27,...,38,2088,14,112,83,186,0,404,1262,5818
3,3,ANDHRA PRADESH,CUDDAPAH,2001,80,53,1,20,0,20,...,23,795,17,126,38,57,0,233,1181,3140
4,4,ANDHRA PRADESH,EAST GODAVARI,2001,82,67,1,23,0,23,...,41,1244,12,109,58,247,0,431,2313,6507


In [7]:
df.shape

(9017, 34)

In [8]:
df.isnull().sum() # data does not contain any null value

Unnamed: 0,0
index,0
STATE/UT,0
DISTRICT,0
YEAR,0
MURDER,0
ATTEMPT TO MURDER,0
CULPABLE HOMICIDE NOT AMOUNTING TO MURDER,0
RAPE,0
CUSTODIAL RAPE,0
OTHER RAPE,0


In [9]:
df.columns

Index(['index', 'STATE/UT', 'DISTRICT', 'YEAR', 'MURDER', 'ATTEMPT TO MURDER',
       'CULPABLE HOMICIDE NOT AMOUNTING TO MURDER', 'RAPE', 'CUSTODIAL RAPE',
       'OTHER RAPE', 'KIDNAPPING & ABDUCTION',
       'KIDNAPPING AND ABDUCTION OF WOMEN AND GIRLS',
       'KIDNAPPING AND ABDUCTION OF OTHERS', 'DACOITY',
       'PREPARATION AND ASSEMBLY FOR DACOITY', 'ROBBERY', 'BURGLARY', 'THEFT',
       'AUTO THEFT', 'OTHER THEFT', 'RIOTS', 'CRIMINAL BREACH OF TRUST',
       'CHEATING', 'COUNTERFIETING', 'ARSON', 'HURT/GREVIOUS HURT',
       'DOWRY DEATHS', 'ASSAULT ON WOMEN WITH INTENT TO OUTRAGE HER MODESTY',
       'INSULT TO MODESTY OF WOMEN', 'CRUELTY BY HUSBAND OR HIS RELATIVES',
       'IMPORTATION OF GIRLS FROM FOREIGN COUNTRIES',
       'CAUSING DEATH BY NEGLIGENCE', 'OTHER IPC CRIMES', 'TOTAL IPC CRIMES'],
      dtype='object')

In [10]:
df['TOTAL IPC CRIMES'] = df.iloc[:, 5:].sum(axis=1)

grouped = df.groupby(['STATE/UT', 'DISTRICT'])['TOTAL IPC CRIMES'].sum().reset_index()

In [11]:
grouped['Risk_Level'] = pd.qcut(
    grouped['TOTAL IPC CRIMES'],
    q=3,
    labels=[0, 1, 2]
)


In [12]:
df = df.merge(
    grouped[['STATE/UT', 'DISTRICT', 'Risk_Level']],
    on=['STATE/UT', 'DISTRICT'],
    how='left'
)

In [13]:
y = df['Risk_Level']
y

Unnamed: 0,Risk_Level
0,2
1,2
2,2
3,2
4,2
...,...
9012,0
9013,0
9014,0
9015,1


In [14]:
X = df.drop(['Risk_Level', 'TOTAL IPC CRIMES','OTHER IPC CRIMES'], axis=1)
X = X.select_dtypes(include=['int64', 'float64'])

In [15]:
X

Unnamed: 0,index,YEAR,MURDER,ATTEMPT TO MURDER,CULPABLE HOMICIDE NOT AMOUNTING TO MURDER,RAPE,CUSTODIAL RAPE,OTHER RAPE,KIDNAPPING & ABDUCTION,KIDNAPPING AND ABDUCTION OF WOMEN AND GIRLS,...,CHEATING,COUNTERFIETING,ARSON,HURT/GREVIOUS HURT,DOWRY DEATHS,ASSAULT ON WOMEN WITH INTENT TO OUTRAGE HER MODESTY,INSULT TO MODESTY OF WOMEN,CRUELTY BY HUSBAND OR HIS RELATIVES,IMPORTATION OF GIRLS FROM FOREIGN COUNTRIES,CAUSING DEATH BY NEGLIGENCE
0,0,2001,101,60,17,50,0,50,46,30,...,104,1,30,1131,16,149,34,175,0,181
1,1,2001,151,125,1,23,0,23,53,30,...,65,8,69,1543,7,118,24,154,0,270
2,2,2001,101,57,2,27,0,27,59,34,...,209,9,38,2088,14,112,83,186,0,404
3,3,2001,80,53,1,20,0,20,25,20,...,37,2,23,795,17,126,38,57,0,233
4,4,2001,82,67,1,23,0,23,49,26,...,220,3,41,1244,12,109,58,247,0,431
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9012,9012,2012,0,0,0,0,0,0,0,0,...,0,0,3,3,0,1,0,1,0,0
9013,9013,2012,0,0,0,0,0,0,0,0,...,0,0,3,3,0,1,0,1,0,0
9014,9014,2012,5,6,2,6,0,6,2,2,...,15,1,1,186,0,2,0,1,0,44
9015,9015,2012,24,21,10,7,0,7,17,14,...,75,5,20,632,0,7,2,5,0,219


In [35]:
X.columns

Index(['index', 'YEAR', 'MURDER', 'ATTEMPT TO MURDER',
       'CULPABLE HOMICIDE NOT AMOUNTING TO MURDER', 'RAPE', 'CUSTODIAL RAPE',
       'OTHER RAPE', 'KIDNAPPING & ABDUCTION',
       'KIDNAPPING AND ABDUCTION OF WOMEN AND GIRLS',
       'KIDNAPPING AND ABDUCTION OF OTHERS', 'DACOITY',
       'PREPARATION AND ASSEMBLY FOR DACOITY', 'ROBBERY', 'BURGLARY', 'THEFT',
       'AUTO THEFT', 'OTHER THEFT', 'RIOTS', 'CRIMINAL BREACH OF TRUST',
       'CHEATING', 'COUNTERFIETING', 'ARSON', 'HURT/GREVIOUS HURT',
       'DOWRY DEATHS', 'ASSAULT ON WOMEN WITH INTENT TO OUTRAGE HER MODESTY',
       'INSULT TO MODESTY OF WOMEN', 'CRUELTY BY HUSBAND OR HIS RELATIVES',
       'IMPORTATION OF GIRLS FROM FOREIGN COUNTRIES',
       'CAUSING DEATH BY NEGLIGENCE'],
      dtype='object')

In [36]:
print(len(X.columns))
print(X.columns.tolist())

30
['index', 'YEAR', 'MURDER', 'ATTEMPT TO MURDER', 'CULPABLE HOMICIDE NOT AMOUNTING TO MURDER', 'RAPE', 'CUSTODIAL RAPE', 'OTHER RAPE', 'KIDNAPPING & ABDUCTION', 'KIDNAPPING AND ABDUCTION OF WOMEN AND GIRLS', 'KIDNAPPING AND ABDUCTION OF OTHERS', 'DACOITY', 'PREPARATION AND ASSEMBLY FOR DACOITY', 'ROBBERY', 'BURGLARY', 'THEFT', 'AUTO THEFT', 'OTHER THEFT', 'RIOTS', 'CRIMINAL BREACH OF TRUST', 'CHEATING', 'COUNTERFIETING', 'ARSON', 'HURT/GREVIOUS HURT', 'DOWRY DEATHS', 'ASSAULT ON WOMEN WITH INTENT TO OUTRAGE HER MODESTY', 'INSULT TO MODESTY OF WOMEN', 'CRUELTY BY HUSBAND OR HIS RELATIVES', 'IMPORTATION OF GIRLS FROM FOREIGN COUNTRIES', 'CAUSING DEATH BY NEGLIGENCE']


In [16]:
y = df['Risk_Level']
y

Unnamed: 0,Risk_Level
0,2
1,2
2,2
3,2
4,2
...,...
9012,0
9013,0
9014,0
9015,1


In [17]:
X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.2, random_state=42)

In [18]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)

In [19]:
base_learners = [
    ('dt', DecisionTreeClassifier(random_state=42)),
    ('svc', SVC(probability=True, random_state=42)),
    ('lr', LogisticRegression(max_iter=1000)),
    ('rdt', RandomForestClassifier(n_estimators=200,
    random_state=42))
]

In [20]:
meta_learner = LogisticRegression(max_iter=1000)

In [21]:
Stacking_clf = StackingClassifier(
  estimators=base_learners,
  final_estimator=meta_learner,
  cv=5
)

In [30]:
Stacking_clf.fit(X_train, y_train)

In [23]:
y_pred =Stacking_clf.predict(X_test)

In [24]:
accuracy = accuracy_score(y_test, y_pred)

# Over all accuracy

In [25]:
accuracy

0.8082039911308204

Individual accuracy

In [26]:
from sklearn.metrics import accuracy_score

# Dictionary to store accuracy results
accuracy_results = {}

# Loop through each base learner
for name, model in base_learners:
    # Train the model on training data
    model.fit(X_train, y_train)

    # Predict on test data
    y_pred = model.predict(X_test)

    # Calculate accuracy
    acc = accuracy_score(y_test, y_pred)

    # Store in dictionary
    accuracy_results[name] = acc

# Print results nicely
for name, acc in accuracy_results.items():
    print(f"{name} Accuracy: {acc:.3f}")

dt Accuracy: 0.712
svc Accuracy: 0.722
lr Accuracy: 0.749
rdt Accuracy: 0.813


# Saving file

In [27]:
import joblib


In [31]:
joblib.dump(Stacking_clf, "crime_risk_model.pkl")

['crime_risk_model.pkl']