<a href="https://colab.research.google.com/github/PrajwalaY26/UrbanProtect/blob/main/Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.utils.class_weight import compute_class_weight
import numpy as np

# Load the data
import io
from google.colab import files
uploaded = files.upload()

# `files.upload()` returns a {filename: raw bytes} mapping. The original cell never
# turned that mapping into `df`, so every later line failed with NameError on a
# fresh kernel. Parse the first uploaded file here.
if not uploaded:
    raise ValueError("No file was uploaded; cannot build the crime DataFrame.")
data_filename, data_bytes = next(iter(uploaded.items()))
if data_filename.lower().endswith('.csv'):
    df = pd.read_csv(io.BytesIO(data_bytes))
else:
    # Excel workbooks (.xlsx / .xlsm / .xls)
    df = pd.read_excel(io.BytesIO(data_bytes))

# Fail early, with a readable message, if the columns used by this notebook are absent.
required_columns = {'Time', 'State', 'Area', 'Type of Crime'}
missing_columns = required_columns - set(df.columns)
if missing_columns:
    raise KeyError(f"Uploaded data is missing required columns: {sorted(missing_columns)}")

# Preprocessing
# Unparseable timestamps become NaT (errors='coerce'); the derived parts below are then
# NaN for those rows, and the final dropna() removes them.
df['Time'] = pd.to_datetime(df['Time'], errors='coerce')
df['Hour'] = df['Time'].dt.hour
df['Date'] = df['Time'].dt.date
df['Year'] = df['Time'].dt.year
df['Month'] = df['Time'].dt.month
df['Day'] = df['Time'].dt.day
# NOTE(review): dropna() discards a row if ANY column is missing, not only the ones
# used for modelling - confirm that is intended.
df = df.dropna()

# Integer-encode the target: every distinct 'Type of Crime' string gets a class id
label_encoder = LabelEncoder()
df['Type of Crime Encoded'] = label_encoder.fit_transform(df['Type of Crime'])

# Model inputs (location + calendar fields) and the encoded target
feature_columns = ['State', 'Area', 'Year', 'Month', 'Day']
X = df[feature_columns]
y_incident = df['Type of Crime Encoded']

# One-hot encode the two categorical location columns; the remaining (numeric)
# columns are passed through unchanged. Categories unseen at fit time are ignored.
categorical_columns = ['State', 'Area']
one_hot = OneHotEncoder(handle_unknown='ignore')
preprocessor = ColumnTransformer(
    transformers=[('cat', one_hot, categorical_columns)],
    remainder='passthrough',
)

# 'balanced' class weights, keyed by encoded class id
encoded_classes = y_incident.unique()
class_weights = compute_class_weight('balanced', classes=encoded_classes, y=y_incident)
class_weight_dict = dict(zip(encoded_classes, class_weights))

# Experiment settings. A fixed seed makes Restart & Run All reproduce the same splits,
# forests and scores (the original drew unseeded random states on every run).
SEED = 42
N_REPEATS = 5
N_ITERATIONS = 200
rng = np.random.RandomState(SEED)

# Hold out an evaluation set that neither the base models nor the meta-model ever see.
# The original scored the meta-model on the very rows it was fitted on, which reports
# an optimistic in-sample figure rather than an estimate of generalisation.
X_dev, X_holdout, y_incident_dev, y_incident_holdout = train_test_split(
    X, y_incident, test_size=0.2, random_state=SEED
)

# Initialize storage for predictions
meta_features_list = []
y_incident_test_combined_list = []

# Repeat the entire process N_REPEATS times
for repeat in range(N_REPEATS):
    print(f"Repeat {repeat + 1}/{N_REPEATS}")

    # Initialize variables for this repeat
    meta_features_repeat_list = []
    y_incident_test_repeat_list = []

    for iteration in range(N_ITERATIONS):
        # Random 80/20 split of the development data; the split seed comes from `rng`
        X_train, X_test, y_incident_train, y_incident_test = train_test_split(
            X_dev, y_incident_dev, test_size=0.2, random_state=rng.randint(10000)
        )

        # Define and train the base model.
        # The shared `preprocessor` object is placed in the pipeline directly, so it is
        # (re)fitted together with the forest on each iteration.
        classifier_pipeline = Pipeline(steps=[
            ('preprocessor', preprocessor),
            ('classifier', RandomForestClassifier(
                class_weight=class_weight_dict,
                n_estimators=100,
                max_depth=10,
                random_state=rng.randint(10000),
            ))
        ])
        classifier_pipeline.fit(X_train, y_incident_train)

        # Out-of-fold class probabilities become the meta-model's training features
        meta_features = classifier_pipeline.predict_proba(X_test)
        meta_features_repeat_list.append(meta_features)
        y_incident_test_repeat_list.append(y_incident_test)

    # Stack all meta-features and targets for this repeat
    meta_features_matrix_repeat = np.vstack(meta_features_repeat_list)
    y_incident_test_combined_repeat = np.concatenate(y_incident_test_repeat_list)

    # Store meta-features and targets across repeats
    meta_features_list.append(meta_features_matrix_repeat)
    y_incident_test_combined_list.append(y_incident_test_combined_repeat)

# Combine all meta-features and targets from all repeats
meta_features_matrix_combined = np.vstack(meta_features_list)
y_incident_test_combined = np.concatenate(y_incident_test_combined_list)

# Train meta-model
meta_model = LogisticRegression(max_iter=200)
meta_model.fit(meta_features_matrix_combined, y_incident_test_combined)

# Evaluate on the untouched hold-out set, through the same path used at inference time:
# last fitted base pipeline -> class probabilities -> meta-model.
holdout_meta_features = classifier_pipeline.predict_proba(X_holdout)
meta_predictions = meta_model.predict(holdout_meta_features)
accuracy = accuracy_score(y_incident_holdout, meta_predictions)
print(f'\nHeld-out accuracy (base model + meta-model): {accuracy}')
# `labels` pins the report rows to the encoder's classes so `target_names` stays aligned
# even if some class happens to be absent from the hold-out rows.
print(classification_report(
    y_incident_holdout,
    meta_predictions,
    labels=np.arange(len(label_encoder.classes_)),
    target_names=label_encoder.classes_,
    zero_division=0,
))

Saving RECORDS.xlsm to RECORDS.xlsm
Repeat 1/5
Repeat 2/5
Repeat 3/5
Repeat 4/5
Repeat 5/5

Average Accuracy after 5 repeats: 0.4073333333333333
              precision    recall  f1-score   support

     Assault       0.40      0.42      0.41      5388
    Burglary       0.41      0.35      0.38      5510
       Fraud       0.42      0.43      0.42      5454
  Kidnapping       0.43      0.50      0.46      5711
      Murder       0.41      0.47      0.44      5415
     Robbery       0.37      0.31      0.34      5866
       Theft       0.40      0.38      0.39      5656

    accuracy                           0.41     39000
   macro avg       0.41      0.41      0.41     39000
weighted avg       0.41      0.41      0.41     39000



In [None]:
def get_user_input():
    """Prompt for one crime record and return it as a single-row DataFrame.

    Returns:
        pandas.DataFrame: one row with columns
        ['State', 'Area', 'Year', 'Month', 'Day'].

    Raises:
        ValueError: if year, month or day is not an integer, or if month/day fall
            outside the ranges announced in the prompts (1-12 and 1-31).
    """
    # Strip surrounding whitespace so that e.g. "Delhi " is not treated as a different
    # category from "Delhi" by the downstream encoder.
    state = input("Please enter the state where the crime occurred: ").strip()
    area = input("Please enter the area within the state: ").strip()
    year = int(input("Please enter the year when the crime occurred: "))

    month = int(input("Please enter the month when the crime occurred (1-12): "))
    if not 1 <= month <= 12:
        raise ValueError(f"Month must be between 1 and 12, got {month}")

    day = int(input("Please enter the day of the month when the crime occurred (1-31): "))
    if not 1 <= day <= 31:
        raise ValueError(f"Day must be between 1 and 31, got {day}")

    return pd.DataFrame([[state, area, year, month, day]], columns=['State', 'Area', 'Year', 'Month', 'Day'])

def predict_crime_type(model, preprocessor, user_input_df):
    """Run one record through the stacked predictor and return its crime-type label.

    The record is encoded with ``preprocessor``, scored with ``model.predict_proba``,
    and the resulting probabilities are classified by the module-level ``meta_model``.
    The predicted class id is mapped back to its label with the module-level
    ``label_encoder``.

    Args:
        model: fitted base classifier exposing ``predict_proba``.
        preprocessor: fitted transformer exposing ``transform``.
        user_input_df: the record(s) to score; only the first prediction is returned.

    Returns:
        The decoded label of the first prediction.
    """
    # Stage 1: encode the raw record and obtain the base model's class probabilities
    encoded_input = preprocessor.transform(user_input_df)
    class_probabilities = model.predict_proba(encoded_input)

    # Stage 2: the meta-model turns those probabilities into a class id
    predicted_ids = meta_model.predict(class_probabilities)

    # Decode the id back to its label and return the first (only) prediction
    return label_encoder.inverse_transform(predicted_ids)[0]

# Interactive entry point: run this once the base pipeline and the meta-model are trained
if __name__ == "__main__":
    # Ask the user for a single record
    user_input_df = get_user_input()

    # `predict_crime_type` takes the bare classifier and the preprocessor separately,
    # so pull the classifier step out of the trained pipeline.
    base_classifier = classifier_pipeline.named_steps['classifier']
    predicted_crime_type = predict_crime_type(
        base_classifier,
        preprocessor,
        user_input_df,
    )

    print(f"The predicted type of crime is: {predicted_crime_type}")

Please enter the state where the crime occurred: Delhi
Please enter the area within the state: Connaught Place
Please enter the year when the crime occurred: 2023
Please enter the month when the crime occurred (1-12): 5
Please enter the day of the month when the crime occurred (1-31): 12
The predicted type of crime is: Fraud
