In [4]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import random


In [5]:
# Load dataset
file_path = '/content/fully_mapped_crime_hotspot_data.csv'
df = pd.read_csv(file_path)


In [6]:
# Display dataset structure
def explore_data():
    print("Dataset Structure:")
    print(df.head())
    print("\nColumns:", df.columns)

explore_data()

Dataset Structure:
       location          crime_type      time        date  \
0  LODHI COLONY            gangrape  22:31:00  2020-06-15   
1   MANDIR MARG  sexual harassement  15:47:00  2019-09-28   
2   TUGLAK ROAD            gangrape  01:34:00  2020-06-19   
3  PANDAV NAGAR               theft  12:32:00  2019-10-28   
4      GHAZIPUR             robbery  15:47:00  2019-08-31   

  investigation_status case_number             comments  latitude  longitude  \
0                 Open       C8160                    -   28.5933    77.2273   
1  Under Investigation       C8906                    -   28.6325    77.2080   
2                  NaN         NaN  Missing information   28.6010    77.2177   
3                 Open       C1009                    -   28.6329    77.2878   
4               Closed         NaN  Missing information   28.6228    77.3121   

   crime_category  hour day_of_week      month  
0  Sexual Offense    22      Monday       June  
1  Sexual Offense    15    Saturday

In [7]:
# Preprocessing
label_encoders = {}
for col in df.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col].astype(str))
    label_encoders[col] = le


In [8]:
# Feature selection (modify as needed)
X = df.iloc[:, :-1]  # All columns except last
y = df.iloc[:, -1]   # Target column

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [None]:
# Train a simple model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

def get_crime_statistics(city=None):
    if city and 'City' in df.columns:
        city_data = df[df['City'] == city]
        return {
            "total_records": len(city_data),
            "most_common_crime": city_data['Crime_Type'].mode()[0] if 'Crime_Type' in df.columns else 'N/A'
        }
    return {
        "total_records": len(df),
        "crime_types": df['Crime_Type'].nunique() if 'Crime_Type' in df.columns else 'N/A',
        "most_common_crime": df['Crime_Type'].mode()[0] if 'Crime_Type' in df.columns else 'N/A'
    }

def get_crime_trends():
    if 'Year' in df.columns:
        trends = df.groupby('Year').size()
        return trends.to_dict()
    return "No year data available."

def chat_bot():
    print("Crime Chatbot Activated. Ask me a question!")
    while True:
        user_input = input("You: ")
        if user_input.lower() in ['exit', 'quit']:
            print("Chatbot: Goodbye!")
            break
        elif "crime rate" in user_input.lower():
            city = input("Enter your city: ")
            stats = get_crime_statistics(city)
            print(f"Chatbot: In {city}, Total Crime Records: {stats['total_records']}, Most Common Crime: {stats['most_common_crime']}")
        elif "crime trend" in user_input.lower():
            trends = get_crime_trends()
            print(f"Chatbot: Crime trends over the years: {trends}")
        elif "cybercrime" in user_input.lower():
            print("Chatbot: Common cybercrimes include phishing, identity theft, ransomware, and hacking.")
        else:
            print("Chatbot: Sorry, I don't have an answer for that. Try asking about crime rates, trends, or cybercrimes.")

# Run chatbot
chat_bot()

Crime Chatbot Activated. Ask me a question!
You: WHAT ARE THE CURRENT CRIME RATE IN MY CITY
Enter your city: DELHI
Chatbot: In DELHI, Total Crime Records: 3500, Most Common Crime: N/A
You: CYBERCRIME IN DELHI
Chatbot: Common cybercrimes include phishing, identity theft, ransomware, and hacking.
