In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split, cross_val_score
import tkinter as tk
from tkinter import messagebox
import tkinter as tk
from tkinter import ttk


In [2]:
file_path = r"C:\Users\Aryan\Downloads\Book1.csv"
try:
    data = pd.read_csv(file_path)
except FileNotFoundError:
    print(f'File {file_path} not found.')
    exit()

In [3]:
onehot_transformer = Pipeline(steps=[
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

numerical_transformer = Pipeline(steps=[
    ('scaler', StandardScaler())])

In [4]:
preprocessor = ColumnTransformer(transformers=[
    ('categorical', onehot_transformer, ['State', 'PinCode']),
    ('numerical', numerical_transformer, ['Year', 'Population'])
])


label = data['Crime Rate']

In [5]:
X = data.drop('Crime Rate', axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, label, test_size=0.2, random_state=42)


preprocessor.fit(X_train)

In [6]:
X_train_transformed = preprocessor.transform(X_train)
X_test_transformed = preprocessor.transform(X_test)

In [7]:
print(f'Shape of X_train_transformed: {X_train_transformed.shape}')
print(f'Shape of X_test_transformed: {X_test_transformed.shape}')


Shape of X_train_transformed: (3904, 439)
Shape of X_test_transformed: (976, 439)


In [8]:
lr_model = LinearRegression(fit_intercept=True)
lr_model.fit(X_train_transformed, y_train)

In [9]:
cv_scores = cross_val_score(lr_model, X_train_transformed, y_train, cv=5, n_jobs=-1)
print(f'Cross-validation scores for Linear Regression: {cv_scores}')
print(f'Mean cross-validation score for Linear Regression: {np.mean(cv_scores)}')

Cross-validation scores for Linear Regression: [-0.16554532 -0.08498638 -0.11462261 -0.10502634 -0.14931278]
Mean cross-validation score for Linear Regression: -0.12389868625357048


In [10]:
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train_transformed, y_train)

In [None]:
cv_scores = cross_val_score(rf_model, X_train_transformed, y_train, cv=5, n_jobs=-1)
print(f'Cross-validation scores for Random Forest Regression: {cv_scores}')
print(f'Mean cross-validation score for Random Forest Regression: {np.mean(cv_scores)}')


Cross-validation scores for Random Forest Regression: [-0.14479068 -0.13678062 -0.11484134 -0.18389744 -0.14788459]
Mean cross-validation score for Random Forest Regression: -0.1456389333262634


In [12]:
lr_predictions = lr_model.predict(X_test_transformed)
rf_predictions = rf_model.predict(X_test_transformed)

In [13]:
lr_mse = mean_squared_error(y_test, lr_predictions)
lr_r2 = r2_score(y_test, lr_predictions)
print(f'Linear Regression MSE: {lr_mse}')
print(f'Linear Regression R-squared: {lr_r2}')

Linear Regression MSE: 6.640189578138707
Linear Regression R-squared: -0.04151123918360011


In [14]:
rf_mse = mean_squared_error(y_test, rf_predictions)
rf_r2 = r2_score(y_test, rf_predictions)
print(f'Random Forest Regression MSE: {rf_mse}')
print(f'Random Forest Regression R-squared: {rf_r2}')

Random Forest Regression MSE: 7.166610348360656
Random Forest Regression R-squared: -0.12408013910340232


In [15]:
rf_mse = mean_squared_error(y_test, rf_predictions)
rf_r2 = r2_score(y_test, rf_predictions)
print(f'Random Forest Regression MSE: {rf_mse}')
print(f'Random Forest Regression R-squared: {rf_r2}')

Random Forest Regression MSE: 7.166610348360656
Random Forest Regression R-squared: -0.12408013910340232


In [16]:
def predict_crime_rate(model, features):
    transformed_features = preprocessor.transform(pd.DataFrame(features).T)
    prediction = model.predict(transformed_features)
    return prediction[0]


In [2]:
import tkinter as tk
from tkinter import ttk

# GUI
root = tk.Tk()
root.title("Crime Rate Prediction")
root.geometry("600x600")
root.config(bg='#F0F0F0')

# Header
header_label = tk.Label(root, text="Crime Rate Prediction", font=("Helvetica", 24), bg='#FF5733', fg='white')
header_label.pack(pady=10)

# Input fields
input_frame = tk.Frame(root, bg='#F0F0F0')
input_frame.pack(pady=10)

label_bg = '#FF5733'
label_fg = 'white'
entry_bg = 'white'
entry_fg = 'black'
entry_width = 20

# State Input
state_label = tk.Label(input_frame, text="State:", bg=label_bg, fg=label_fg, font=("Helvetica", 14))
state_label.grid(row=0, column=0, padx=5, pady=5)
state_var = tk.StringVar()
state_entry = tk.Entry(input_frame, textvariable=state_var, bg=entry_bg, fg=entry_fg, font=("Helvetica", 14), width=entry_width)
state_entry.grid(row=0, column=1, padx=5, pady=5)

# PinCode Input
pincode_label = tk.Label(input_frame, text="PinCode:", bg=label_bg, fg=label_fg, font=("Helvetica", 14))
pincode_label.grid(row=1, column=0, padx=5, pady=5)
pincode_var = tk.StringVar()
pincode_entry = tk.Entry(input_frame, textvariable=pincode_var, bg=entry_bg, fg=entry_fg, font=("Helvetica", 14), width=entry_width)
pincode_entry.grid(row=1, column=1, padx=5, pady=5)

# Year Input
year_label = tk.Label(input_frame, text="Year:", bg=label_bg, fg=label_fg, font=("Helvetica", 14))
year_label.grid(row=2, column=0, padx=5, pady=5)
year_var = tk.StringVar()
year_entry = tk.Entry(input_frame, textvariable=year_var, bg=entry_bg, fg=entry_fg, font=("Helvetica", 14), width=entry_width)
year_entry.grid(row=2, column=1, padx=5, pady=5)

# Population Input
population_label = tk.Label(input_frame, text="Population:", bg=label_bg, fg=label_fg, font=("Helvetica", 14))
population_label.grid(row=3, column=0, padx=5, pady=5)
population_var = tk.StringVar()
population_entry = tk.Entry(input_frame, textvariable=population_var, bg=entry_bg, fg=entry_fg, font=("Helvetica", 14), width=entry_width)
population_entry.grid(row=3, column=1, padx=5, pady=5)

# Model Selection Dropdown
model_label = tk.Label(input_frame, text="Model:", bg=label_bg, fg=label_fg, font=("Helvetica", 14))
model_label.grid(row=4, column=0, padx=5, pady=5)
model_var = tk.StringVar()
model_dropdown = ttk.Combobox(input_frame, textvariable=model_var, values=["Linear Regression", "Random Forest"], font=("Helvetica", 14), width=entry_width)
model_dropdown.grid(row=4, column=1, padx=5, pady=5)
model_dropdown.current(0)

# Predict Button
predict_button = tk.Button(root, text="Predict", command=predict, font=("Helvetica", 14), bg='#FF5733', fg='white')
predict_button.pack(pady=10)

# Results Frame
results_frame = tk.Frame(root, bg='#F0F0F0')
results_frame.pack(pady=10)

# Prediction Result Label
prediction_label = tk.Label(results_frame, text="Prediction: ", bg='#F0F0F0', font=("Helvetica", 14))
prediction_label.grid(row=0, column=0, padx=5, pady=5)

# Cross Validation Score Label
cv_label = tk.Label(results_frame, text="Cross Validation Score: ", bg='#F0F0F0', font=("Helvetica", 14))
cv_label.grid(row=1, column=0, padx=5, pady=5)

# Mean Squared Error Label
mse_label = tk.Label(results_frame, text="Mean Squared Error: ", bg='#F0F0F0', font=("Helvetica", 14))
mse_label.grid(row=2, column=0, padx=5, pady=5)

# R-squared Label
r2_label = tk.Label(results_frame, text="R² Score: ", bg='#F0F0F0', font=("Helvetica", 14))
r2_label.grid(row=3, column=0, padx=5, pady=5)

# Function to display results
def display_results(prediction, cv_score, mse, r2):
    prediction_label.config(text=f"Prediction: {prediction:.2f}")
    cv_label.config(text=f"Cross Validation Score: {cv_score:.2f}")
    mse_label.config(text=f"Mean Squared Error: {mse:.2f}")
    r2_label.config(text=f"R² Score: {r2:.2f}")

# Example usage of display_results function
# This should be replaced with the actual prediction logic once implemented
# display_results(0, 0, 0, 0)

# Start the GUI event loop
root.mainloop()


NameError: name 'predict' is not defined