In [12]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import StandardScaler

# Output directory for this cell's result files (relative to the working directory)
results_folder = os.path.join('S2', 'Results')
# exist_ok=True makes re-running the cell harmless when the directory is already present
os.makedirs(name=results_folder, exist_ok=True)

# Load the data
data = pd.read_csv('../Data/Crime_data.csv')

# Remove leading/trailing spaces from column names
data.columns = data.columns.str.strip()

# Drop rows with missing values BEFORE selecting features/target.
# Doing it after the selection (as before) leaves X/y built from the
# uncleaned frame, so the missing rows would still reach the model.
data = data.dropna()

# Selecting features and target for classification
X_classification = data[['Population_Density', 'Crime_Frequency']]  # Features
y_classification = data['Crime_Occurred']  # Target variable (Crime Occurred, 0 or 1)

# Split the raw (unscaled) data first so the scaler never sees the test rows.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X_classification, y_classification, test_size=0.2, random_state=42
)

# Feature scaling (standardization): fit on the training split only, then
# apply the same learned mean/variance to the test split to avoid leakage.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Fully scaled feature matrix, kept under its original name for any later
# cell that reads it; it uses the training-split statistics as well.
X_classification_scaled = scaler.transform(X_classification)

# Build a 5-nearest-neighbours classifier and fit it on the training split
# (fit() returns the estimator itself, so the call can be chained)
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

# Class predictions for the held-out test split
y_pred_knn = knn.predict(X_test)

# Evaluation: overall accuracy plus the per-class text report
accuracy_knn = accuracy_score(y_test, y_pred_knn)
report_knn = classification_report(y_test, y_pred_knn)

# Assemble the text that goes into the results file, one section per entry
knn_results = "".join([
    f"KNN Model Evaluation:\n",
    f"Accuracy: {accuracy_knn}\n",
    f"Classification Report:\n{report_knn}\n",
])

# Write the summary to 'knn_results.txt' inside the results folder
knn_results_path = os.path.join(results_folder, 'knn_results.txt')
with open(knn_results_path, 'w') as file:
    file.write(knn_results)

print("KNN results saved successfully.")


KNN results saved successfully.
