In [22]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import r2_score, mean_squared_error

In [23]:
# Read the Excel workbook into a DataFrame.
# NOTE(review): the absolute '/content/...' path looks like a Colab upload
# location -- confirm it before running this notebook anywhere else.
file_path = '/content/new_dataset.xlsx'
dataset = pd.read_excel(io=file_path)

In [24]:
# Preprocessing: one label encoder per categorical column (City and Type),
# so that each column keeps its own category-to-integer mapping.
le_city, le_type = LabelEncoder(), LabelEncoder()

In [25]:
# Replace each categorical column with the integer codes produced by its own
# encoder. The columns are overwritten in `dataset`; the fitted encoders keep
# the mapping back to the original labels.
for column_name, column_encoder in (('City', le_city), ('Type', le_type)):
    dataset[column_name] = column_encoder.fit_transform(dataset[column_name])

In [26]:
# Feature matrix (X): the four predictor columns.
# Target vector (y): the 'Crime Rate' column.
X = dataset.loc[:, ['Year', 'City', 'Population (in Lakhs) (2011)+', 'Type']]
y = dataset.loc[:, 'Crime Rate']

In [27]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [28]:
# Standardize the features. The scaler is fitted on the training split only
# and then applied to both splits, so no test-set statistics are used.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [29]:
# Fit a support vector regressor with an RBF kernel on the standardized
# training features. C and epsilon are fixed by hand in this cell; no
# hyperparameter search is performed here.
svm_model = SVR(C=10, epsilon=0.1, kernel='rbf')
svm_model.fit(X=X_train_scaled, y=y_train)


In [30]:
# Predict the target for the held-out, standardized test features.
svm_pred = svm_model.predict(X=X_test_scaled)

In [31]:
# Coefficient of determination (R²) of the predictions on the test split.
svm_r2 = r2_score(y_true=y_test, y_pred=svm_pred)

In [32]:
# Root Mean Squared Error on the test split, as an additional evaluation metric.
# The square root of the MSE is taken explicitly instead of passing
# `squared=False`: that keyword was deprecated in scikit-learn 1.4 and removed
# in 1.6, so the explicit form runs on both old and new releases.
svm_rmse = mean_squared_error(y_test, svm_pred) ** 0.5



In [33]:
# Express R² as a percentage. R² is not bounded below by 0, so this value
# can be negative for a model that fits worse than predicting the mean.
svm_accuracy = 100 * svm_r2

In [34]:
# Display both evaluation metrics computed above: R² (as a percentage) and
# RMSE (in the units of the target column).
print(f"SVM Model Accuracy (R²): {svm_accuracy:.2f}%")
print(f"SVM Model RMSE: {svm_rmse:.4f}")

SVM Model Accuracy (R²): 23.32%
