In [3]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns

# Read the NCRB cyber-crime CSV from its location on the local machine
file_path = r"C:\Users\91944\Desktop\Research\Crime\All datasets\cyber-crimes-from-ncrb-master-data-year-state-and-city-wise-total-number-of-cyber-crimes-committed-in-india.csv"
data = pd.read_csv(file_path)

# Data Preprocessing
# In one pass: keep only the 2019-2021 rows, discard rows lacking a 'city' or
# a 'value', and drop the "Total Cities" aggregate row so that only
# individual cities remain.
filtered_data = (
    data.loc[data['year'].isin([2019, 2020, 2021])]
    .dropna(subset=['city', 'value'])
    .loc[lambda frame: frame['city'] != 'Total Cities']
)

# Reshape to one row per city and one column per year; a city/year
# combination that has no row in the data is filled with 0.
pivot_data = (
    filtered_data
    .pivot(index='city', columns='year', values='value')
    .fillna(0)
)

# Year labels become strings so later code can select them as '2019', '2020', '2021'
pivot_data.columns = pivot_data.columns.astype(str)

# Calculate percentage change in cybercrime rates between 2019 and 2021.
# Missing city/year cells were filled with 0 when the table was pivoted, so the
# 2019 baseline can be 0. Dividing by it would produce inf (or NaN for 0/0),
# which then distorts anything that consumes this column (e.g. a colour scale).
# Mask zero baselines so the change is reported as NaN ("undefined") instead.
baseline_2019 = pivot_data['2019'].where(pivot_data['2019'] != 0)
pivot_data['Percentage Change'] = ((pivot_data['2021'] - pivot_data['2019']) / baseline_2019) * 100

# Prepare data for SVM classification
X = pivot_data[['2019', '2020', '2021']].values  # Features: cybercrime rates over three years
# Target: 1 if 2021 is higher than 2019, else 0. Comparing the raw values
# avoids depending on the division above; for non-negative counts it yields
# the same labels as `Percentage Change > 0` did, including growth from a
# zero baseline (previously inf > 0) and 0 -> 0 (previously NaN > 0).
y = (pivot_data['2021'] > pivot_data['2019']).astype(int)

# Hold out 30% of the cities for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Fit a linear-kernel SVM on the training portion
# NOTE(review): the target is derived from the 2019 and 2021 columns, both of
# which are also features, so the classes are linearly separable by
# construction and a perfect score here is expected rather than informative.
svm_model = SVC(kernel='linear', random_state=42).fit(X_train, y_train)

# Predict on the held-out cities and report per-class metrics and accuracy
y_pred = svm_model.predict(X_test)
print("Classification Report:")
report = classification_report(y_test, y_pred)
print(report)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy Score:", accuracy)

# Visualization: Before-after bar chart for pre-pandemic (2019) vs post-pandemic (2021)
# Create the figure and axes explicitly and hand the axes to pandas.
# DataFrame.plot() opens its own figure when no `ax` is given, so a separate
# plt.figure() call beforehand is left behind as an empty, never-closed
# "<Figure size 1400x600 with 0 Axes>".
fig, ax = plt.subplots(figsize=(14, 6))
pivot_data[['2019', '2021']].sort_values(by='2021', ascending=False).plot(kind='bar', ax=ax, width=0.8)
ax.set_title("Cybercrime Rates: Pre-Pandemic (2019) vs Post-Pandemic (2021)")
ax.set_xlabel("Cities")
ax.set_ylabel("Number of Cybercrimes")
# Tilt the city names and right-align them so each label ends under its bar
plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
fig.tight_layout()
fig.savefig("before_after_bar_chart.png", dpi=300)
# Close this specific figure so nothing is left open or echoed by the notebook
plt.close(fig)
print("Before-after bar chart saved as 'before_after_bar_chart.png'.")

# Visualization: Heatmap for percentage change in cybercrime rates (2019–2021)
# Single-column heatmap, one row per city, ordered from largest to smallest change
change_by_city = pivot_data[['Percentage Change']].sort_values(by='Percentage Change', ascending=False)

heatmap_fig, heatmap_ax = plt.subplots(figsize=(12, 8))
sns.heatmap(change_by_city, annot=True, cmap="coolwarm", fmt=".2f", ax=heatmap_ax)
heatmap_ax.set_title("Percentage Change in Cybercrime Rates (2019–2021)")
heatmap_ax.set_xlabel("Year")
heatmap_ax.set_ylabel("Cities")
heatmap_fig.tight_layout()

# Write the image to disk, then release the figure
heatmap_fig.savefig("percentage_change_heatmap.png", dpi=300)
plt.close(heatmap_fig)
print("Heatmap saved as 'percentage_change_heatmap.png'.")


Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         2
           1       1.00      1.00      1.00         4

    accuracy                           1.00         6
   macro avg       1.00      1.00      1.00         6
weighted avg       1.00      1.00      1.00         6

Accuracy Score: 1.0
Before-after bar chart saved as 'before_after_bar_chart.png'.
Heatmap saved as 'percentage_change_heatmap.png'.


<Figure size 1400x600 with 0 Axes>