In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the NCRB city-wise cybercrime CSV.
# NOTE: this is an absolute, machine-specific path; the notebook will not run
# on another machine without editing it.
file_path = r"C:\Users\91944\Desktop\Research\Crime\All datasets\cyber-crimes-from-ncrb-master-data-year-state-and-city-wise-total-number-of-cyber-crimes-committed-in-india.csv"
data = pd.read_csv(file_path)

# Cities compared in this analysis: Kolkata plus four other major metros.
selected_cities = ['Kolkata', 'Mumbai', 'Delhi', 'Bengaluru', 'Chennai']

# Preprocessing, expressed as a single chain that yields a fresh frame:
#   1. keep only rows whose 'city' is one of the selected metros,
#   2. drop rows missing any of year / city / value,
#   3. parse 'year' as a datetime (format '%Y') for time-series plotting.
filtered_data = (
    data.loc[data['city'].isin(selected_cities)]
    .dropna(subset=['year', 'city', 'value'])
    .assign(year=lambda frame: pd.to_datetime(frame['year'], format='%Y'))
)

# Wide layout for the line graph: one row per year, one column per city.
# DataFrame.pivot raises if any (year, city) pair occurs more than once.
pivot_data = filtered_data.pivot(index='year', columns='city', values='value')

# Visualization 1: yearly cybercrime counts, one line per selected city,
# drawn on an explicit figure/axes pair and written to a PNG.
fig, ax = plt.subplots(figsize=(14, 6))
for city in selected_cities:
    # Each city is looked up as a column of pivot_data; a city that is absent
    # from the pivoted columns would raise KeyError here.
    ax.plot(pivot_data.index, pivot_data[city], label=city)
ax.set_title("Yearly Cybercrime Trends: Kolkata vs Other Major Metros")
ax.set_xlabel("Year")
ax.set_ylabel("Number of Cybercrimes")
ax.legend()
ax.grid(True)
fig.tight_layout()
fig.savefig("line_graph_kolkata_vs_metros.png", dpi=300)
# Close the figure once it has been written to disk.
plt.close(fig)
print("Line graph saved as 'line_graph_kolkata_vs_metros.png'.")

# Per-city volatility: standard deviation of each city's yearly 'value',
# returned as a two-column frame (City, Volatility).
# NOTE(review): nothing else in this cell reads volatility_data.
volatility_data = (
    filtered_data.groupby('city')['value']
    .std()
    .rename_axis('City')
    .rename('Volatility')
    .reset_index()
)

# Visualization 2: distribution of yearly counts per city as a box plot,
# written to a PNG.
# filtered_data was already restricted to selected_cities when it was built,
# so this mask is expected to keep every row.
metro_rows = filtered_data[filtered_data['city'].isin(selected_cities)]

fig, ax = plt.subplots(figsize=(12, 6))
sns.boxplot(x='city', y='value', data=metro_rows, ax=ax)
ax.set_title("Cybercrime Variations: Kolkata vs Other Major Metros")
ax.set_xlabel("City")
ax.set_ylabel("Number of Cybercrimes")
# Tilt the city names on the x axis.
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
fig.savefig("boxplot_kolkata_vs_metros.png", dpi=300)
# Close the figure once it has been written to disk.
plt.close(fig)
print("Box plot saved as 'boxplot_kolkata_vs_metros.png'.")


Line graph saved as 'line_graph_kolkata_vs_metros.png'.
Box plot saved as 'boxplot_kolkata_vs_metros.png'.


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the NCRB city-wise cybercrime CSV.
# NOTE: this is an absolute, machine-specific path; the notebook will not run
# on another machine without editing it.
file_path = r"C:\Users\91944\Desktop\Research\Crime\All datasets\cyber-crimes-from-ncrb-master-data-year-state-and-city-wise-total-number-of-cyber-crimes-committed-in-india.csv"
data = pd.read_csv(file_path)

# Cities compared in this analysis: Kolkata plus four other major metros.
selected_cities = ['Kolkata', 'Mumbai', 'Delhi', 'Bengaluru', 'Chennai']

# Preprocessing, expressed as a single chain that yields a fresh frame:
#   1. keep only rows whose 'city' is one of the selected metros,
#   2. drop rows missing any of year / city / value,
#   3. coerce 'year' to a number (unparseable entries become NaN),
#   4. drop the rows whose year could not be parsed.
filtered_data = (
    data.loc[data['city'].isin(selected_cities)]
    .dropna(subset=['year', 'city', 'value'])
    .assign(year=lambda frame: pd.to_numeric(frame['year'], errors='coerce'))
    .dropna(subset=['year'])
)

# Pivot data for easier comparison (years as index, cities as columns).
# pivot_table combines any duplicate (year, city) rows with its default
# aggfunc (mean) instead of raising the way DataFrame.pivot would.
pivot_data = filtered_data.pivot_table(index='year', columns='city', values='value')

# Missing (year, city) cells are deliberately left as NaN. Filling them with 0
# would turn every gap into a fake -100% drop followed by a division by zero
# (+inf) in the next year, and a single inf makes that city's standard
# deviation -- the volatility metric computed from this frame -- NaN.

# Calculate year-over-year percentage change.
# fill_method=None keeps pandas from forward-filling gaps before differencing,
# so a missing year yields NaN rather than an invented 0% change.
# A genuine zero baseline still divides to +/-inf; those are mapped to NaN so
# that plotting and .std() skip the undefined change instead of being poisoned.
percentage_change = (
    pivot_data.pct_change(fill_method=None) * 100
).replace([float('inf'), float('-inf')], float('nan'))

# Visualization 1: year-over-year percentage change, one line per selected
# city, drawn on an explicit figure/axes pair and written to a PNG.
fig, ax = plt.subplots(figsize=(14, 6))
for city in selected_cities:
    # Each city is looked up as a column of percentage_change; a city that is
    # absent from those columns would raise KeyError here.
    ax.plot(percentage_change.index, percentage_change[city], label=city)
ax.set_title("Yearly Cybercrime Percentage Change: Kolkata vs Other Major Metros")
ax.set_xlabel("Year")
ax.set_ylabel("Percentage Change in Cybercrimes")
ax.legend()
ax.grid(True)
fig.tight_layout()
fig.savefig("line_graph_kolkata_vs_metros_pct_change.png", dpi=300)
# Close the figure once it has been written to disk.
plt.close(fig)
print("Line graph (percentage change) saved as 'line_graph_kolkata_vs_metros_pct_change.png'.")

# Volatility metric: per-city standard deviation of the year-over-year
# percentage changes (one value per column of percentage_change).
city_volatility = percentage_change.std()

# Visualization 2: one bar per city, ordered from lowest to highest
# volatility, written to a PNG.
fig, ax = plt.subplots(figsize=(10, 6))
city_volatility.sort_values().plot.bar(ax=ax)
ax.set_title("Volatility of Cybercrime Trends (Standard Deviation of Percentage Change)")
ax.set_xlabel("City")
ax.set_ylabel("Volatility (Std Dev of % Change)")
# Tilt the city names on the x axis.
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
fig.savefig("volatility_bar_plot.png", dpi=300)
# Close the figure once it has been written to disk.
plt.close(fig)
print("Volatility bar plot saved as 'volatility_bar_plot.png'.")
