In [None]:
# Imported packages


In [None]:
import pandas as pd

In [None]:
# Load the crime_data workbook into a DataFrame.
# NOTE(review): '/content/' is a hard-coded absolute path (presumably a Colab
# runtime) — confirm or make it configurable before running elsewhere.
crime_data = pd.read_excel('/content/crime_data.xlsx')

In [None]:
# Print the column names, non-null counts, dtypes, and memory usage of crime_data.
crime_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7584 entries, 0 to 7583
Data columns (total 11 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   CATEGORY         7584 non-null   object        
 1   CALL GROUPS      7584 non-null   object        
 2   final_case_type  7584 non-null   object        
 3   CASE DESC        7584 non-null   object        
 4   occ_date         7584 non-null   datetime64[ns]
 5   x_coordinate     7584 non-null   int64         
 6   y_coordinate     7584 non-null   int64         
 7   census_tract     7220 non-null   float64       
 8   day              7584 non-null   int64         
 9   month            7584 non-null   int64         
 10  year             7584 non-null   int64         
dtypes: datetime64[ns](1), float64(1), int64(5), object(4)
memory usage: 651.9+ KB


In [None]:
# Load the crime_test_data workbook into a DataFrame.
# NOTE(review): '/content/' is a hard-coded absolute path (presumably a Colab
# runtime) — confirm or make it configurable before running elsewhere.
crime_test_data = pd.read_excel('/content/crime_test_data.xlsx')

In [None]:
# Print the column names, non-null counts, dtypes, and memory usage of crime_test_data.
crime_test_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7584 entries, 0 to 7583
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   CATEGORY         7584 non-null   object        
 1   CALL GROUPS      7584 non-null   object        
 2   final_case_type  7584 non-null   object        
 3   CASE DESC        7584 non-null   object        
 4   occ_date         7584 non-null   datetime64[ns]
 5   x_coordinate     7584 non-null   int64         
 6   y_coordinate     7584 non-null   int64         
 7   census_tract     7220 non-null   float64       
dtypes: datetime64[ns](1), float64(1), int64(2), object(4)
memory usage: 474.1+ KB


In [None]:
# Load the total_data workbook into a DataFrame.
# NOTE(review): the info() output for this frame lists an 'Unnamed: 0' column,
# which usually means the sheet was written together with its index — consider
# whether index_col=0 is wanted here; confirm against how the file was produced.
# NOTE(review): '/content/' is a hard-coded absolute path (presumably a Colab
# runtime) — confirm or make it configurable before running elsewhere.
total_data = pd.read_excel('/content/total_data.xlsx')

In [None]:
# Print the column names, non-null counts, dtypes, and memory usage of total_data.
total_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7584 entries, 0 to 7583
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Unnamed: 0    7584 non-null   int64  
 1   day_of_week   7584 non-null   int64  
 2   month         7584 non-null   int64  
 3   year          7584 non-null   int64  
 4   x_coordinate  7584 non-null   int64  
 5   y_coordinate  7584 non-null   int64  
 6   census_tract  7584 non-null   float64
 7   CATEGORY      7584 non-null   object 
dtypes: float64(1), int64(6), object(1)
memory usage: 474.1+ KB


In [None]:
!pip install geopandas



In [None]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, Polygon
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split


# Turn each (x_coordinate, y_coordinate) pair into a shapely Point and attach the
# points to the crime table as its geometry column, tagged as EPSG:4326.
# NOTE(review): both coordinate columns are int64 in the info() output above,
# which is unusual for latitude/longitude in degrees — they look like projected
# coordinates. Confirm the source CRS; EPSG:4326 may be the wrong label.
geometry = list(map(Point, crime_data['x_coordinate'], crime_data['y_coordinate']))
crime_gdf = gpd.GeoDataFrame(crime_data, geometry=geometry, crs="EPSG:4326")

# Bounding box of every crime point (min x, min y, max x, max y), expressed in
# the same units as the coordinate columns.
xmin, ymin, xmax, ymax = crime_gdf.total_bounds

# Side length of one square grid cell, in coordinate units.
# NOTE(review): the previous comment described this as roughly 250 ft, but 0.002
# is neither 250 ft in a foot-based projection nor 250 ft in degrees (0.002 deg
# of latitude is on the order of 200 m). With integer-valued coordinates this
# size would also generate an enormous number of cells — confirm units and value.
cell_size = 0.002

# Tile the bounding box with square cells, one column (fixed x) at a time and
# bottom-to-top within a column. The last row/column may overhang the box, and
# neighbouring cells share exactly the same edge coordinates because each edge
# is computed once and reused as the next cell's origin.
grid_cells = []
x_start, x_end, y_start, y_end = xmin, xmax, ymin, ymax
while x_start < x_end:
    x_next = x_start + cell_size
    y_temp = y_start
    while y_temp < y_end:
        y_next = y_temp + cell_size
        corners = [
            (x_start, y_temp),
            (x_next, y_temp),
            (x_next, y_next),
            (x_start, y_next),
        ]
        grid_cells.append(Polygon(corners))
        y_temp = y_next
    x_start = x_next

# Wrap the cells in a GeoDataFrame that carries the same CRS label as the points.
grid_gdf = gpd.GeoDataFrame(geometry=grid_cells, crs="EPSG:4326")

# Assign every crime point to a grid cell.
# - `predicate=` replaces the deprecated `op=` keyword, which newer GeoPandas
#   releases no longer accept.
# - "intersects" (instead of "within") also matches points lying exactly on a
#   cell edge. The grid starts at the data's minimum x/y, so the extreme points
#   always sit on an outer edge and "within" would leave them out of every cell.
# - Only the grid's geometry column is joined, so re-running this cell does not
#   pull previously computed grid columns into the result.
crime_to_grid = gpd.sjoin(
    crime_gdf,
    grid_gdf[['geometry']],
    how="left",
    predicate="intersects",
)

# A point on a shared edge or corner matches several cells. Keep only the
# lowest-numbered matching cell so each crime appears (and is counted) once,
# then restore the original row order.
crime_to_grid = crime_to_grid.sort_values('index_right', kind='stable')
crime_to_grid = crime_to_grid[~crime_to_grid.index.duplicated(keep='first')].sort_index()

# Number of crimes per grid cell. groupby drops unmatched (NaN) keys, so the
# cell ids can be cast back to int even if the join returned them as floats.
crime_counts = crime_to_grid.groupby('index_right').size().reset_index(name='crime_count')
crime_counts['index_right'] = crime_counts['index_right'].astype(int)

# Write the counts onto the grid by cell id; cells without any crime get 0.
# (Assigning a Series through .loc aligns on the Series' own 0..k-1 index rather
# than on the cell ids, which scatters the counts onto the wrong cells, so the
# counts are re-indexed by cell id explicitly instead.)
grid_gdf['crime_count'] = (
    crime_counts.set_index('index_right')['crime_count']
    .reindex(grid_gdf.index, fill_value=0)
    .astype(int)
)

# A cell is a hotspot when its count reaches the 95th percentile of all cell
# counts. On a sparse grid that percentile can be 0, so empty cells are
# explicitly excluded from being flagged.
threshold = grid_gdf['crime_count'].quantile(0.95)
grid_gdf['hotspot'] = (
    (grid_gdf['crime_count'] >= threshold) & (grid_gdf['crime_count'] > 0)
).astype(int)

# Draw the grid on a 12x12 inch figure, colouring each cell by its 0/1 hotspot
# flag with the 'coolwarm' colormap and a legend.
fig, ax = plt.subplots(figsize=(12, 12))
grid_gdf.plot(
    column='hotspot',
    cmap='coolwarm',
    legend=True,
    alpha=0.6,
    ax=ax,
)
ax.set_title("Crime Hotspots")
plt.show()

# Neighbour feature: for every cell, the total crime count of all cells that
# touch it (shared edge or shared corner). A single spatially indexed self-join
# finds all touching pairs at once, replacing a per-cell scan of the whole grid
# that grew quadratically with the number of cells.
neighbor_pairs = gpd.sjoin(
    grid_gdf[['geometry']],
    grid_gdf[['geometry', 'crime_count']],
    how="inner",
    predicate="touches",
)

# Sum the neighbours' counts per (left-hand) cell; a cell with no touching
# neighbour is absent from the join and therefore gets 0.
grid_gdf['neighbor_crime_count'] = (
    neighbor_pairs.groupby(level=0)['crime_count'].sum()
    .reindex(grid_gdf.index, fill_value=0)
    .astype(int)
)

# Model inputs (own count and neighbour count, missing values as 0) and the
# 0/1 hotspot label to predict.
features = grid_gdf[['crime_count', 'neighbor_crime_count']].fillna(0)
target = grid_gdf['hotspot']

# Hold out 20% of the grid cells for evaluation; the fixed seed makes the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

# Fit a seeded 100-tree random forest on the training cells.
# NOTE(review): the 'hotspot' label is a threshold on 'crime_count', and
# 'crime_count' is also one of the two features, so the scores printed below
# largely measure whether the forest recovers that threshold — confirm this is
# the intended experiment.
clf = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train, y_train)

# Label the held-out cells.
y_pred = clf.predict(X_test)

# Report overall accuracy and the per-class precision/recall/F1 table.
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))
print("Classification Report:\n", classification_report(y_true=y_test, y_pred=y_pred))

# Start every cell at 0, then write the model's label onto the held-out cells.
# NOTE(review): only the test-split cells receive a prediction; training cells
# stay at 0, so the map below is not a prediction for the whole grid — confirm
# that is what the figure is meant to show.
grid_gdf['predicted_hotspot'] = 0
grid_gdf.loc[X_test.index, 'predicted_hotspot'] = y_pred

# Draw the grid on a 12x12 inch figure, coloured by the predicted 0/1 flag.
fig, ax = plt.subplots(figsize=(12, 12))
grid_gdf.plot(
    column='predicted_hotspot',
    cmap='coolwarm',
    legend=True,
    alpha=0.6,
    ax=ax,
)
ax.set_title("Predicted Hotspots")
plt.show()

# Persist the results to the working directory: the full grid (geometry plus
# all columns) as GeoJSON, and the four tabular columns as a CSV without the
# index.
grid_gdf.to_file('predicted_hotspots.geojson', driver='GeoJSON')
grid_gdf.to_csv(
    'predicted_hotspots.csv',
    columns=['crime_count', 'neighbor_crime_count', 'hotspot', 'predicted_hotspot'],
    index=False,
)
