In [None]:
import pandas as pd
# Load the cleaned crime records from the Colab working directory and
# preview the first rows.
df = pd.read_csv(filepath_or_buffer="/content/cleaned_crime_data.csv")
df.head()

Unnamed: 0,Crime_Type,Year,Crime_Count
0,Rape,2001,16075
1,Kidnapping and Abduction of Women & Girls,2001,14645
2,Dowry Deaths,2001,6851
3,Assault on women with intent to outrage her mo...,2001,34124
4,Insult to the modesty of Women,2001,9746


# Aggregate to National Level

In [None]:
# Collapse every crime type into a single total per year.
# as_index=False keeps 'Year' as an ordinary column, so the result has
# exactly two columns: 'Year' and the summed 'Crime_Count'.
national_df = df.groupby('Year', as_index=False)['Crime_Count'].sum()
national_df.head()

Unnamed: 0,Year,Crime_Count
0,2001,287590
1,2002,286068
2,2003,281202
3,2004,308666
4,2005,311104


# Create Lag Features

In [None]:
# Lag features: the total from the previous row and from two rows back.
# shift(k) moves values down k rows, so the first k rows become NaN.
# Rows are one per year in ascending order (groupby sorts its keys).
crime_counts = national_df['Crime_Count']
national_df['lag_1'] = crime_counts.shift(periods=1)
national_df['lag_2'] = crime_counts.shift(periods=2)

# Rolling Averages

In [None]:
# Trailing 3-row mean of the yearly totals. Each window ends at, and
# includes, the current row. min_periods=3 is the default for an integer
# window of 3, so the first two rows are NaN.
# NOTE(review): because the window includes the current year's Crime_Count,
# this column may leak the target if Crime_Count is what a downstream model
# predicts -- confirm the intended use before training on it.
trailing_window = national_df['Crime_Count'].rolling(window=3, min_periods=3)
national_df['rolling_3yr_avg'] = trailing_window.mean()

# Year-on-Year Growth Rate

In [None]:
# Percentage change of the total relative to the previous row, expressed
# in percent (fractional change multiplied by 100). The first row has no
# predecessor and is NaN.
yearly_totals = national_df['Crime_Count']
national_df['growth_rate'] = yearly_totals.pct_change().mul(100)

# Handle NaN Values

In [None]:
# Remove every row that has a NaN in any column (the leading rows where the
# lag / rolling / growth features are undefined), then renumber the index
# from 0 so it is contiguous again.
national_df = national_df.dropna().reset_index(drop=True)
national_df.head()

Unnamed: 0,Year,Crime_Count,lag_1,lag_2,rolling_3yr_avg,growth_rate
0,2003,281202,286068.0,287590.0,284953.333333,-1.700994
1,2004,308666,281202.0,286068.0,291978.666667,9.766645
2,2005,311104,308666.0,281202.0,300324.0,0.789851
3,2006,329530,311104.0,308666.0,316433.333333,5.922778
4,2007,370624,329530.0,311104.0,337086.0,12.470488


In [None]:
# Print the per-column non-null counts and dtypes of the engineered frame.
national_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 6 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Year             20 non-null     int64  
 1   Crime_Count      20 non-null     int64  
 2   lag_1            20 non-null     float64
 3   lag_2            20 non-null     float64
 4   rolling_3yr_avg  20 non-null     float64
 5   growth_rate      20 non-null     float64
dtypes: float64(4), int64(2)
memory usage: 1.1 KB


In [None]:
# Write the engineered features to CSV in the Colab working directory.
# index=False keeps the row index out of the file.
national_df.to_csv(path_or_buf="/content/feature_engineered_data.csv", index=False)

### Feature Engineering Summary

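- Created lag features (1-year and 2-year) to capture historical dependency
- Added 3-year rolling averages to smooth short-term fluctuations and expose the longer-term trend
- Computed year-on-year growth rate (in percent) to capture momentum
- Dropped the first two years (2001–2002), which have no complete lag and rolling values
- Prepared dataset for time-series machine learning models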


In [None]:
# Colab-only step: `google.colab` is importable only inside a Google Colab
# runtime, so this cell will fail elsewhere.
from google.colab import files
# Ask Colab to download the exported CSV written by the earlier to_csv cell.
files.download("/content/feature_engineered_data.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>