### **1. Data Loading & Time Indexing**

In [1]:
import pandas as pd
import numpy as np

# Location of the state-wise COVID-19 time-series CSV (Colab sample_data folder).
DATA_PATH = "/content/sample_data/India_COVID19_Statewise_TimeSeries_Analytics_2021.csv"

# Load the raw table; every later cell builds its features on this frame.
df = pd.read_csv(DATA_PATH)

In [2]:
# Convert Date to datetime values; the .dt accessors used for the calendar
# features further down require a datetime column.
parsed_dates = pd.to_datetime(df['Date'])
df['Date'] = parsed_dates

# Order rows by state, then by date within each state, so that the per-state
# shift/rolling/diff operations below walk each series in time order.
df = df.sort_values(by=['State_UT', 'Date'])

Purpose - Ensure consistent structure before deriving temporal features.

### **2. Lag Features (Temporal Dependency Capture)**

In [3]:
# Number of rows (days, given one row per state per date) to look back.
lag_features = [1, 3, 7]

# Group once and reuse: shifting inside each State_UT group keeps one state's
# values from spilling into the first rows of the next state.
new_cases_by_state = df.groupby('State_UT')['New_Cases']

for lag in lag_features:
    # The first `lag` rows of every state have no predecessor and become NaN.
    df[f'New_Cases_Lag_{lag}'] = new_cases_by_state.shift(lag)

Purpose - Capture short-term dependency patterns required for autoregressive modeling.

### **3. Rolling Mean (Trend Smoothing)**

In [4]:
# Trailing moving averages of New_Cases, computed independently per state.
# A window must be full before a value is produced, so the first
# (window - 1) rows of each state are NaN.
for window in (7, 14):
    df[f'Rolling_Mean_{window}'] = (
        df.groupby('State_UT')['New_Cases']
        # Bind the window size as a default argument so each lambda keeps its own value.
        .transform(lambda cases, w=window: cases.rolling(w).mean())
    )

Purpose - Reduce volatility noise and capture short-term trend momentum.

### **4. Rolling Standard Deviation (Volatility Feature)**

In [5]:
# 7-row trailing standard deviation of New_Cases within each state
# (NaN until a state has accumulated 7 rows).
state_new_cases = df.groupby('State_UT')['New_Cases']
df['Rolling_Std_7'] = state_new_cases.transform(
    lambda cases: cases.rolling(window=7).std()
)

Purpose - Quantify case volatility for surge detection.

### **5. Growth Acceleration Feature**

In [6]:
# Relative row-over-row change in cumulative cases, computed per state.
# pct_change divides by the previous row's value, so a previous Total_Cases
# of 0 produces +/-inf. Infinite values are not removed by dropna() and would
# flow into Growth_Acceleration and the final feature table, so convert them
# to NaN and let them be handled as ordinary missing values.
df['Growth_Rate'] = (
    df.groupby('State_UT')['Total_Cases']
    .pct_change()
    .replace([np.inf, -np.inf], np.nan)
)

# First difference of the growth rate within each state: the second-order
# change (how quickly growth itself is speeding up or slowing down).
df['Growth_Acceleration'] = df.groupby('State_UT')['Growth_Rate'].diff()

Purpose - Capture second-order change (momentum shift).

### **6. Population-Normalized Features**

In [7]:
# Express cumulative and active counts as rates per 100,000 residents.
# Divide first, then scale, in that order, for both columns.
population = df['Population']
df['Cases_per_100k'] = df['Total_Cases'].div(population).mul(100000)
df['Active_per_100k'] = df['Active_Cases'].div(population).mul(100000)

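Purpose - Remove raw-count bias: expressing cases per 100,000 residents makes states with very different population sizes directly comparable.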

### **7. Temporal Decomposition Features**

In [8]:
# Calendar components taken from the datetime Date column.
df['Day'] = df['Date'].dt.day
df['Month'] = df['Date'].dt.month
# pandas numbers weekdays Monday=0 ... Sunday=6.
df['Day_of_Week'] = df['Date'].dt.dayofweek
# Saturday (5) and Sunday (6) -> 1, otherwise 0. A vectorized comparison
# replaces the row-by-row Python lambda and yields the same 0/1 integers.
df['Is_Weekend'] = (df['Day_of_Week'] >= 5).astype('int64')

Purpose - Extract calendar-driven patterns.

### **8. Feature Cleanup**

In [9]:
# Discard every row that still has a missing value in any column. This is
# mainly the leading rows of each state, where the lag, rolling and diff
# features have no history to draw on.
df = df.dropna()

# Renumber rows 0..n-1 so the index has no gaps left by the dropped rows.
df = df.reset_index(drop=True)

# Preview the engineered feature table.
df.head()

Unnamed: 0,Date,State_UT,Population,New_Cases,New_Deaths,New_Recoveries,Total_Cases,Total_Deaths,Total_Recoveries,Active_Cases,...,Rolling_Mean_14,Rolling_Std_7,Growth_Rate,Growth_Acceleration,Cases_per_100k,Active_per_100k,Day,Month,Day_of_Week,Is_Weekend
0,2021-01-14,Andaman and Nicobar,57755036,514,11,426,7033,134,5935,964,...,502.357143,16.184943,0.078846,0.000256,12.177293,1.669119,14,1,3,0
1,2021-01-15,Andaman and Nicobar,57755036,492,11,426,7525,145,6361,1019,...,503.428571,17.065699,0.069956,-0.008891,13.029167,1.764348,15,1,4,0
2,2021-01-16,Andaman and Nicobar,57755036,504,16,426,8029,161,6787,1081,...,503.928571,16.133373,0.066977,-0.002979,13.901818,1.871698,16,1,5,1
3,2021-01-17,Andaman and Nicobar,57755036,468,7,415,8497,168,7202,1127,...,501.071429,16.886174,0.058289,-0.008688,14.712137,1.951345,17,1,6,1
4,2021-01-18,Andaman and Nicobar,57755036,504,7,436,9001,175,7638,1188,...,501.857143,17.125587,0.059315,0.001026,15.584788,2.056963,18,1,0,0


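Purpose - Remove rows containing NaNs, chiefly the first rows of each state where the lag, rolling, and growth features cannot yet be computed (the 14-day rolling window is the longest). Note that `dropna()` checks every column, so a row with a missing value in any other column is removed as well.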