In [31]:
import pandas as pd
import numpy as np
import os

# --- Configuration ---
DATA_DIR = '../data'
FILE_SUFFIX = '_gtrends.csv'      # naming convention of the downloaded trend files
VOLUME_COL = 'gtrends_volume'     # the only input column the features are built from
LAG_DAYS = (1, 2, 3, 7, 14)       # row offsets used for the lag features
ROLLING_WINDOWS = (3, 7, 14)      # window sizes (in rows) for the rolling statistics


def engineer_trend_features(volume_df, lags=LAG_DAYS, windows=ROLLING_WINDOWS):
    """Build lag, rolling-window and difference features for one trend.

    Parameters
    ----------
    volume_df : pandas.DataFrame
        Frame containing a ``gtrends_volume`` column. Any other columns
        (e.g. the 'is_partial' flag) are dropped.
    lags : iterable of int
        Row offsets; each produces a ``vol_lag_{lag}`` column.
    windows : iterable of int
        Window sizes; each produces ``vol_rolling_mean_{w}``,
        ``vol_rolling_std_{w}`` and ``vol_rolling_max_{w}`` columns.

    Returns
    -------
    pandas.DataFrame
        A new frame (the input is not modified) holding the volume column
        followed by the engineered features, with every NaN replaced by 0.
    """
    # Explicit copy: adding columns to a column-subset of another frame
    # otherwise triggers SettingWithCopyWarning under classic pandas semantics.
    features = volume_df[[VOLUME_COL]].copy()
    volume = features[VOLUME_COL]

    # Lag features (past values)
    for lag in lags:
        features[f'vol_lag_{lag}'] = volume.shift(lag)

    # Rolling window features (local trends); each window includes the current row
    for window in windows:
        rolling = volume.rolling(window=window)
        features[f'vol_rolling_mean_{window}'] = rolling.mean()
        features[f'vol_rolling_std_{window}'] = rolling.std()
        features[f'vol_rolling_max_{window}'] = rolling.max()

    # Difference features (velocity and acceleration)
    features['vol_diff_1'] = volume.diff(1)                    # Daily change
    features['vol_diff_7'] = volume.diff(7)                    # Weekly change
    features['vol_accel_1'] = features['vol_diff_1'].diff(1)   # Daily acceleration

    # The shifts/rolling windows leave NaNs in the leading rows; they are
    # replaced by 0 (existing behaviour, kept for compatibility).
    # NOTE(review): a filled 0 is indistinguishable from a genuine 0 value.
    return features.fillna(0)


# Get all the CSV files we downloaded. os.listdir returns entries in arbitrary
# order, so sort (case-insensitively, with the raw name as a tie-breaker) to
# make the processing order reproducible across runs and platforms.
all_files = sorted(
    (f for f in os.listdir(DATA_DIR) if f.endswith(FILE_SUFFIX)),
    key=lambda name: (name.lower(), name),
)

processed_trends = {}

# --- Feature Engineering Loop ---
for file in all_files:
    # Strip only the trailing suffix to obtain the keyword used as dict key
    keyword = file[:-len(FILE_SUFFIX)]
    print(f"Processing: {keyword}")

    df = pd.read_csv(os.path.join(DATA_DIR, file), index_col='date', parse_dates=True)
    df = engineer_trend_features(df)

    processed_trends[keyword] = df

print(f"\nSuccessfully processed {len(processed_trends)} trends.")


Processing: Agnipath_scheme_IN
Processing: Among_Us_WW
Processing: Animal_Crossing_WW
Processing: Apple_Vision_Pro_WW
Processing: Area_51_raid_WW
Processing: Barbenheimer_WW
Processing: BeReal_WW
Processing: Bigg_Boss_IN
Processing: Bitcoin_price_WW
Processing: Black_Lives_Matter_WW
Processing: Brahmastra_IN
Processing: Brexit_WW
Processing: Bridgerton_WW
Processing: Chandrayaan-3_IN
Processing: ChatGPT_WW
Processing: Citizenship_Amendment_Act_IN
Processing: Clubhouse_app_WW
Processing: corn_song_WW
Processing: COVID-19_symptoms_WW
Processing: Cyberpunk_2077_WW
Processing: Demonetisation_India_IN
Processing: Dhinchak_Pooja_IN
Processing: Elden_Ring_WW
Processing: Elon_Musk_Twitter_WW
Processing: Fall_Guys_WW
Processing: Farmers_protest_India_IN
Processing: fidget_spinner_WW
Processing: G20_summit_Delhi_IN
Processing: GameStop_stock_WW
Processing: Harlem_Shake_WW
Processing: ice_bucket_challenge_WW
Processing: India_vs_Pakistan_World_Cup_IN
Processing: James_Webb_Telescope_WW
Processing

In [33]:
# Spot-check the engineered feature table for a single keyword; as the last
# expression of the cell, the DataFrame is rendered as the cell's output.
processed_trends['Sora_AI_WW']

Unnamed: 0_level_0,gtrends_volume,vol_lag_1,vol_lag_2,vol_lag_3,vol_lag_7,vol_lag_14,vol_rolling_mean_3,vol_rolling_std_3,vol_rolling_max_3,vol_rolling_mean_7,vol_rolling_std_7,vol_rolling_max_7,vol_rolling_mean_14,vol_rolling_std_14,vol_rolling_max_14,vol_diff_1,vol_diff_7,vol_accel_1
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2024-02-15,13,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0
2024-02-16,100,13.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,87.0,0.0,0.0
2024-02-17,74,100.0,13.0,0.0,0.0,0.0,62.333333,44.657959,100.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,-26.0,0.0,-113.0
2024-02-18,60,74.0,100.0,13.0,0.0,0.0,78.000000,20.297783,100.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,-14.0,0.0,12.0
2024-02-19,55,60.0,74.0,100.0,0.0,0.0,63.000000,9.848858,74.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,-5.0,0.0,9.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-05-11,3,3.0,3.0,3.0,3.0,3.0,3.000000,0.000000,3.0,3.000000,0.000000,3.0,3.000000,0.000000,3.0,0.0,0.0,0.0
2024-05-12,2,3.0,3.0,3.0,3.0,3.0,2.666667,0.577350,3.0,2.857143,0.377964,3.0,2.928571,0.267261,3.0,-1.0,-1.0,-1.0
2024-05-13,3,2.0,3.0,3.0,3.0,3.0,2.666667,0.577350,3.0,2.857143,0.377964,3.0,2.928571,0.267261,3.0,1.0,0.0,2.0
2024-05-14,3,3.0,2.0,3.0,3.0,3.0,2.666667,0.577350,3.0,2.857143,0.377964,3.0,2.928571,0.267261,3.0,0.0,0.0,-1.0
