# In [1]:
import pandas as pd
import numpy as np
# Load the dataset; 'dteday' is parsed into datetime values while reading.
df = pd.read_csv('./data/hour.csv', parse_dates=['dteday'])

# Discrete-valued columns that are stored with the pandas 'category' dtype.
categorical_cols = ['season', 'weathersit', 'weekday', 'mnth', 'hr']
for col in categorical_cols:
    # Later steps branch on whether 'hr' exists (daily vs. hourly data), so a
    # column missing from the frame is skipped instead of raising KeyError.
    if col in df.columns:
        df[col] = df[col].astype('category')



# Cyclical (sin/cos) encodings: each value is mapped onto a circle so that the
# end of a period sits next to its start (e.g. hour 23 next to hour 0).
# Keys are source columns, values are the period used as the angle divisor.
cyclical_periods = {'hr': 24, 'mnth': 12, 'weekday': 7}
for col, period in cyclical_periods.items():
    # Daily data has no 'hr' column; skip any column the frame does not have.
    if col not in df.columns:
        continue
    # The column may hold a 'category' dtype, so convert to integers once and
    # reuse the resulting angle for both components.
    angle = 2 * np.pi * df[col].astype(int) / period
    df[f'{col}_sin'] = np.sin(angle)
    df[f'{col}_cos'] = np.cos(angle)

# Remove the 'casual' and 'registered' columns when they are present.
# NOTE(review): presumably these are components of the target 'cnt' and are
# dropped to avoid leaking it into the features — confirm against the dataset docs.
to_drop = [name for name in ('casual', 'registered') if name in df.columns]
if to_drop:
    df = df.drop(columns=to_drop)

# Weekend indicator: 1 where the weekday code is 0 or 6, otherwise 0.
# NOTE(review): presumably 0/6 are Sunday/Saturday in this dataset's encoding — confirm.
weekday_codes = df['weekday'].astype(int)
df['is_weekend'] = ((weekday_codes == 0) | (weekday_codes == 6)).astype(int)

# Optional: split 'temp' into four equal-frequency bins; labels=False keeps
# the integer bin index (0-3) rather than interval labels.
df['temp_bucket'] = pd.qcut(df['temp'], q=4, labels=False)

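# Optional: holiday proximity (currently disabled).
# NOTE: the expression below only yields the gap in days between consecutive
# rows' dates, not the distance to a holiday; rework it before enabling.
# df['holiday_proximity'] = df['dteday'].diff().dt.days.fillna(0)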

# Lag features are only built when the frame has an 'hr' column (hourly data).
# NOTE(review): shift() is positional, so "previous hour" assumes the rows are
# already in chronological order with no missing hours — confirm for the input file.
if 'hr' in df.columns:
    previous_cnt = df['cnt'].shift(1)

    # Count from the preceding row (NaN for the first row)
    df['cnt_lag1'] = previous_cnt

    # Mean over the six rows before the current one; the shift keeps the
    # current row's own count out of the window
    df['cnt_roll_6h'] = previous_cnt.rolling(window=6).mean()

# Choose the output file by granularity: a frame with an 'hr' column is
# treated as hourly data, anything else as daily data.
processed_path = (
    './data/processed_hour.csv'
    if 'hr' in df.columns
    else './data/processed_day.csv'
)
df.to_csv(processed_path, index=False)

# Column list of the frame as it stands here, one name per line.
# NOTE(review): this runs after the feature engineering above, so the "raw"
# file also lists the engineered columns and the target — confirm that is intended.
all_columns = list(df.columns)
with open('features_raw.txt', 'w') as raw_file:
    raw_file.writelines(name + '\n' for name in all_columns)

# Same list with the target column 'cnt' left out
final_features = [name for name in all_columns if name != 'cnt']
with open('features_final.txt', 'w') as final_file:
    final_file.writelines(name + '\n' for name in final_features)

