In [2]:
import pandas as pd

In [8]:
# feature_engineering.py

import pandas as pd
from sklearn.preprocessing import MinMaxScaler, LabelEncoder

# Load the cleaned weather observations, parsing 'date_time' as datetimes
df = pd.read_csv("cleaned_weather_data.csv", parse_dates=['date_time'])

# Time-based features: copy each calendar component of the timestamp into
# its own column (column order matches the tuple below)
for component in ('year', 'month', 'day', 'hour', 'weekday'):
    df[component] = getattr(df['date_time'].dt, component)

# Weekend flag: 1 when weekday is 5 or 6, else 0
weekend_mask = df['weekday'].isin([5, 6])
df['is_weekend'] = weekend_mask.astype(int)

# Season Feature
def get_season(month):
    """Return the season name for a month number.

    Months 12, 1, 2 map to 'Winter'; 3-5 to 'Spring'; 6-8 to 'Summer'.
    Every other value (including 9-11) maps to 'Fall'.
    """
    season_months = (
        ('Winter', (12, 1, 2)),
        ('Spring', (3, 4, 5)),
        ('Summer', (6, 7, 8)),
    )
    for season, months in season_months:
        if month in months:
            return season
    # Anything not listed above falls through to 'Fall'
    return 'Fall'

# Derive the season label for every row from its month number
df['season'] = df['month'].map(get_season)

# Wind Direction Label
def wind_direction_label(degree):
    """Return the 8-point compass label for a direction given in degrees.

    The circle is split into eight 45-degree sectors centred on
    N, NE, E, SE, S, SW, W and NW; the index wraps modulo 8, so 360 maps
    back to 'N'.
    """
    compass = ('N', 'NE', 'E', 'SE', 'S', 'SW', 'W', 'NW')
    # Shift by half a sector so each label is centred on its heading
    shifted = degree + 22.5
    sector = int(shifted // 45)
    return compass[sector % 8]

# Convert each wind direction value (in degrees) to its compass label
df['wind_dir_label'] = df['wind_direction'].map(wind_direction_label)

# Encode categorical features
# Use one LabelEncoder per column. Re-fitting a single shared encoder on the
# second column discards the mapping learned for the first, so the season
# codes could no longer be decoded via classes_ / inverse_transform.
season_encoder = LabelEncoder()
wind_dir_encoder = LabelEncoder()
df['season_encoded'] = season_encoder.fit_transform(df['season'])
df['wind_dir_encoded'] = wind_dir_encoder.fit_transform(df['wind_dir_label'])

# Backward compatibility: `le` was previously left fitted on 'wind_dir_label',
# so keep that name bound to the wind-direction encoder.
le = wind_dir_encoder

# Scale numerical features
# The listed columns are overwritten in place with their min-max scaled values
numerical_cols = ['temperature', 'humidity', 'wind_speed', 'pressure', 'precipitation', 'cloud_coverage']
scaler = MinMaxScaler()
scaler.fit(df[numerical_cols])
scaled_values = scaler.transform(df[numerical_cols])
df[numerical_cols] = scaled_values

# Save engineered features to new CSV (row index is not written)
df.to_csv("weather_data_with_features.csv", index=False)

print("Feature engineering completed and saved to 'weather_data_with_features.csv'")


Feature engineering completed and saved to 'weather_data_with_features.csv'
