In [None]:
# Data Preprocessing
from pathlib import Path

import pandas as pd
from sklearn.preprocessing import StandardScaler

# Load datasets
train_file = 'data/train.csv'
weather_file = 'data/weather_train.csv'
# Directory that receives the processed CSVs; created on demand before saving.
processed_dir = Path('data/processed')

data = pd.read_csv(train_file)
weather_data = pd.read_csv(weather_file)

# Merge datasets
# A left join keeps every row of the training table; rows without a matching
# weather record get NaN in the weather columns and are removed by the
# dropna() call further down.
# NOTE: the join on 'timestamp' compares the values exactly as read_csv loaded
# them (no datetime parsing has happened yet), so both files need to represent
# timestamps the same way for rows to match.
data = data.merge(weather_data, on=['site_id', 'timestamp'], how='left')

# Feature engineering: derive calendar components from the timestamp.
data['timestamp'] = pd.to_datetime(data['timestamp'])
data['hour'] = data['timestamp'].dt.hour
data['day'] = data['timestamp'].dt.day  # day of the month, not day of week
data['month'] = data['timestamp'].dt.month

# Select relevant features
features = ['hour', 'day', 'month', 'air_temperature', 'dew_temperature', 'sea_level_pressure', 'wind_speed']
target = 'meter_reading'

# Handle missing values: drop every row that lacks at least one feature.
# Only the feature columns are checked here; the target column is not.
data = data.dropna(subset=features)

# Normalize features (StandardScaler standardizes each column to zero mean and
# unit variance).
# NOTE(review): the scaler is fitted on all remaining rows and is not saved in
# this cell; if a train/validation split or inference step happens later,
# confirm that this is intended.
scaler = StandardScaler()
data[features] = scaler.fit_transform(data[features])

# Prepare data for training
X = data[features]
y = data[target]

# Save processed data
# DataFrame.to_csv does not create missing parent directories, so make sure the
# output directory exists first; otherwise all work above is lost to an OSError
# on a fresh checkout.
processed_dir.mkdir(parents=True, exist_ok=True)
X.to_csv(processed_dir / 'features.csv', index=False)
y.to_csv(processed_dir / 'target.csv', index=False)

print("Data preprocessing completed.")
