In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
import joblib
import os

# Paths: the project root is taken to be the parent of the current working
# directory, so this is expected to run from a sub-folder of the project
# (e.g. a notebooks directory).
BASE_DIR = os.path.dirname(os.getcwd())
DATA_DIR = os.path.join(BASE_DIR, "data")
MODELS_DIR = os.path.join(BASE_DIR, "models")

os.makedirs(MODELS_DIR, exist_ok=True)

# Column names used throughout the preprocessing step.
FEATURE_COLUMNS = ["temperature", "humidity", "pressure", "wind_speed"]
TARGET_COLUMN = "weather"

# Load raw data
df = pd.read_csv(os.path.join(DATA_DIR, "raw_weather_data.csv"))

# Drop rows with any missing value. Note that this leaves gaps in df's index.
df.dropna(inplace=True)

# Label Encoding (TARGET): replace the weather labels with integer codes.
le = LabelEncoder()
df[TARGET_COLUMN] = le.fit_transform(df[TARGET_COLUMN])

# Feature Scaling (INPUTS): standardize the numeric feature columns.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(df[FEATURE_COLUMNS])

# X_scaled is a plain array, so this frame gets a fresh 0..n-1 index.
processed_df = pd.DataFrame(X_scaled, columns=FEATURE_COLUMNS)

# Assign the target positionally. Assigning the Series directly would align
# on index labels; because dropna() left gaps in df's index, that would pair
# labels with the wrong feature rows and introduce NaNs in the target column.
processed_df[TARGET_COLUMN] = df[TARGET_COLUMN].to_numpy()

# Save processed data
processed_df.to_csv(
    os.path.join(DATA_DIR, "processed_weather_data.csv"),
    index=False
)

# Persist the fitted scaler and label encoder so the same transformations
# can be re-applied later (this step was previously missing).
joblib.dump(scaler, os.path.join(MODELS_DIR, "scaler.pkl"))
joblib.dump(le, os.path.join(MODELS_DIR, "label_encoder.pkl"))

print("âœ… Preprocessing complete")
print("âœ… scaler.pkl and label_encoder.pkl saved")

âœ… Preprocessing complete
âœ… scaler.pkl and label_encoder.pkl saved
