In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
import joblib
import os

In [3]:
# Project directories.
# The parent of the current working directory is used as the base folder;
# "data" and "models" are resolved as its direct children.
BASE_DIR = os.path.split(os.getcwd())[0]
DATA_DIR, MODELS_DIR = (
    os.path.join(BASE_DIR, folder) for folder in ("data", "models")
)
# Create the models folder up front; exist_ok avoids an error if it is already there.
os.makedirs(MODELS_DIR, exist_ok=True)


In [4]:
# Read the raw weather CSV from the data directory into a DataFrame.
raw_csv_path = os.path.join(DATA_DIR, "raw_weather_data.csv")
df = pd.read_csv(raw_csv_path)


In [5]:

# Remove every row that contains at least one missing value.
# The frame is modified in place, so the surviving rows keep their original
# index labels (the index is NOT renumbered).
df.dropna(axis=0, how="any", inplace=True)


In [6]:
# Encode the target column.
# Fit the encoder on the "weather" column first, then overwrite that column
# with the integer codes the fitted encoder assigns.
le = LabelEncoder().fit(df["weather"])
df["weather"] = le.transform(df["weather"])


In [None]:
# Feature Scaling (INPUTS)
# Standardize the numeric input columns and assemble the processed frame
# (scaled features + encoded target).
feature_columns = ["temperature", "humidity", "pressure", "wind_speed"]

scaler = StandardScaler()
X_scaled = scaler.fit_transform(df[feature_columns])

# X_scaled is a plain array, so this frame gets a fresh 0..n-1 index.
processed_df = pd.DataFrame(X_scaled, columns=feature_columns)

# Attach the target POSITIONALLY. `df` had rows removed by dropna, so its index
# labels may have gaps; assigning the Series directly would align on those
# labels, pairing targets with the wrong feature rows and leaving NaN where a
# label is missing. Row order of X_scaled matches row order of df, so a
# positional copy is the correct pairing.
processed_df["weather"] = df["weather"].to_numpy()


In [None]:
# Write the processed frame to the data directory as CSV, without the index column.
processed_csv_path = os.path.join(DATA_DIR, "processed_weather_data.csv")
processed_df.to_csv(processed_csv_path, index=False)


In [None]:
# Persist the fitted scaler and label encoder into the models directory,
# one pickle file per object.
for fitted_obj, pkl_name in ((scaler, "scaler.pkl"), (le, "label_encoder.pkl")):
    joblib.dump(fitted_obj, os.path.join(MODELS_DIR, pkl_name))

# Two status lines, emitted by a single print call.
print(
    "Preprocessing complete",
    "scaler.pkl and label_encoder.pkl saved",
    sep="\n",
)