# Encoding values

In [1]:
from sklearn.preprocessing import LabelEncoder
import pandas as pd
import pickle
# Load the cleaned dataset produced by the previous pipeline stage.
df = pd.read_csv('../artifacts/cleaned_data.csv')

# One independent LabelEncoder per categorical column, so that every
# persisted encoder keeps the classes_ of exactly the column it was fitted on.
cover_encoder = LabelEncoder()
season_encoder = LabelEncoder()
location_encoder = LabelEncoder()
weather_encoder = LabelEncoder()

df["Cloud Cover"] = cover_encoder.fit_transform(df["Cloud Cover"])
df['Season'] = season_encoder.fit_transform(df['Season'])
df['Location'] = location_encoder.fit_transform(df['Location'])
# 'Weather Type' must NOT reuse location_encoder: fit_transform refits the
# encoder, which would replace the Location classes with the Weather Type
# classes and make the saved "Location" encoder unusable.
df['Weather Type'] = weather_encoder.fit_transform(df['Weather Type'])

# Column name -> fitted encoder, used later to encode new data and decode
# predictions.
encoders = {
    "Cloud Cover": cover_encoder,
    "Season": season_encoder,
    "Location": location_encoder,
    "Weather Type": weather_encoder
}

# Interaction features, computed in this cell before the scaling step below.
df['Temperature_Humidity'] = df['Temperature'] * df['Humidity']
df['Wind_Speed_Precip'] = df['Wind Speed'] * df['Precipitation (%)']

# Save the fitted encoders to a .pkl file
with open("../data/label_encoders.pkl", "wb") as f:
    pickle.dump(encoders, f)

# Viewing the label-encoded mappings

In [2]:
# Reload the encoders that were pickled earlier in this notebook.
# NOTE: pickle.load executes arbitrary code; only load files you trust.
with open("../data/label_encoders.pkl", "rb") as pkl_file:
    encoders = pickle.load(pkl_file)

# Print one "code → original label" table per encoded column.
for column_name, fitted_encoder in encoders.items():
    labels = fitted_encoder.classes_
    codes = fitted_encoder.transform(labels)
    mapping = {code: label for code, label in zip(codes, labels)}
    print(f"\n{column_name} Mapping:")
    for code, label in mapping.items():
        print(f"  {code} → {label}")


Cloud Cover Mapping:
  0 → 0
  1 → 1
  2 → 2
  3 → 3

Season Mapping:
  0 → 0
  1 → 1
  2 → 2
  3 → 3

Location Mapping:
  0 → 0
  1 → 1
  2 → 2
  3 → 3

Weather Type Mapping:
  0 → 0
  1 → 1
  2 → 2
  3 → 3


# Scaling

In [3]:
from sklearn.preprocessing import StandardScaler

# Columns to standardise: the numeric measurements plus the two interaction
# features created in the encoding cell.
numerical_columns = [
    'Temperature',
    'Humidity',
    'Wind Speed',
    'Precipitation (%)',
    'Atmospheric Pressure',
    'UV Index',
    'Visibility (km)',
    'Temperature_Humidity',
    'Wind_Speed_Precip',
]

# Fit the scaler on those columns and overwrite them with the scaled values.
scaler = StandardScaler()
scaled_values = scaler.fit_transform(df[numerical_columns])
df[numerical_columns] = scaled_values

# Persist the fitted scaler so the same transformation can be reapplied later.
with open("../data/scaler.pkl", "wb") as scaler_file:
    pickle.dump(scaler, scaler_file)

# Saving the file

In [4]:
# Move 'Weather Type' to the last column position: pop() removes the column
# and returns it, and re-assigning it appends it at the end.
weatherType = df.pop('Weather Type')
df['Weather Type'] = weatherType

# Pass the path straight to to_csv so pandas opens the file itself, instead of
# relying on to_csv accepting a handle opened in binary mode.
df.to_csv("../artifacts/engineered_data.csv", index=False)