# Encoding values

In [5]:
from sklearn.preprocessing import LabelEncoder
import pandas as pd
import pickle
# Load the cleaned dataset written to the artifacts directory.
df = pd.read_csv('../artifacts/cleaned_data.csv')

# Categorical column -> key under which its fitted encoder is pickled.
# Insertion order determines the order of entries in `encoders`.
encoder_keys = {
    "Cloud Cover": 'cover_encoder',
    'Season': 'season_encoder',
    'Location': 'location_encoder',
    'Weather Type': 'weather_type_encoder',
}

# Fit one LabelEncoder per categorical column and overwrite the column with
# its integer codes.
encoders = {}
for column_name, encoder_key in encoder_keys.items():
    fitted_encoder = LabelEncoder()
    df[column_name] = fitted_encoder.fit_transform(df[column_name])
    encoders[encoder_key] = fitted_encoder

# Keep the individual encoder names bound in the notebook namespace.
cover_encoder = encoders['cover_encoder']
season_encoder = encoders['season_encoder']
location_encoder = encoders['location_encoder']
weather_type_encoder = encoders['weather_type_encoder']

# Interaction features: element-wise products of two numeric columns each.
df['Temperature_Humidity'] = df['Temperature'] * df['Humidity']
df['Wind_Speed_Precip'] = df['Wind Speed'] * df['Precipitation (%)']

# Persist the fitted encoders as a single dict in a .pkl file.
with open("../data/label_encoders.pkl", "wb") as f:
    pickle.dump(encoders, f)

# Viewing the label-encoded mappings

In [6]:
# Reload the encoder dict from disk and print, for each encoder, every
# integer code next to the category label it stands for.
# NOTE: pickle.load executes arbitrary code from the file; only use it on
# files this project produced itself.
with open("../data/label_encoders.pkl", "rb") as f:
    encoders = pickle.load(f)

for col in encoders:
    encoder = encoders[col]
    # Encoding the encoder's own class list yields the code for each label.
    codes = encoder.transform(encoder.classes_)
    print(f"\n{col} Mapping:")
    for k, v in zip(codes, encoder.classes_):
        print(f"  {k} → {v}")


cover_encoder Mapping:
  0 → clear
  1 → cloudy
  2 → overcast
  3 → partly cloudy

season_encoder Mapping:
  0 → Autumn
  1 → Spring
  2 → Summer
  3 → Winter

location_encoder Mapping:
  0 → coastal
  1 → inland
  2 → mountain

weather_type_encoder Mapping:
  0 → Cloudy
  1 → Rainy
  2 → Snowy
  3 → Sunny


# Scaling

In [7]:
from sklearn.preprocessing import StandardScaler

# Numeric columns to standardise, including the two interaction features.
numerical_columns = [
    'Temperature',
    'Humidity',
    'Wind Speed',
    'Precipitation (%)',
    'Atmospheric Pressure',
    'UV Index',
    'Visibility (km)',
    'Temperature_Humidity',
    'Wind_Speed_Precip',
]

# Learn the scaling statistics from these columns, then overwrite the
# columns with their scaled values.
# NOTE(review): the scaler is fitted on every row of df; if a train/test
# split happens later, confirm that this is intended.
scaler = StandardScaler()
scaler.fit(df[numerical_columns])
df[numerical_columns] = scaler.transform(df[numerical_columns])

# Persist the fitted scaler to a .pkl file.
with open("../data/scaler.pkl", "wb") as f:
    pickle.dump(scaler, f)

# Saving the file

In [8]:
# Move 'Weather Type' to the last column position so that it follows every
# other column in the saved CSV.
# pop() removes the column from df in place and returns it; assigning it
# back appends it at the end.
weatherType = df.pop('Weather Type')
df['Weather Type'] = weatherType

# Give to_csv the path itself instead of a handle opened in "wb" mode:
# to_csv produces text, and binary file handles are only accepted by
# pandas >= 1.2 (older versions raise TypeError). Letting pandas open the
# file also lets it manage encoding and newline handling.
df.to_csv("../artifacts/engineered_data.csv", index=False)