In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
import pickle

# Read the weather classification data from the artifacts directory
# (the path is relative to the notebook's working directory).
df = pd.read_csv('../artifacts/weather_classification_data.csv')

# Count null entries column by column and report the totals.
missing_values = df.isna().sum()
print("Missing values per column:\n", missing_values)

Missing values per column:
 Temperature             0
Humidity                0
Wind Speed              0
Precipitation (%)       0
Cloud Cover             0
Atmospheric Pressure    0
UV Index                0
Season                  0
Visibility (km)         0
Location                0
Weather Type            0
dtype: int64


In [None]:
from sklearn.impute import SimpleImputer

# Column groups for the weather dataset loaded into `df`.
num_cols = ['Temperature', 'Humidity', 'Wind Speed', 'Precipitation (%)', 'Atmospheric Pressure', 'UV Index', 'Visibility (km)']
# This dataset has no ordinal features. The previous entries
# ('ParentalSupport', 'ParentalEducation') are not columns of `df`, so the
# missing-value scan below raised KeyError before any imputation ran.
ordinal_cols = []
nominal_cols = ['Cloud Cover', 'Season', 'Location', 'Weather Type']

# Fail early, with a readable message, if a configured column is absent from
# the frame (e.g. a name left over from another dataset).
unknown_cols = [col for col in num_cols + ordinal_cols + nominal_cols if col not in df.columns]
if unknown_cols:
    raise KeyError(f"Configured columns not found in df: {unknown_cols}")

# Keep only the columns that actually contain missing values, so an imputer
# is fitted only when there is something to fill.
ordinal_missing = [col for col in ordinal_cols if df[col].isnull().any()]
nominal_missing = [col for col in nominal_cols if df[col].isnull().any()]
num_missing = [col for col in num_cols if df[col].isnull().any()]

# Impute Numerical (mean)
if num_missing:
    imputer = SimpleImputer(strategy='mean')
    df[num_missing] = imputer.fit_transform(df[num_missing])
    print(f"Mean imputation applied to numerical columns: {num_missing}")
else:
    print("No missing values found in numerical columns.")

# Impute Ordinal (mean)
# NOTE(review): the mean can produce values that are not valid ordinal levels
# and requires numeric input; consider strategy='most_frequent' or 'median'
# if ordinal columns are ever added to `ordinal_cols`.
if ordinal_missing:
    mean_imputer = SimpleImputer(strategy='mean')
    df[ordinal_missing] = mean_imputer.fit_transform(df[ordinal_missing])
    print(f"Mean imputation applied to ordinal columns: {ordinal_missing}")
else:
    print("No missing values found in ordinal columns.")

# Impute Nominal (mode)
if nominal_missing:
    mode_imputer = SimpleImputer(strategy='most_frequent')
    df[nominal_missing] = mode_imputer.fit_transform(df[nominal_missing])
    print(f"Mode imputation applied to nominal columns: {nominal_missing}")
else:
    print("No missing values found in nominal columns.")