In [1]:

import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from imblearn.over_sampling import SMOTE

# Load dataset
df = pd.read_csv("weather_outfit_dataset_1100_modified.csv")

# Encode target variable
le = LabelEncoder()
df['Outfit'] = le.fit_transform(df['Outfit'].astype(str))

# One-hot encode categorical features
categorical_cols = ['Weather', 'TimeOfDay', 'LocationType', 'Gender', 'AgeGroup']
df = pd.get_dummies(df, columns=categorical_cols)

# Features and Target
X = df.drop('Outfit', axis=1)
Y = df['Outfit']

# Handle class imbalance
sm = SMOTE(random_state=42)
X_res, Y_res = sm.fit_resample(X, Y)

# Train-Test Split
X_train, X_test, Y_train, Y_test = train_test_split(X_res, Y_res, test_size=0.2, random_state=42, stratify=Y_res)

# Base Models
rf = RandomForestClassifier(n_estimators=200, max_depth=20, random_state=42)
dt = DecisionTreeClassifier(max_depth=10, min_samples_split=10, min_samples_leaf=5, random_state=42)
lr = LogisticRegression(max_iter=1000)
xgb = XGBClassifier(eval_metric='mlogloss', random_state=42)

# Stacking Ensemble
estimators = [('rf', rf), ('dt', dt), ('xgb', xgb)]
stack = StackingClassifier(estimators=estimators, final_estimator=lr, cv=5)
stack.fit(X_train, Y_train)

# Prediction and Evaluation
Y_pred = stack.predict(X_test)
accuracy = accuracy_score(Y_test, Y_pred)
print(f"Stacked Model Accuracy: {accuracy:.4f}")

# # Cross-validation Accuracy
# cv = StratifiedKFold(n_splits=10)
# scores = cross_val_score(stack, X_res, Y_res, cv=cv, scoring='accuracy')
# print("Cross-validated Accuracy:", scores.mean())


Stacked Model Accuracy: 0.8728


In [2]:
X_test

Unnamed: 0,Temperature,Humidity,WindSpeed,Weather_Cloudy,Weather_Rainy,Weather_Snowy,Weather_Sunny,Weather_Windy,TimeOfDay_Day,TimeOfDay_Evening,...,TimeOfDay_Night,LocationType_Coastal,LocationType_Mountain,LocationType_Urban,Gender_Female,Gender_Male,AgeGroup_Adult,AgeGroup_Child,AgeGroup_Senior,AgeGroup_Teen
1392,10,57,2,False,False,False,True,False,False,False,...,False,False,False,False,False,True,False,False,False,False
1649,28,90,28,False,False,False,True,False,False,False,...,False,False,False,False,False,True,False,False,False,False
640,12,78,15,False,False,False,False,True,False,False,...,False,False,True,False,True,False,False,False,True,False
997,25,65,3,True,False,False,False,False,False,False,...,True,False,True,False,True,False,False,False,False,True
402,7,70,24,False,False,False,False,True,False,False,...,False,True,False,False,False,True,False,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
230,28,21,12,False,True,False,False,False,True,False,...,False,False,True,False,True,False,False,False,False,True
233,13,61,38,False,False,True,False,False,False,False,...,True,False,True,False,True,False,False,False,True,False
1302,28,29,23,False,False,False,True,False,False,False,...,False,False,False,False,False,False,False,False,False,False
725,15,20,14,False,False,True,False,False,False,False,...,True,False,True,False,False,True,False,True,False,False


In [3]:
Y_test

1392    2
1649    4
640     3
997     0
402     5
       ..
230     1
233     5
1302    2
725     5
1722    4
Name: Outfit, Length: 346, dtype: int64

In [4]:
pred = stack.predict(X_test.iloc[3, 0:21].values.reshape(1,-1))

In [5]:
pred

array([0])