In [34]:
import pandas as pd
import numpy as np

# === Load & Clean Dataset ===
df = pd.read_csv("Cleaned_Final_csv.csv")
df.columns = df.columns.str.strip().str.lower()

# === Remove invalid/misleading article types ===
df = df[~df['articletype'].str.lower().isin(['shorts', 'short skirts', 'swimsuit'])]

# === Fix known synonyms ===
df['articletype'] = df['articletype'].replace({
    # Topwear fixes
    'kurtis': 'kurtas',
    'kurta sets': 'kurtas',
    'dresses': 'kurtas',

    # Bottomwear fixes
    'lounge pants': 'trousers',
    'salwar': 'trousers',
    'patiala': 'trousers',

    # Outerwear fixes
    'jackets': 'jacket',
    'hoodie': 'jacket',
    'shrug': 'dupatta',
    'shawl': 'dupatta'
})

# === Category Mapping ===
topwear = {'kurtas', 'shirts', 'tops'}
bottomwear = {'jeans', 'trousers', 'leggings', 'capris'}
outerwear = {'jacket', 'coat', 'puffer jacket', 'dupatta'}

def classify_article_type(article):
    article = str(article).strip().lower()
    if article in topwear:
        return 'top'
    elif article in bottomwear:
        return 'bottom'
    elif article in outerwear:
        return 'outer'
    return None

df['category'] = df['articletype'].apply(classify_article_type)
df = df[df['category'].notnull()]

# === Outerwear Mapping by Season & Temperature ===
outerwear_mapping = {
    'puffer jacket': {'temp_range': (0, 10), 'season': 'winter'},
    'coat': {'temp_range': (5, 15), 'season': 'winter'},
    'jacket': {'temp_range': (12, 22), 'season': 'fall'},
    'dupatta': {'temp_range': (25, 40), 'season': 'summer'}
}

# === Seasonal targets (Balanced total: 1500) ===
season_targets = {
    'winter': 375,
    'summer': 375,
    'spring': 375,
    'fall': 375
}

# === Split Categories ===
tops = df[df['category'] == 'top']
bottoms = df[df['category'] == 'bottom']
outers = df[df['category'] == 'outer']

# === Filter valid outerwear ===
outers = outers[outers['articletype'].isin(outerwear_mapping.keys())]

# === Final dataset generation ===
records = []

for season, count in season_targets.items():
    for _ in range(count):
        if season == 'summer':
            temperature = np.random.randint(26, 39)
            outer_type = np.random.choice(['dupatta', ''])  # ✅ No jackets in summer
        elif season == 'winter':
            temperature = np.random.randint(2, 14)
            matching_outers = outers[outers['articletype'].apply(lambda x: outerwear_mapping[x]['season'] == 'winter')]
            outer_type = matching_outers.sample(1).iloc[0]['articletype'] if not matching_outers.empty else ''
        elif season == 'fall':
            temperature = np.random.randint(15, 25)
            matching_outers = outers[outers['articletype'].apply(lambda x: outerwear_mapping[x]['season'] == 'fall')]
            outer_type = matching_outers.sample(1).iloc[0]['articletype'] if not matching_outers.empty else ''
        else:  # spring
            temperature = np.random.randint(18, 28)
            outer_type = ''  # usually not needed in spring

        top = tops.sample(1).iloc[0]['articletype']
        bottom = bottoms.sample(1).iloc[0]['articletype']

        record = {
            'temperature': temperature,
            'feels_like': temperature + np.random.randint(-2, 3),
            'humidity': np.random.randint(35, 90),
            'wind_speed': np.random.uniform(1, 15),
            'weather_condition': np.random.choice(['clear', 'cloudy', 'foggy', 'sunny']),
            'time_of_day': np.random.choice(['morning', 'afternoon', 'evening', 'night']),
            'season': season,
            'occasion': np.random.choice(['casual', 'work', 'party']),
            'recommended_top': top,
            'recommended_bottom': bottom,
            'recommended_outer': outer_type
        }

        records.append(record)

# === Save Final Clean Dataset ===
final_df = pd.DataFrame(records)
final_df.to_csv("enhanced_weather_clothing_dataset_women_balanced_cleaned.csv", index=False)
print("✅ Final dataset created with correct summer outerwear (dupatta/none) and saved.")


✅ Final dataset created with correct summer outerwear (dupatta/none) and saved.


# modal training 

In [35]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.multioutput import MultiOutputClassifier
from sklearn.metrics import classification_report
import joblib

# === Load dataset ===
df = pd.read_csv("enhanced_weather_clothing_dataset_women_balanced_cleaned.csv")

# === Drop missing values ===
df = df.dropna()

# === Define features and targets ===
features = ['temperature', 'feels_like', 'humidity', 'wind_speed',
            'weather_condition', 'time_of_day', 'season', 'occasion']
targets = ['recommended_top', 'recommended_bottom', 'recommended_outer']

X = df[features]
y = df[targets]

# === Split dataset ===
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# === Preprocessing ===
categorical_features = ['weather_condition', 'time_of_day', 'season', 'occasion']
numerical_features = ['temperature', 'feels_like', 'humidity', 'wind_speed']

preprocessor = ColumnTransformer(transformers=[
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
    ('num', 'passthrough', numerical_features)
])

# === Model ===
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
multi_output_model = MultiOutputClassifier(rf_model)

# === Pipeline ===
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', multi_output_model)
])

# === Train model ===
pipeline.fit(X_train, y_train)

# === Predict and evaluate ===
y_pred = pipeline.predict(X_test)
for i, target in enumerate(targets):
    print(f"\n=== Classification Report for {target} ===")
    print(classification_report(y_test[target], y_pred[:, i]))

# === Save model ===
joblib.dump(pipeline, "weather_clothing_recommender_women.pkl")
print("\n✅ Model saved as: weather_clothing_recommender_women.pkl")



=== Classification Report for recommended_top ===
              precision    recall  f1-score   support

      kurtas       0.29      0.28      0.29        64
      shirts       0.40      0.58      0.47        73
        tops       0.50      0.20      0.28        51

    accuracy                           0.37       188
   macro avg       0.40      0.35      0.35       188
weighted avg       0.39      0.37      0.36       188


=== Classification Report for recommended_bottom ===
              precision    recall  f1-score   support

      capris       0.00      0.00      0.00        17
       jeans       0.33      0.35      0.34        66
    leggings       0.00      0.00      0.00        16
    trousers       0.49      0.62      0.55        89

    accuracy                           0.41       188
   macro avg       0.21      0.24      0.22       188
weighted avg       0.35      0.41      0.38       188


=== Classification Report for recommended_outer ===
               precision  

In [36]:
for target in targets:
    print(f"\n{target} class counts:\n{df[target].value_counts()}")



recommended_top class counts:
recommended_top
shirts    390
kurtas    322
tops      225
Name: count, dtype: int64

recommended_bottom class counts:
recommended_bottom
trousers    425
jeans       343
leggings    104
capris       65
Name: count, dtype: int64

recommended_outer class counts:
recommended_outer
jacket           375
coat             194
dupatta          187
puffer jacket    181
Name: count, dtype: int64


In [39]:
import joblib
import pandas as pd

# === Load Saved Model ===
pipeline = joblib.load("weather_clothing_recommender_women.pkl")

# === Define Single Test Input ===
test_input = pd.DataFrame([{
    'temperature': 12,
    'feels_like': 29,
    'humidity': 60,
    'wind_speed': 8,
    'weather_condition': 'sunny',       # 'sunny', 'cloudy', 'clear', etc.
    'time_of_day': 'afternoon',         # 'morning', 'afternoon', 'evening', 'night'
    'season': 'summer',                 # 'summer', 'winter', 'spring', 'fall'
    'occasion': 'casual'                # 'casual', 'work', 'party'
}])

# === Make Prediction ===
predictions = pipeline.predict(test_input)[0]

# === Display Results ===
print("👚 Recommended Top:    ", predictions[0])
print("👖 Recommended Bottom: ", predictions[1])
print("🧥 Recommended Outer:  ", predictions[2])


👚 Recommended Top:     shirts
👖 Recommended Bottom:  jeans
🧥 Recommended Outer:   dupatta


2025-06-15 04:26:40.354 
  command:

    streamlit run c:\Users\hP\anaconda3\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]
2025-06-15 04:26:40.363 Session state does not function when running a script without `streamlit run`
