In [None]:
# ============================================================
# 1️⃣ IMPORTS & LOAD DATA
# ============================================================
import pandas as pd
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from sklearn.utils.class_weight import compute_class_weight

# Load dataset
df = pd.read_csv("/content/fashion_dataset_200.csv")

# Combine features into text
df['combined_text'] = (
    df['Skin Tone'] + " " +
    df['Weather Condition'] + " " +
    df['Work Level'] + " " +
    df['Season'] + " " +
    df['Recommended Outfit']
)

# Target
y = df['Recommended Outfit']  # Try 'Avoid Fabrics' if you prefer that task
X_text = df['combined_text']

In [None]:
# ============================================================
# 2️⃣ TF-IDF VECTORIZATION (TUNED)
# ============================================================
# Use a wider range of n-grams and limit features to avoid overfitting
vectorizer = TfidfVectorizer(
    stop_words='english',
    ngram_range=(1,3),     # capture more context
    max_features=3000,     # reduce noise
    sublinear_tf=True      # smooth TF values
)
X = vectorizer.fit_transform(X_text)

print("TF-IDF matrix:", X.shape)


TF-IDF matrix: (200, 603)


In [None]:
# ============================================================
# 3️⃣ TRAIN TEST SPLIT
# ============================================================
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
print("Train:", X_train.shape, " Test:", X_test.shape)

Train: (160, 603)  Test: (40, 603)


In [None]:
# ============================================================
# 4️⃣ LOGISTIC REGRESSION (TUNED VIA GRIDSEARCH)
# ============================================================
param_grid = {
    'C': [0.1, 1, 5, 10, 20],
    'solver': ['liblinear', 'lbfgs'],
    'penalty': ['l2']
}

grid = GridSearchCV(
    LogisticRegression(max_iter=2000, multi_class='auto', class_weight='balanced'),
    param_grid,
    cv=5,
    scoring='accuracy',
    verbose=1,
    n_jobs=-1
)

grid.fit(X_train, y_train)

print("✅ Best Params:", grid.best_params_)
#best_model = grid.best_estimator_


Fitting 5 folds for each of 10 candidates, totalling 50 fits




✅ Best Params: {'C': 0.1, 'penalty': 'l2', 'solver': 'lbfgs'}




In [None]:
# ============================================================
# 5️⃣ EVALUATION
# ============================================================
y_pred = best_model.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print(f"🎯 Accuracy: {acc*100:.2f}%")
print("\nClassification Report:\n", classification_report(y_test, y_pred))


🎯 Accuracy: 95.00%

Classification Report:
                           precision    recall  f1-score   support

            Active boots       1.00      1.00      1.00         1
             Active coat       1.00      1.00      1.00         1
     Active light blouse       1.00      1.00      1.00         1
  Active long-sleeve top       1.00      1.00      1.00         1
     Active pastel shirt       1.00      1.00      1.00         1
      Active scarf combo       1.00      1.00      1.00         1
   Active thermal jacket       1.00      1.00      1.00         1
      Active trench coat       1.00      1.00      1.00         1
            Casual boots       1.00      1.00      1.00         2
             Casual coat       0.00      0.00      0.00         1
           Casual hoodie       0.50      1.00      0.67         1
  Casual long-sleeve top       1.00      1.00      1.00         2
           Casual shorts       1.00      1.00      1.00         1
Casual waterproof jacket       

In [None]:
# ============================================================
# 6️⃣ SVM MODEL WITH CLASS BALANCING — CLEAN VERSION
# ============================================================
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
from sklearn.exceptions import UndefinedMetricWarning

# Ignore sklearn metric warnings (undefined precision/recall)
warnings.filterwarnings("ignore", category=UndefinedMetricWarning)

# Train SVM model
svm_model = SVC(
    kernel='rbf',          # RBF kernel for non-linear decision boundaries
    C=10,                  # Higher C = less regularization
    gamma='scale',         # Auto gamma scaling
    class_weight='balanced',  # Handle imbalanced classes
    probability=True,      # Allow probability outputs
    random_state=42
)
svm_model.fit(X_train, y_train)

# Predictions
y_pred_svm = svm_model.predict(X_test)

# Evaluation metrics
acc_svm = accuracy_score(y_test, y_pred_svm)
print(f"🎯 SVM Accuracy: {acc_svm*100:.2f}%\n")

print("📊 Classification Report (No Warnings):\n")
print(classification_report(y_test, y_pred_svm, zero_division=0))


🎯 SVM Accuracy: 90.00%


                          precision    recall  f1-score   support

            Active boots       1.00      1.00      1.00         1
             Active coat       1.00      1.00      1.00         1
     Active light blouse       1.00      1.00      1.00         1
  Active long-sleeve top       1.00      1.00      1.00         1
     Active pastel shirt       1.00      1.00      1.00         1
      Active scarf combo       1.00      1.00      1.00         1
   Active thermal jacket       1.00      1.00      1.00         1
      Active trench coat       1.00      1.00      1.00         1
            Casual boots       1.00      1.00      1.00         2
         Casual cardigan       0.00      0.00      0.00         0
             Casual coat       0.00      0.00      0.00         1
           Casual hoodie       1.00      1.00      1.00         1
     Casual light blouse       0.00      0.00      0.00         0
  Casual long-sleeve top       1.00      1.00     

In [None]:
# ============================================================
# 7️⃣ SAVE BEST MODEL & VECTORIZER
# ============================================================
with open("fashion_model_best.pkl", "wb") as f:
    pickle.dump(best_model, f)

with open("fashion_vectorizer_best.pkl", "wb") as f:
    pickle.dump(vectorizer, f)

print("\n✅ Model and vectorizer saved successfully!")



✅ Model and vectorizer saved successfully!


In [None]:
# ============================================================
# 8️⃣ CUSTOM PREDICTION FUNCTION
# ============================================================
def predict_fabric(skin_tone, weather, work_level, season, outfit_desc):
    text = f"{skin_tone} {weather} {work_level} {season} {outfit_desc}"
    vector = vectorizer.transform([text])
    pred = best_model.predict(vector)[0]
    print(f"\n🧾 Input: {text}")
    print(f"💡 Predicted Fabric: {pred}")

# Test Examples
predict_fabric("Fair", "Sunny", "Office", "Summer", "white cotton shirt")
predict_fabric("Deep", "Cold", "Outdoor", "Winter", "wool coat and gloves")



🧾 Input: Fair Sunny Office Summer white cotton shirt
💡 Predicted Fabric: Formal cotton t-shirt

🧾 Input: Deep Cold Outdoor Winter wool coat and gloves
💡 Predicted Fabric: Casual wool sweater
