In [1]:
import pandas as pd
import numpy as np
import joblib
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import matplotlib.pyplot as plt

In [2]:
df=pd.read_csv("fashion_color_dataset_refined.csv")

In [3]:
df.head(10)

Unnamed: 0,shirt_color_name,shirt_hex,pant_color_name,pant_hex,accessory_suggestion,style_type
0,red,#FF0000,black,#000000,belt,casual
1,red,#FF0000,black,#000000,bracelet,sport
2,red,#FF0000,black,#000000,bracelet,sport
3,red,#FF0000,black,#000000,cap,sport
4,red,#FF0000,black,#000000,watch,sport
5,red,#FF0000,black,#000000,cap,party
6,red,#FF0000,black,#000000,watch,formal
7,red,#FF0000,black,#000000,sunglass,formal
8,red,#FF0000,black,#000000,sunglass,formal
9,red,#FF0000,black,#000000,bracelet,casual


In [4]:
def hex_to_rgb(hex_color):
    hex_color = hex_color.lstrip('#')
    return tuple(int(hex_color[i:i+2], 16) for i in (0, 2, 4))

df[['shirt_r', 'shirt_g', 'shirt_b']] = df['shirt_hex'].apply(lambda x: pd.Series(hex_to_rgb(x)))
df[['pant_r', 'pant_g', 'pant_b']] = df['pant_hex'].apply(lambda x: pd.Series(hex_to_rgb(x)))


In [5]:
le_pant = LabelEncoder()
df['pant_label'] = le_pant.fit_transform(df['pant_color_name'])



In [6]:
X = df[['shirt_r', 'shirt_g', 'shirt_b', 'pant_r', 'pant_g', 'pant_b']] / 255.0


y = df['pant_label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)



In [7]:
from sklearn.ensemble import RandomForestClassifier

In [8]:
model = RandomForestClassifier(n_estimators=200, random_state=42)
model.fit(X_train, y_train)


In [9]:
accuracy = model.score(X_test, y_test)
print(f"Model Accuracy: {accuracy*100:.2f}%")


Model Accuracy: 100.00%


In [10]:
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)
print("KNN Accuracy:", knn.score(X_test, y_test) * 100)


KNN Accuracy: 100.0


In [11]:
df['pant_color_name'].value_counts()


pant_color_name
white    280
black    200
brown    160
beige    120
grey     120
cream     80
blue      40
navy      40
Name: count, dtype: int64

In [12]:
def color_to_rgb(color_input):
    colors_dict = {
        "red": "#FF0000",
        "black": "#000000",
        "white": "#FFFFFF",
        "blue": "#0000FF",
        "navy": "#000080",
        "beige": "#F5F5DC",
        "grey": "#808080",
        "green": "#008000",
        "brown": "#A52A2A",
        "cream": "#FFFDD0"
    }
    
    # If color name, convert to hex
    if color_input.lower() in colors_dict:
        color_input = colors_dict[color_input.lower()]
    
    # Convert hex ‚Üí RGB
    color_input = color_input.lstrip('#')
    return [int(color_input[i:i+2], 16)/255.0 for i in (0, 2, 4)]


In [13]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import euclidean_distances

def recommend_pant_color(shirt_color_name, df):
    """
    Predicts the best matching pant color for a given shirt color name.
    Input: shirt_color_name (str)
    Output: dict with suggested pant color, accessory, and style.
    """

    # Color name to RGB map (expand if you like)
    color_map = {
        "red": (255, 0, 0),
        "blue": (0, 0, 255),
        "green": (0, 255, 0),
        "white": (255, 255, 255),
        "black": (0, 0, 0),
        "yellow": (255, 255, 0),
        "orange": (255, 165, 0),
        "gray": (128, 128, 128),
        "purple": (128, 0, 128),
        "pink": (255, 192, 203),
        "maroon": (128, 0, 0),
        "navy": (0, 0, 128),
        "beige": (245, 245, 220),
        "brown": (139, 69, 19),
        "sky blue": (135, 206, 235),
        "olive": (128, 128, 0),
    }

    # Check if entered color exists
    if shirt_color_name.lower() not in color_map:
        return f"‚ùå '{shirt_color_name}' not found in color list. Try another like red, blue, black, white, etc."

    shirt_rgb = np.array(color_map[shirt_color_name.lower()]).reshape(1, -1)

    # Create combined RGB tuple for shirt in dataset
    df["shirt_rgb"] = list(zip(df["shirt_r"], df["shirt_g"], df["shirt_b"]))

    # Find shirt color closest to user‚Äôs input using Euclidean distance
    df["distance"] = df["shirt_rgb"].apply(lambda x: euclidean_distances([x], shirt_rgb)[0][0])
    best_match = df.loc[df["distance"].idxmin()]

    return {
        "üëï Input Shirt Color": shirt_color_name.capitalize(),
        "üëñ Suggested Pant Color": best_match["pant_color_name"].capitalize(),
        "üé® Pant RGB": (best_match["pant_r"], best_match["pant_g"], best_match["pant_b"]),
        "üï∂ Accessory Suggestion": best_match["accessory_suggestion"],
        "üíé Style Type": best_match["style_type"].capitalize()
    }


In [14]:
result = recommend_pant_color("white", df)
print(result)



{'üëï Input Shirt Color': 'White', 'üëñ Suggested Pant Color': 'Black', 'üé® Pant RGB': (0, 0, 0), 'üï∂ Accessory Suggestion': 'cap', 'üíé Style Type': 'Party'}


In [15]:
import joblib
joblib.dump(model,"model.pkl")
df.to_csv("color_dataset.csv", index=False)