In [14]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import glob, os, zipfile, re
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import gradio as gr
from google.colab import files

#  1. DATA LOADING
# Ask for a ZIP archive through the Colab upload widget and unpack it into
# ./zara_data. NOTE(review): the folder is not cleared between runs, so CSVs
# from a previously uploaded archive would also be picked up by the glob below.
uploaded = files.upload()
if not uploaded:
    raise ValueError("No file was uploaded; expected a ZIP archive of CSV files.")
zip_name = list(uploaded.keys())[0]
with zipfile.ZipFile(zip_name, 'r') as zip_ref:
    zip_ref.extractall('zara_data')

all_files = glob.glob('zara_data/**/*.csv', recursive=True)
full_list = []
for filename in all_files:
    try:
        # Parse each file exactly once.
        temp = pd.read_csv(filename)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError, OSError) as exc:
        # Best effort: an unreadable file is reported and skipped, not fatal.
        print(f"Skipping {filename}: {exc}")
        continue

    # Normalise headers per file *before* concatenating, so that 'Price',
    # ' price' and 'price' line up in one column instead of producing
    # duplicate, NaN-padded columns after the merge.
    temp.columns = temp.columns.astype(str).str.strip().str.lower()
    temp = temp.loc[:, ~temp.columns.duplicated()].copy()

    # Labels are derived from the file path / file name.
    temp['gender_label'] = 'Women' if 'women' in filename.lower() else 'Men'
    temp['category_label'] = os.path.basename(filename).lower().replace('.csv', '').title()
    full_list.append(temp)

if not full_list:
    raise ValueError(f"No readable CSV files were found in {zip_name!r}.")

# ignore_index=True already yields a fresh 0..n-1 index.
df = pd.concat(full_list, axis=0, ignore_index=True)
df.columns = df.columns.str.strip().str.lower()

# 2. PREPROCESSING
# Maps every recognised colour name to the hex code used when drawing it.
# 'Other' is the bucket for rows in which no colour name is found.
color_palette = {'Black': '#000000', 'White': '#FFFFFF', 'Blue': '#1A5276', 'Red': '#A93226', 'Green': '#1D8348', 'Brown': '#6E2C00', 'Grey': '#7F8C8D', 'Pink': '#F5B7B1', 'Yellow': '#F4D03F', 'Beige': '#F5F5DC', 'Orange': '#E67E22', 'Ecru': '#C2B280', 'Other': '#D5D8DC'}


# Clean Price
def _parse_price(raw):
    """Convert one raw price cell to a float, or NaN when no number can be read.

    Every character other than a digit or '.' is removed before conversion,
    which drops currency symbols and thousands separators.
    """
    digits = re.sub(r'[^\d.]', '', str(raw))
    try:
        return float(digits)
    except ValueError:
        # Nothing left after stripping, or malformed text such as '1.2.3'.
        return float('nan')


# df['price'] is a DataFrame when the column name is duplicated; use the first copy.
p_data = df['price'].iloc[:, 0] if len(df['price'].shape) > 1 else df['price']
parsed_prices = p_data.apply(_parse_price).astype(float)
# A price of exactly 0 is treated as missing, the same as an unparseable cell.
valid_prices = parsed_prices.mask(parsed_prices == 0)
# Take the median over real prices only, so placeholders cannot drag it down.
price_median = valid_prices.median()
if pd.isna(price_median):
    price_median = 0.0  # no usable price anywhere in the data
df['price_clean'] = valid_prices.fillna(price_median)

# Clean Color
t_col = 'details' if 'details' in df.columns else 'product_name' if 'product_name' in df.columns else df.columns[0]
t_data = df[t_col].iloc[:, 0] if len(df[t_col].shape) > 1 else df[t_col]

# Whole-word, case-insensitive patterns: a plain substring test would tag
# "tailored" or "textured" as Red.
_color_patterns = [
    (name, re.compile(rf'\b{re.escape(name)}\b', re.IGNORECASE))
    for name in color_palette
    if name != 'Other'
]


def _detect_color(text):
    """Return the first palette colour (in palette order) found in ``text`` as a whole word, else 'Other'."""
    for name, pattern in _color_patterns:
        if pattern.search(text):
            return name
    return 'Other'


df['color_name'] = t_data.astype(str).apply(_detect_color)

# Final Encoding setup
# Gender and colour encoders are fitted on the fixed vocabularies; the
# category encoder is fitted on the labels present in the data.
le_gender = LabelEncoder().fit(['Men', 'Women'])
le_color = LabelEncoder().fit(list(color_palette.keys()))
le_cat = LabelEncoder().fit(df['category_label'])

df['gender_enc'] = le_gender.transform(df['gender_label'])
df['color_enc'] = le_color.transform(df['color_name'])
df['target_cat'] = le_cat.transform(df['category_label'])

#  3. MODELS & COMPARISON
# Features: cleaned price plus the encoded gender and colour.
# Target: the encoded product category.
X = df[['price_clean', 'gender_enc', 'color_enc']]
y = df['target_cat']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Random Forest
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
rf_predictions = rf_model.predict(X_test)
rf_acc = accuracy_score(y_test, rf_predictions)

# Decision Tree
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train, y_train)
dt_predictions = dt_model.predict(X_test)
dt_acc = accuracy_score(y_test, dt_predictions)

# 4. ANALYTICS LOGIC
def get_insights():
    """Build the contents of the analytics tab.

    Reads the module-level ``df``, ``color_palette``, ``rf_acc`` and ``dt_acc``.

    Returns:
        tuple: ``(fig1, fig2, comp)`` where ``fig1`` is the correlation
        heatmap figure, ``fig2`` is the bar chart of products per colour and
        ``comp`` is a DataFrame with one accuracy row per model.
    """
    # Correlation Map
    fig1, ax1 = plt.subplots(figsize=(7, 5))
    sns.heatmap(df[['price_clean', 'gender_enc', 'color_enc', 'target_cat']].corr(), annot=True, cmap='coolwarm', ax=ax1)
    ax1.set_title("Feature Correlation")

    # Inventory Distribution
    fig2, ax2 = plt.subplots(figsize=(8, 4))
    counts = df['color_name'].value_counts()
    sns.barplot(x=counts.index, y=counts.values, palette=[color_palette.get(c, '#ddd') for c in counts.index], ax=ax2)
    # Rotate via the axes object instead of relying on pyplot's "current axes".
    ax2.tick_params(axis='x', rotation=45)
    ax2.set_title("Inventory Distribution by Color")
    ax2.set_xlabel("Color")
    ax2.set_ylabel("Number of products")
    fig2.tight_layout()  # keep the rotated tick labels inside the canvas

    # Comparison Table
    comp = pd.DataFrame({'Model': ['Random Forest', 'Decision Tree'], 'Accuracy (%)': [f"{rf_acc*100:.2f}%", f"{dt_acc*100:.2f}%"]})

    # Drop both figures from pyplot's registry so repeated clicks do not pile
    # up open figures; the Figure objects themselves remain renderable.
    plt.close(fig1)
    plt.close(fig2)
    return fig1, fig2, comp

# --- 5. INTERFACE ---
def _badge_text_color(hex_color):
    """Return 'white' or 'black', whichever reads better on ``hex_color`` ('#RRGGBB')."""
    red, green, blue = (int(hex_color[i:i + 2], 16) for i in (1, 3, 5))
    # Weighted channel sum as a perceived-brightness estimate (0-255).
    brightness = 0.299 * red + 0.587 * green + 0.114 * blue
    return 'white' if brightness < 128 else 'black'


# Predict Fix
def do_predict(g, c, p):
    """Predict the product category for one gender / colour / price combination.

    Args:
        g: Gender chosen in the radio group ('Men' or 'Women'), or None.
        c: Colour name chosen in the dropdown, or None.
        p: Price chosen on the slider.

    Returns:
        dict: category name -> probability from the random forest, in the
        shape ``gr.Label`` expects.

    Raises:
        gr.Error: if gender or colour has not been selected.
    """
    if g is None or c is None:
        raise gr.Error("Please select both a gender and a color before analyzing.")

    # Use the fitted encoders and the training column names, so the input is
    # encoded exactly as the training data was.
    features = pd.DataFrame(
        [[p, le_gender.transform([g])[0], le_color.transform([c])[0]]],
        columns=['price_clean', 'gender_enc', 'color_enc'],
    )
    # Report the model's real class probabilities, not a hard-coded 1.0.
    probabilities = rf_model.predict_proba(features)[0]
    categories = le_cat.inverse_transform(rf_model.classes_)
    return {str(name): float(prob) for name, prob in zip(categories, probabilities)}


# NOTE(review): the saved cell output echoes the line below as the source of a
# warning; presumably it concerns the `theme` argument in the installed Gradio
# version - confirm before changing it.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="zinc")) as app:
    gr.HTML("<h1 style='text-align:center;'> Smart Zara Products Categorization System</h1>")

    with gr.Tabs():
        with gr.TabItem(" Predictor"):
            with gr.Row():
                with gr.Column():
                    gr.Markdown("### Step 1: Input Features")
                    g_in = gr.Radio(['Men', 'Women'], label="Gender")
                    c_in = gr.Dropdown(list(color_palette.keys()), label="Color")
                    # floor/ceil so the slider covers the full observed price range.
                    p_in = gr.Slider(
                        int(np.floor(df.price_clean.min())),
                        int(np.ceil(df.price_clean.max())),
                        label="Price (₹)",
                    )
                    btn = gr.Button("Analyze Trend", variant="primary")

                    # Color Palette Visual Guide
                    palette_html = "".join(
                        f"<span style='background:{h}; color:{_badge_text_color(h)}; padding:5px; margin:2px; border-radius:5px; font-size:10px; display:inline-block; border:1px solid #ccc;'>{c}</span>"
                        for c, h in color_palette.items()
                    )
                    gr.HTML(f"<b>Color Guide:</b><br>{palette_html}")

                with gr.Column():
                    gr.Markdown("### AI Result")
                    # Show only the most likely categories.
                    res = gr.Label(label="Top Prediction Score", num_top_classes=3)

            btn.click(do_predict, [g_in, c_in, p_in], res)

        with gr.TabItem("📊 EDA & Evaluation"):
            with gr.Row():
                p1 = gr.Plot(label="Correlation Heatmap")
                p2 = gr.Plot(label="Inventory Mix")
            comp_table = gr.Dataframe(label="Model Accuracy Comparison")
            sync = gr.Button("Generate Full Analytics Dashboard")
            sync.click(get_insights, None, [p1, p2, comp_table])

app.launch(share=True)

Saving archive (1).zip to archive (1) (13).zip


  with gr.Blocks(theme=gr.themes.Soft(primary_hue="zinc")) as app:


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://bdb5d27164d4fc7bc8.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


