In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import joblib

In [7]:
# Read the trend dataset from the Excel workbook; the path is relative to the
# notebook's working directory.
file_path = 'Emerging_Trend_Identification_Dataset.xlsx'
data = pd.read_excel(io=file_path)

In [8]:
# Year, Month and Geographic Location are the only model inputs; each column in
# target_columns is predicted by its own model.
input_columns = ['Year', 'Month', 'Geographic Location']
target_columns = [
    'BrandName',
    'Category',
    'Individual_Category',
    'Category_by_Gender',
    'Sales Data (units)',
    'PrimaryColor',
    'Size',
    'Age Range',
    'Material',
    'Pattern',
    'Style',
    'Sub-Style',
]

# Keep only rows where every input and every target value is present.
data = data.dropna(subset=[*input_columns, *target_columns])

X = data[input_columns]
y = data[target_columns]

# One-hot encode the location (dropping the first level); the remaining input
# columns (Year, Month) are passed through untouched.
categorical_features = ['Geographic Location']
categorical_transformer = Pipeline([('onehot', OneHotEncoder(drop='first'))])

preprocessor = ColumnTransformer(
    [('cat', categorical_transformer, categorical_features)],
    remainder='passthrough',
)

# Filled by the training cell: target column name -> fitted pipeline.
models = dict()

In [9]:
# Train one grid-searched random-forest pipeline per target column, report
# hold-out metrics, and persist the fitted artifacts.
#
# NOTE(review): every target -- including the label-coded categorical ones such
# as BrandName -- is fitted with a regressor. The recorded R2 scores near or
# below zero for most of those targets suggest a classifier may be the better
# fit; confirm before relying on these models.

# The hyperparameter grid does not depend on the target, so build it once.
param_grid = {
    'model__n_estimators': [150, 200, 300],
    'model__max_depth': [None, 10, 20, 30],
    'model__min_samples_split': [2, 5, 10],
    'model__min_samples_leaf': [1, 2, 4],
    'model__max_features': ['sqrt', 'log2', 0.5, 0.7, 0.9]
}

for target in target_columns:
    y_target = y[target]

    # The fixed random_state gives every target the same train/test row split.
    X_train, X_test, y_train, y_test = train_test_split(X, y_target, test_size=0.2, random_state=45)

    # Define model pipeline
    pipeline = Pipeline(steps=[
        ('preprocessor', preprocessor),
        ('scaler', StandardScaler(with_mean=False)),
        ('model', RandomForestRegressor(random_state=43))
    ])

    # Perform grid search; error_score='raise' surfaces fit failures instead of
    # silently scoring them, and the except below reports them per target.
    grid_search = GridSearchCV(pipeline, param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1, error_score='raise')
    try:
        grid_search.fit(X_train, y_train)
        best_pipeline = grid_search.best_estimator_

        # Predict and evaluate on the held-out rows
        y_pred = best_pipeline.predict(X_test)
        mae = mean_absolute_error(y_test, y_pred)
        mse = mean_squared_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)

        # Store the model
        models[target] = best_pipeline

        print(f"Metrics for {target} - MAE: {mae}, MSE: {mse}, R2: {r2}")

    except Exception as e:
        # Best effort: one failing target must not abort the remaining ones.
        print(f"Failed to fit model for {target}: {e}")

# Save models and preprocessing pipeline
joblib.dump(models, 'fashion_trend_models.pkl')

# GridSearchCV fits clones of the pipeline, so the module-level `preprocessor`
# object is never fitted. Persist the fitted preprocessing step from a trained
# pipeline instead (all targets share the same training rows); fall back to the
# unfitted object only when no model was trained.
fitted_preprocessor = next(
    (fitted.named_steps['preprocessor'] for fitted in models.values()),
    preprocessor,
)
joblib.dump(fitted_preprocessor, 'preprocessor.pkl')

Metrics for BrandName - MAE: 2.0371483704902293, MSE: 5.27623184868878, R2: 0.018058305381344897
Metrics for Category - MAE: 0.8018726582657353, MSE: 0.8453521098910445, R2: -0.14908039008198615
Metrics for Individual_Category - MAE: 1.731209395070313, MSE: 3.994628578071273, R2: -0.08176119462578901
Metrics for Category_by_Gender - MAE: 0.715208118621606, MSE: 0.703726988432782, R2: -0.05627816374536154
Metrics for Sales Data (units) - MAE: 24.0911097920026, MSE: 738.9252664995416, R2: 0.8521812640454145
Metrics for PrimaryColor - MAE: 1.5170771385475554, MSE: 3.220858825915208, R2: 0.4975041485029361
Metrics for Size - MAE: 1.2584128412798543, MSE: 1.8794706528322827, R2: -0.019613935513439706
Metrics for Age Range - MAE: 0.5912799050205965, MSE: 0.5120259739661397, R2: -0.032115349097100454
Metrics for Material - MAE: 1.016501216336381, MSE: 1.3735243247399946, R2: 0.05562517332111383
Metrics for Pattern - MAE: 1.0101080729853862, MSE: 1.44602128897996, R2: -0.12100708270855587
Metr

['preprocessor.pkl']

In [17]:
import pandas as pd
import numpy as np
import joblib

# Restore the dictionary of per-target pipelines from disk.
# joblib.load unpickles its input, so only load model files you trust.
loaded_models = joblib.load('fashion_trend_models.pkl')

# Feature columns, in the order the query frame is arranged below.
all_columns = ['Year', 'Month', 'Geographic Location']

# One-row query: Year 2024, Month 1, Geographic Location 1.
new_data = pd.DataFrame([{'Year': 2024, 'Month': 1, 'Geographic Location': 1}])

# Any expected column that is absent is filled with 0, then the columns are
# put into the expected order.
for col in all_columns:
    if col in new_data.columns:
        continue
    new_data[col] = 0

new_data = new_data[all_columns]

# Ask every loaded pipeline for its prediction on the single query row.
predictions = {}
for target, pipeline in loaded_models.items():
    predictions[target] = pipeline.predict(new_data)[0]

def _code_table(*labels):
    """Return a dict numbering the given labels 1, 2, 3, ... in the order given."""
    return dict(enumerate(labels, start=1))


# Lookup tables translating integer category codes into display labels, keyed
# by column name.
mappings = {
    'BrandName': _code_table(
        'Allen Solly', "Levi's", 'Adidas', 'Gucci', 'H&M', 'Puma', 'Nike', 'Zara',
    ),
    'Category': _code_table('Clothing', 'Footwear', 'Accessories'),
    'Individual_Category': _code_table(
        'Jackets', 'T-Shirts', 'Boots', 'Sweaters', 'Hats', 'Sweatshirts',
    ),
    'Category_by_Gender': _code_table('Women', 'Men', 'Unisex'),
    'PrimaryColor': _code_table(
        'Black', 'White', 'Blue', 'Red', 'Pink',
        'Yellow', 'Green', 'Purple', 'Brown', 'Orange',
    ),
    'Size': _code_table('XS', 'S', 'M', 'L', 'XL', 'XXL'),
    'Age Range': _code_table('18-24', '25-34', '35-44'),
    'Material': _code_table('Wool', 'Cotton', 'Denim', 'Leather', 'Polyester'),
    'Pattern': _code_table('Solid', 'Printed', 'Floral', 'Polka Dot', 'Striped'),
    'Search Trends': _code_table('High', 'Medium', 'Low'),
    'Style': _code_table(
        'Western Wear', 'Casual Wear', 'Sports and Active Wear', 'Party Wear',
        'Minimalist Fashion', 'Hiking', 'Urban Fashion', 'Streetwear',
    ),
    'Sub-Style': _code_table(
        'Regular Fit', 'Crew Neck', 'Windbreaker', 'Heeled', 'Loose Fit',
        'Fedora', 'Hooded', 'Hiking', 'Bomber',
    ),
}

# Turn the raw model outputs into readable values in a single pass:
#   * targets with a lookup table are rounded to the nearest integer code and
#     translated to their label ('Unknown (<code>)' when the code has no entry);
#   * 'Sales Data (units)' is rounded to a whole number of units;
#   * anything else is passed through unchanged.
rounded_predictions = {}
for target, pred in predictions.items():
    # Accept any float flavour (Python float, np.float32, np.float64, ...);
    # checking for np.float64 alone would leave other floats unrounded and make
    # the label lookup miss.
    is_float = isinstance(pred, (float, np.floating))

    if target in mappings:
        # int(round(float(...))) always yields a plain Python int dict key.
        code = int(round(float(pred))) if is_float else pred
        rounded_predictions[target] = mappings[target].get(code, f'Unknown ({code})')
    elif target == 'Sales Data (units)' and is_float:
        rounded_predictions[target] = int(round(float(pred)))  # Round to nearest integer
    else:
        rounded_predictions[target] = pred  # No mapping needed for non-categorical targets

# Print or use predictions as needed
print("Mapped Predictions:", rounded_predictions)


Mapped Predictions: {'BrandName': 'Gucci', 'Category': 'Footwear', 'Individual_Category': 'Boots', 'Category_by_Gender': 'Men', 'Sales Data (units)': 161, 'PrimaryColor': 'White', 'Size': 'L', 'Age Range': '25-34', 'Material': 'Denim', 'Pattern': 'Printed', 'Style': 'Minimalist Fashion', 'Sub-Style': 'Loose Fit'}
