# In [1]:
import os
import pandas as pd
from flask import Flask, request, jsonify
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import joblib

# Flask application instance for this module, created when the script is
# executed (no routes are attached to it in this part of the file).
app = Flask(__name__)

# File paths for different verticals.
# The paths are assembled with os.path.join so the separator matches the host
# OS: a hard-coded backslash is only a directory separator on Windows, while
# on POSIX systems it would be treated as part of the file name. On Windows
# the resulting strings are identical to the previous literals.
# NOTE: the paths are relative, so they resolve against the current working
# directory of the process.
_CONTENT_DIR = os.path.join("..", "content")

file_paths = {
    'Apparel & Fashion': os.path.join(_CONTENT_DIR, "Apparel & Fashion.csv"),
    'Dmart': os.path.join(_CONTENT_DIR, "Dmart.csv"),
    'Food': os.path.join(_CONTENT_DIR, "Food Data.csv"),
    'Gifts & Crafts': os.path.join(_CONTENT_DIR, "Gifts & Crafts.csv"),
    'Grocery': os.path.join(_CONTENT_DIR, "Grocery Data.csv"),
    'Meat': os.path.join(_CONTENT_DIR, "Meat Data.csv"),
    'Pharma': os.path.join(_CONTENT_DIR, "Pharma Data.csv"),
    'Security & Protection': os.path.join(_CONTENT_DIR, "Security & Protection.csv"),
    'Transportation': os.path.join(_CONTENT_DIR, "Transportation.csv"),
}

# Read every vertical's CSV into a DataFrame, keyed by the vertical's name
# (same keys and order as file_paths).
dataframes = dict(zip(file_paths, map(pd.read_csv, file_paths.values())))

# Function to train the Random Forest model on the given dataset
def train_model(df, target_column, text_column='Product Name', test_size=0.2, random_state=42):
    """Fit a TF-IDF vectorizer and a random-forest classifier on ``df``.

    Args:
        df: DataFrame containing the text in ``text_column`` and the labels
            in ``target_column``.
        target_column: Name of the column holding the class labels. Labels
            are converted to ``str`` before fitting.
        text_column: Name of the column holding the text to vectorize.
        test_size: Fraction of rows withheld from training (forwarded to
            ``train_test_split``).
        random_state: Seed used for both the split and the forest, so that
            repeated runs on the same data produce the same model.

    Returns:
        A ``(model, vectorizer)`` tuple: the fitted ``RandomForestClassifier``
        and the fitted ``TfidfVectorizer``.

    Raises:
        KeyError: If ``text_column`` or ``target_column`` is not in ``df``.
    """
    missing = [col for col in (text_column, target_column) if col not in df.columns]
    if missing:
        raise KeyError(f"train_model: column(s) {missing} not found in DataFrame")

    # Drop unlabeled rows first: astype(str) would otherwise turn a missing
    # label into the literal class 'nan' and the model would learn to predict it.
    labelled = df[df[target_column].notna()]

    vectorizer = TfidfVectorizer()
    # Fill missing text before the str cast so NaN becomes '' rather than 'nan';
    # the cast keeps non-string cells (e.g. numeric names) from breaking the vectorizer.
    X = vectorizer.fit_transform(labelled[text_column].fillna('').astype(str))
    y = labelled[target_column].astype(str)

    # Only the training portion is used here; the held-out portion is discarded.
    X_train, _, y_train, _ = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    # Seed the forest as well as the split; without it every run yields a different model.
    model = RandomForestClassifier(random_state=random_state)
    model.fit(X_train, y_train)

    return model, vectorizer

# Fit one classifier/vectorizer pair per vertical, using the 'Category'
# column as the label; both dicts are keyed by vertical name.
models, vectorizers = {}, {}
for vertical, df in dataframes.items():
    model, vectorizer = train_model(df, 'Category')
    models[vertical], vectorizers[vertical] = model, vectorizer

# Persist each vertical's fitted model and vectorizer with joblib.
# File names are derived from the vertical name: spaces become underscores
# and the result is lower-cased.
for vertical in file_paths:
    slug = vertical.replace(" ", "_").lower()
    joblib.dump(models[vertical], f'random_forest_{slug}.pkl')
    joblib.dump(vectorizers[vertical], f'random_forest_vectorizer_{slug}.pkl')