# In [None]:  (notebook cell marker left over from the export)
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity
from flask import Flask, request, jsonify

# Load the dataset.
# sku_id is read as text so that the profile index holds strings: the
# /recommend endpoint receives sku_id from the URL query string, which is
# always a str, and an integer-typed index would never match it.
data = pd.read_csv(
    'Orders_New_query_2024_07_01.csv',  # Replace with your dataset path
    dtype={'sku_id': str},
)

# Feature selection: Choose attributes that describe the product
# NOTE(review): StandardScaler only accepts numeric input; the *_status
# columns look like they could be categorical -- confirm they are numerically
# encoded in the CSV (or encode them before scaling).
features = [
    'ordered_quantity', 'tax_percent', 'net_sales_before_tax',
    'gross_merchandise_value', 'refund_status', 'rto_status',
    'cancellation_status', 'order_status', 'payment_status',
    'gift_wrap_expense', 'packaging_expense', 'handling_expense',
    'shipping_expense', 'marketplace_expense', 'payment_gateway_expense',
    'other_adjustments'
]

# Rows without a SKU cannot belong to any product profile. Drop them first,
# otherwise the blanket fill below would turn the missing SKU into 0 and
# group those rows into a phantom product that could be recommended.
data.dropna(subset=['sku_id'], inplace=True)

# Preprocessing: Fill missing values and normalize
data.fillna(0, inplace=True)  # Handle remaining missing values
scaler = StandardScaler()
# Standardize each feature column (zero mean, unit variance) in place.
data[features] = scaler.fit_transform(data[features])

# Creating product profiles: one row per sku_id holding the mean of its
# standardized feature values across all of that SKU's order rows.
product_profiles = data.groupby('sku_id')[features].mean()

# Calculate the pairwise cosine similarity between product profiles and label
# both axes with sku_id so a column can be looked up directly by SKU.
similarity_matrix = cosine_similarity(product_profiles)
similarity_df = pd.DataFrame(
    similarity_matrix,
    index=product_profiles.index,
    columns=product_profiles.index,
)

# Flask app setup: create the application object that route handlers are
# registered on via the @app.route decorator.
app = Flask(__name__)

@app.route('/recommend', methods=['GET'])
def recommend():
    """Return up to five SKUs most similar to the one given in ``?sku_id=``.

    Returns:
        A JSON response ``{"sku_id": ..., "recommended_sku_ids": [...]}`` with
        the recommendations ordered by descending similarity score, or a
        ``({"error": "SKU_ID not found"}, 404)`` pair when the parameter is
        missing or the SKU is not present in ``similarity_df``.
    """
    # Get the SKU_ID from the user (None when the parameter is absent)
    sku_id = request.args.get('sku_id')

    if sku_id is None or sku_id not in similarity_df.index:
        return jsonify({"error": "SKU_ID not found"}), 404

    # Exclude the queried SKU *before* ranking. Taking the top 6 and removing
    # the SKU afterwards assumes it always ranks in its own top 6, which does
    # not hold when other SKUs tie at the same score or when an all-zero
    # profile gives a self-similarity of 0; list.remove() would then raise
    # ValueError and the request would fail with a 500.
    scores = similarity_df[sku_id].drop(labels=sku_id)

    # Get the top 5 similar products for the given SKU_ID
    similar_products = scores.sort_values(ascending=False).head(5).index.tolist()

    return jsonify({"sku_id": sku_id, "recommended_sku_ids": similar_products})

# Start Flask's built-in server only when this file is executed directly
# (not when it is imported as a module).
# NOTE(review): debug=True turns on Flask's debug mode (auto-reloader and
# interactive debugger), which the Flask docs warn against outside local
# development -- confirm this entry point is never used in production.
if __name__ == '__main__':
    app.run(debug=True)
