In [1]:
import pandas as pd
import json

# Original dataset: load the CSV, parse the 'time' column, keep only the rows
# flagged as suitable for seasonal analysis, then derive a zero-padded
# two-character month key from 'time'.
df_orig = (
    pd.read_csv("consolidated_file_cleaned_v2.csv")
    .assign(time=lambda raw: pd.to_datetime(raw['time']))
    .loc[lambda parsed: parsed['suitable_for_seasonal_analysis'] == True]
    .assign(month=lambda kept: kept['time'].dt.month.astype(str).str.zfill(2))
)

In [2]:
# Season -> list of zero-padded month strings. Seasons overlap on purpose
# (e.g. '10' belongs to rainy, halloween and christmas).
# NOTE(review): the forecast cell further down spells three of these keys
# differently ('valentine', 'holy_week', 'back_to_school'). Both cells look the
# season up in common_categories.json, so at most one spelling can match that
# file's keys -- confirm against the file.
season_mapping = {
    'rainy': ['06','07','08','09','10','11'],
    'halloween': ['10','11'],
    'christmas': ['09','10','11','12'],
    'valentines': ['02'],
    'holyweek': ['03','04'],
    'summer': ['03','04','05'],
    'back-to-school': ['06','07','08','09']
}

def assign_seasons_multi(month):
    """Return every season whose month list contains ``month``.

    Parameters
    ----------
    month : str or int
        Month number. It is normalised with ``str(month).zfill(2)`` so that
        ``6``, ``'6'`` and ``'06'`` are all treated as ``'06'``.

    Returns
    -------
    list of str
        Matching season names in ``season_mapping`` order; an empty list when
        no season covers the month.
    """
    # Normalise first: without this an int or unpadded string silently
    # matches nothing.
    month_key = str(month).zfill(2)
    return [
        season
        for season, months in season_mapping.items()
        if month_key in months
    ]

# Attach the list of seasons covering each row's month, then fan that list out
# so every (row, season) pair becomes its own row.
df_orig['seasons'] = df_orig['month'].map(assign_seasons_multi)
df_exploded = df_orig.explode(column='seasons')

In [3]:
# Load the per-season lists of common categories.
# JSON text is UTF-8, so state the encoding instead of relying on the
# platform default used by open().
with open("common_categories.json", "r", encoding="utf-8") as f:
    common_json = json.load(f)

# common_lookup[season][top-level category] -> set of second-level categories
# listed as common for that season.
common_lookup = {}
for season, toplist in common_json.items():
    season_lookup = common_lookup[season] = {}
    for d in toplist:
        for top_cat, sec_list in d.items():
            # Union rather than overwrite: if the same top-level category
            # appears in more than one dict of a season, every listed
            # second-level category must stay in the lookup.
            season_lookup.setdefault(top_cat, set()).update(sec_list)

In [4]:
def is_uncommon(row):
    """Tell whether a row's category is absent from the season's common list.

    Reads 'seasons', 'top-level_category' and 'second-level_category' from
    ``row`` and checks them against the module-level ``common_lookup``.
    Returns True when the season or the top-level category is not in the
    lookup, or when the second-level category is not among the common ones;
    returns False only for a listed second-level category.
    """
    common_for_season = common_lookup.get(row['seasons'], {})
    # Unknown season / top-level category -> empty tuple -> "not in" is True.
    common_second_level = common_for_season.get(row['top-level_category'], ())
    return row['second-level_category'] not in common_second_level

# Keep only the (row, season) pairs that is_uncommon() flags as uncommon.
df_uncommon = df_exploded.loc[
    df_exploded.apply(is_uncommon, axis='columns')
]

In [5]:
# Total sold/m per (season, top-level, second-level) category among the
# uncommon rows, keeping only the groups whose total is strictly positive.
df_uncommon_demand = (
    df_uncommon
    .groupby(['seasons', 'top-level_category', 'second-level_category'])['sold/m']
    .sum()
    .reset_index()
    .loc[lambda totals: totals['sold/m'] > 0]
)

In [8]:
# Aggregate demand per season: sum sold/m for each uncommon category, then
# drop every category whose summed sold/m is below THRESHOLD (which also
# removes the zero-demand ones).
THRESHOLD = 1_000_000

df_uncommon_demand = (
    df_uncommon
    .groupby(['seasons', 'top-level_category', 'second-level_category'])['sold/m']
    .sum()
    .reset_index()
    .loc[lambda totals: totals['sold/m'].ge(THRESHOLD)]
)

In [9]:
# Build {season: [{top-level category: [second-level categories]}, ...]} with
# the second-level categories ordered by summed sold/m, highest first.
season_dict = {}
for season in df_uncommon_demand['seasons'].unique():
    # Slice the season once and reuse it for every top-level category.
    season_rows = df_uncommon_demand[df_uncommon_demand['seasons'] == season]
    season_dict[season] = [
        {
            top_cat: season_rows[season_rows['top-level_category'] == top_cat]
            .sort_values('sold/m', ascending=False)['second-level_category']
            .tolist()
        }
        for top_cat in season_rows['top-level_category'].unique()
    ]

# Write the result for downstream use.
with open('uncommon_categories_in_demand.json','w') as f:
    f.write(json.dumps(season_dict, indent=4))

print("✅ JSON exported: uncommon_categories_in_demand.json")

✅ JSON exported: uncommon_categories_in_demand.json


In [11]:
import pandas as pd
import json

# -----------------------------
# 1. Load JSON and forecast
# -----------------------------
# JSON text is UTF-8; state the encoding instead of relying on the platform
# default used by open().
with open("common_categories.json", "r", encoding="utf-8") as f:
    common_json = json.load(f)

# Load hybrid forecasts of uncommon products
hybrid_uncommon = pd.read_json("hybrid_forecasts_uncommon_only.json")

# Map month to season. Seasons overlap (e.g. '10' is rainy, halloween and
# christmas), so one month can belong to several seasons.
# NOTE(review): three keys here ('valentine', 'holy_week', 'back_to_school')
# are spelled differently from the mapping in the earlier season-assignment
# cell ('valentines', 'holyweek', 'back-to-school'). Both cells look the season
# up in common_categories.json, so at most one spelling can match that file's
# keys -- confirm against the file.
season_mapping = {
    'rainy': ['06','07','08','09','10','11'],
    'halloween': ['10','11'],
    'christmas': ['09','10','11','12'],
    'valentine': ['02'],
    'holy_week': ['03','04'],
    'summer': ['03','04','05'],
    'back_to_school': ['06','07','08','09']
}

def assign_seasons(month):
    """Return every season whose month list contains ``month``.

    ``month`` may be an int or a string; it is normalised with
    ``str(month).zfill(2)``. The result follows ``season_mapping`` order and
    is an empty list when no season covers the month.
    """
    month_str = str(month).zfill(2)
    return [
        season
        for season, months in season_mapping.items()
        if month_str in months
    ]

def assign_season(month):
    """Return the first matching season in ``season_mapping`` order, or None.

    Kept with its original single-season contract. Because seasons overlap,
    it can never return 'halloween' or 'back_to_school'; use
    ``assign_seasons`` when every matching season is needed.
    """
    matches = assign_seasons(month)
    return matches[0] if matches else None

hybrid_uncommon['month'] = pd.to_datetime(hybrid_uncommon['forecast_date']).dt.month.astype(str).str.zfill(2)
# Assign ALL matching seasons and give each (forecast row, season) pair its own
# row, as the earlier season-assignment cell does. Taking only the first match
# left 'halloween' and 'back_to_school' without any rows.
# Off-season months keep a single row whose season is None, as before.
hybrid_uncommon['season'] = hybrid_uncommon['month'].apply(
    lambda month: assign_seasons(month) or [None]
)
hybrid_uncommon = hybrid_uncommon.explode('season')

# -----------------------------
# 2. Remove common categories per season
# -----------------------------
def is_truly_uncommon(row):
    """Tell whether a row's category pair is missing from the common list.

    Reads 'season', 'top-level_category' and 'second-level_category' from
    ``row``. Returns False as soon as any entry of ``common_json[season]``
    lists the second-level category under the row's top-level category;
    returns True otherwise, including when the season is not a key of
    ``common_json``.
    """
    top_cat = row['top-level_category']
    sec_cat = row['second-level_category']
    listed_as_common = any(
        top_cat in entry and sec_cat in entry[top_cat]
        for entry in common_json.get(row['season'], ())
    )
    return not listed_as_common

# Drop every forecast row whose category pair is listed as common for its season.
hybrid_only = hybrid_uncommon.loc[
    hybrid_uncommon.apply(is_truly_uncommon, axis='columns')
]

# -----------------------------
# 3. Aggregate uncommon categories for frontend JSON
# -----------------------------
# Keep positive forecast
hybrid_only = hybrid_only[hybrid_only['forecast_sold_per_month'] > 0]

# Build {season: [{top-level category: [second-level categories]}, ...]}.
season_dict = {}
# dropna(): rows whose month maps to no season carry a null season. Comparing
# the column against a null never matches, so such a key would only ever add
# an empty "null" entry to the exported JSON.
for season in hybrid_only['season'].dropna().unique():
    season_rows = hybrid_only[hybrid_only['season'] == season]
    season_dict[season] = [
        {
            # There is one row per forecast, so the same second-level category
            # can occur many times; list each one once, in first-appearance
            # order.
            top_cat: season_rows.loc[
                season_rows['top-level_category'] == top_cat,
                'second-level_category',
            ].drop_duplicates().tolist()
        }
        for top_cat in season_rows['top-level_category'].unique()
    ]

# -----------------------------
# 4. Export JSON
# -----------------------------
with open("uncommon_categories_filtered.json", "w") as f:
    json.dump(season_dict, f, indent=4)

print("✅ JSON exported: uncommon_categories_filtered.json")

✅ JSON exported: uncommon_categories_filtered.json
