In [43]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
import difflib

In [53]:
class EcoCartRecommender:
    """Baseline eco-aware recommender that scores the catalog one row at a time.

    Reads three CSV files, derives a weighted popularity score per item from
    the event log, and fits a logistic-regression model estimating whether a
    (user, product) pair ends in a cart/purchase event.

    Columns read by this class:
        products: itemID, Product, Variety/Size, Brand, price,
            Total(kg CO2e), refurbished_available
        events: itemID, user_id, event_type
        profiles: user_id, avg_spend

    Attributes:
        products, events, profiles: the loaded DataFrames.
        popularity: Series indexed by itemID holding the popularity score.
        model: the fitted classifier (must expose ``predict_proba``).
    """

    _CO2_COLUMN = 'Total(kg CO2e)'
    # Contribution of each event type to an item's popularity score.
    _EVENT_WEIGHTS = {'view': 1, 'cart': 2, 'purchase': 3}
    # Event types treated as the positive class when training.
    _POSITIVE_EVENTS = ['purchase', 'cart']
    _FEATURE_COLUMNS = ['popularity', 'eco_score_norm', 'category_match', 'refurbished', 'price_penalty']
    _OUTPUT_COLUMNS = ['itemID', 'Product', 'Variety/Size', 'Brand', 'price', 'Total(kg CO2e)', 'refurbished_available']

    def __init__(self, product_file, event_file, profile_file):
        """Load the three CSV files, then compute popularity and train the model."""
        self.products = pd.read_csv(product_file)
        self.events = pd.read_csv(event_file)
        self.profiles = pd.read_csv(profile_file)

        self.popularity = self._compute_popularity()
        self.model = self._train_score_model()

    def _compute_popularity(self):
        """Return a Series (indexed by itemID) of weighted event counts.

        Each view counts 1, each cart 2 and each purchase 3; event types that
        never occur in the log simply contribute nothing.
        """
        if self.events.empty:
            return pd.Series(dtype=float, name='popularity_score')

        counts = self.events.groupby('itemID')['event_type'].value_counts().unstack().fillna(0)
        score = pd.Series(0.0, index=counts.index)
        for event_type, weight in self._EVENT_WEIGHTS.items():
            if event_type in counts.columns:
                score = score + weight * counts[event_type]
        return score.rename('popularity_score')

    def _normalize_eco(self, co2):
        """Min-max scale ``co2`` (scalar or Series) using the catalog's CO2 range.

        The same catalog-wide bounds are used for training and for inference so
        a product always receives the same feature value. A degenerate range
        (all products equal, or no valid values) maps everything to 0 instead
        of dividing by zero.
        """
        catalog = self.products[self._CO2_COLUMN]
        low = catalog.min()
        span = catalog.max() - low
        if not span > 0:  # also true when span is NaN
            return co2 * 0.0
        return (co2 - low) / span

    def _generate_training_data(self):
        """Build the (features, labels) pair used to fit the scoring model.

        One training row per event that has both a matching product and a
        matching user profile (inner joins). The label is 1 for cart/purchase
        events and 0 otherwise.
        """
        merged = self.events.merge(self.products, on='itemID')
        merged = merged.merge(self.profiles, on='user_id')

        merged['popularity'] = merged['itemID'].map(self.popularity).fillna(0)
        merged['eco_score'] = merged[self._CO2_COLUMN]
        merged['eco_score_norm'] = self._normalize_eco(merged['eco_score'])

        # Constant placeholder feature; it carries no signal as long as it is always 1.
        merged['category_match'] = 1
        merged['refurbished'] = merged['refurbished_available']
        # NOTE(review): assumes avg_spend is non-zero for every profile - confirm.
        merged['price_penalty'] = merged['price'] / merged['avg_spend']

        merged['label'] = merged['event_type'].isin(self._POSITIVE_EVENTS).astype(int)

        features = merged[self._FEATURE_COLUMNS]
        labels = merged['label']
        return features, labels

    def _train_score_model(self):
        """Fit and return a LogisticRegression on the generated training data."""
        X, y = self._generate_training_data()
        model = LogisticRegression()
        model.fit(X, y)
        return model

    def _compute_features(self, product_row, user_profile):
        """Return a one-row feature DataFrame for a single (product, user) pair."""
        pop = self.popularity.get(product_row['itemID'], 0)
        eco_score_norm = self._normalize_eco(product_row[self._CO2_COLUMN])
        category_match = 1
        refurbished = product_row['refurbished_available']
        price_penalty = product_row['price'] / user_profile['avg_spend']
        return pd.DataFrame(
            [[pop, eco_score_norm, category_match, refurbished, price_penalty]],
            columns=self._FEATURE_COLUMNS,
        )

    def _lookup_profile(self, user_id):
        """Return the profile row for ``user_id``; raise ValueError if it is unknown."""
        rows = self.profiles[self.profiles['user_id'] == user_id]
        if rows.empty:
            raise ValueError(f"Unknown user_id: {user_id!r}")
        # If the id is duplicated, the first profile wins.
        return rows.iloc[0]

    def _match_category(self, search_query):
        """Fuzzy-match ``search_query`` to a product type, case-insensitively.

        Returns the lower-cased product type, or None when nothing is close
        enough (difflib similarity cutoff 0.3).
        """
        names = self.products['Product'].dropna().astype(str).unique()
        # Compare lower-case to lower-case; dict keys also de-duplicate case variants.
        candidates = list(dict.fromkeys(name.lower() for name in names))
        hits = difflib.get_close_matches(search_query.lower(), candidates, n=1, cutoff=0.3)
        return hits[0] if hits else None

    def recommend_for_user(self, user_id, top_n=10, green_mode=True, search_query=None):
        """Return up to ``top_n`` recommended products for ``user_id``.

        Without a matching search query, products are ranked by model score
        (highest first); with ``green_mode`` ties are broken by lower CO2.
        With a search query that matches a product type, products of that type
        come first (by score, then lower CO2) and any remaining slots are
        filled with other products ordered by lower CO2; ``green_mode`` has no
        effect in that case.

        Args:
            user_id: value looked up in the profiles' ``user_id`` column.
            top_n: maximum number of rows to return; None means no limit and
                negative values are treated as 0.
            green_mode: break score ties in favour of lower CO2.
            search_query: optional free-text product type.

        Returns:
            DataFrame with the output columns; its index carries the catalog's
            original row labels. Empty (columns only) when nothing is selected.

        Raises:
            ValueError: if ``user_id`` has no profile.
        """
        user_profile = self._lookup_profile(user_id)
        limit = None if top_n is None else max(int(top_n), 0)
        matched = self._match_category(search_query) if search_query else None

        # Row-by-row scoring: one predict_proba call per product.
        scored = [
            (self.model.predict_proba(self._compute_features(row, user_profile))[0][1], row)
            for _, row in self.products.iterrows()
        ]

        co2 = self._CO2_COLUMN

        def in_category(row):
            # Rows with a missing Product never belong to the searched category.
            name = row['Product']
            return isinstance(name, str) and name.lower() == matched

        if matched is not None:
            same_cat = sorted(
                (pair for pair in scored if in_category(pair[1])),
                key=lambda pair: (-pair[0], pair[1][co2]),
            )
            other_cat = sorted(
                (pair for pair in scored if not in_category(pair[1])),
                key=lambda pair: pair[1][co2],
            )
            ranked = same_cat + other_cat
        elif green_mode:
            ranked = sorted(scored, key=lambda pair: (-pair[0], pair[1][co2]))
        else:
            ranked = sorted(scored, key=lambda pair: -pair[0])

        top_items = [row for _, row in ranked[:limit]]
        if not top_items:
            return pd.DataFrame(columns=self._OUTPUT_COLUMNS)
        return pd.DataFrame(top_items)[self._OUTPUT_COLUMNS]

In [66]:
## Faster, vectorized version of the recommender
class EcoCartRecommender_faster:
    """Vectorized eco-aware recommender: scores the whole catalog in one model call.

    Reads three CSV files, derives a weighted popularity score per item from
    the event log, and fits a logistic-regression model estimating whether a
    (user, product) pair ends in a cart/purchase event.

    Columns read by this class:
        products: itemID, Product, Variety/Size, Brand, price,
            Total(kg CO2e), refurbished_available
        events: itemID, user_id, event_type
        profiles: user_id, avg_spend

    Attributes:
        products, events, profiles: the loaded DataFrames.
        popularity: Series indexed by itemID holding the popularity score.
        model: the fitted classifier (must expose ``predict_proba``).
    """

    _CO2_COLUMN = 'Total(kg CO2e)'
    # Contribution of each event type to an item's popularity score.
    _EVENT_WEIGHTS = {'view': 1, 'cart': 2, 'purchase': 3}
    # Event types treated as the positive class when training.
    _POSITIVE_EVENTS = ['purchase', 'cart']
    _FEATURE_COLUMNS = ['popularity', 'eco_score_norm', 'category_match', 'refurbished', 'price_penalty']
    _OUTPUT_COLUMNS = ['itemID', 'Product', 'Variety/Size', 'Brand', 'price', 'Total(kg CO2e)', 'refurbished_available']

    def __init__(self, product_file, event_file, profile_file):
        """Load the three CSV files, then compute popularity and train the model."""
        self.products = pd.read_csv(product_file)
        self.events = pd.read_csv(event_file)
        self.profiles = pd.read_csv(profile_file)

        self.popularity = self._compute_popularity()
        self.model = self._train_score_model()

    def _compute_popularity(self):
        """Return a Series (indexed by itemID) of weighted event counts.

        Each view counts 1, each cart 2 and each purchase 3; event types that
        never occur in the log simply contribute nothing.
        """
        if self.events.empty:
            return pd.Series(dtype=float, name='popularity_score')

        counts = self.events.groupby('itemID')['event_type'].value_counts().unstack().fillna(0)
        score = pd.Series(0.0, index=counts.index)
        for event_type, weight in self._EVENT_WEIGHTS.items():
            if event_type in counts.columns:
                score = score + weight * counts[event_type]
        return score.rename('popularity_score')

    def _normalize_eco(self, co2):
        """Min-max scale ``co2`` (scalar or Series) using the catalog's CO2 range.

        The same catalog-wide bounds are used for training and for inference so
        a product always receives the same feature value. A degenerate range
        (all products equal, or no valid values) maps everything to 0 instead
        of dividing by zero.
        """
        catalog = self.products[self._CO2_COLUMN]
        low = catalog.min()
        span = catalog.max() - low
        if not span > 0:  # also true when span is NaN
            return co2 * 0.0
        return (co2 - low) / span

    def _generate_training_data(self):
        """Build the (features, labels) pair used to fit the scoring model.

        One training row per event that has both a matching product and a
        matching user profile (inner joins). The label is 1 for cart/purchase
        events and 0 otherwise.
        """
        merged = self.events.merge(self.products, on='itemID')
        merged = merged.merge(self.profiles, on='user_id')

        merged['popularity'] = merged['itemID'].map(self.popularity).fillna(0)
        merged['eco_score'] = merged[self._CO2_COLUMN]
        merged['eco_score_norm'] = self._normalize_eco(merged['eco_score'])

        # Constant placeholder feature; it carries no signal as long as it is always 1.
        merged['category_match'] = 1
        merged['refurbished'] = merged['refurbished_available']
        # NOTE(review): assumes avg_spend is non-zero for every profile - confirm.
        merged['price_penalty'] = merged['price'] / merged['avg_spend']

        merged['label'] = merged['event_type'].isin(self._POSITIVE_EVENTS).astype(int)

        features = merged[self._FEATURE_COLUMNS]
        labels = merged['label']
        return features, labels

    def _train_score_model(self):
        """Fit and return a LogisticRegression on the generated training data."""
        X, y = self._generate_training_data()
        model = LogisticRegression()
        model.fit(X, y)
        return model

    def _lookup_profile(self, user_id):
        """Return the profile row for ``user_id``; raise ValueError if it is unknown."""
        rows = self.profiles[self.profiles['user_id'] == user_id]
        if rows.empty:
            raise ValueError(f"Unknown user_id: {user_id!r}")
        # If the id is duplicated, the first profile wins.
        return rows.iloc[0]

    def _match_category(self, search_query):
        """Fuzzy-match ``search_query`` to a product type, case-insensitively.

        Returns the lower-cased product type, or None when nothing is close
        enough (difflib similarity cutoff 0.3).
        """
        names = self.products['Product'].dropna().astype(str).unique()
        # Compare lower-case to lower-case; dict keys also de-duplicate case variants.
        candidates = list(dict.fromkeys(name.lower() for name in names))
        hits = difflib.get_close_matches(search_query.lower(), candidates, n=1, cutoff=0.3)
        return hits[0] if hits else None

    def _score_catalog(self, user_profile):
        """Return a copy of the catalog with feature columns and a ``score`` column."""
        scored = self.products.copy()
        scored['popularity'] = scored['itemID'].map(self.popularity).fillna(0)
        scored['eco_score_norm'] = self._normalize_eco(scored[self._CO2_COLUMN])
        scored['category_match'] = 1
        scored['refurbished'] = scored['refurbished_available']
        scored['price_penalty'] = scored['price'] / user_profile['avg_spend']
        # One model call for the entire catalog.
        scored['score'] = self.model.predict_proba(scored[self._FEATURE_COLUMNS])[:, 1]
        return scored

    def recommend_for_user(self, user_id, top_n=10, green_mode=True, search_query=None):
        """Return up to ``top_n`` recommended products for ``user_id``.

        Without a matching search query, products are ranked by model score
        (highest first); with ``green_mode`` ties are broken by lower CO2.
        With a search query that matches a product type, products of that type
        come first (by score, then lower CO2) and any remaining slots are
        filled with other products ordered by lower CO2; ``green_mode`` has no
        effect in that case. All orderings are stable, so remaining ties keep
        catalog order.

        Args:
            user_id: value looked up in the profiles' ``user_id`` column.
            top_n: maximum number of rows to return; None means no limit and
                negative values are treated as 0.
            green_mode: break score ties in favour of lower CO2.
            search_query: optional free-text product type.

        Returns:
            DataFrame with the output columns and a fresh 0..k-1 index. Empty
            (columns only) when nothing is selected.

        Raises:
            ValueError: if ``user_id`` has no profile.
        """
        user_profile = self._lookup_profile(user_id)
        limit = None if top_n is None else max(int(top_n), 0)
        if self.products.empty:
            return pd.DataFrame(columns=self._OUTPUT_COLUMNS)

        matched = self._match_category(search_query) if search_query else None

        scored = self._score_catalog(user_profile)
        co2 = scored[self._CO2_COLUMN].to_numpy(dtype=float)
        neg_score = -scored['score'].to_numpy(dtype=float)

        if matched is not None:
            # Missing Product values compare as "not in category".
            in_cat = (scored['Product'].str.lower() == matched).fillna(False).to_numpy(dtype=bool)
            positions = np.arange(len(scored))
            same_pos = positions[in_cat]
            other_pos = positions[~in_cat]
            # np.lexsort sorts by the LAST key first: score descending, then CO2 ascending.
            same_pos = same_pos[np.lexsort((co2[same_pos], neg_score[same_pos]))]
            other_pos = other_pos[np.argsort(co2[other_pos], kind='stable')]
            order = np.concatenate([same_pos, other_pos])
        elif green_mode:
            order = np.lexsort((co2, neg_score))
        else:
            order = np.argsort(neg_score, kind='stable')

        top = scored.iloc[order[:limit]]
        return top[self._OUTPUT_COLUMNS].reset_index(drop=True)

In [54]:
# Build the baseline (row-by-row) recommender from the synthetic dataset CSVs.
recommender = EcoCartRecommender(
    "Dataset/Synthetic_Dataset/products_catalog.csv",  # product_file
    "Dataset/Synthetic_Dataset/user_events.csv",       # event_file
    "Dataset/Synthetic_Dataset/user_profiles.csv",     # profile_file
)


In [67]:
# Build the vectorized recommender from the same synthetic dataset CSVs.
recommender_1 = EcoCartRecommender_faster(
    "Dataset/Synthetic_Dataset/products_catalog.csv",  # product_file
    "Dataset/Synthetic_Dataset/user_events.csv",       # event_file
    "Dataset/Synthetic_Dataset/user_profiles.csv",     # profile_file
)


In [68]:
# Top-10 green-mode recommendations for one user, with no search query.
user_id = 101  # Change as needed
print("\n--- Green Mode Recommendations ---")
# Uncomment to run the same query against the baseline `recommender` instead:
# print(recommender.recommend_for_user(user_id, green_mode=True))
print(recommender_1.recommend_for_user(user_id, green_mode=True))


--- Green Mode Recommendations ---
   itemID                Product Variety/Size      Brand  price  \
0     245        Television (TV)      43 inch     Xiaomi  32063   
1     132        Washing Machine         8 kg    Samsung  29627   
2     135  Refrigerator (Fridge)        500 L     Godrej  55454   
3     393   Air Conditioner (AC)        2 Ton    Hitachi  48040   
4     241    Geyser/Water Heater          6 L     Racold   3853   
5      20                 Cooler         20 L   Symphony   5549   
6      60  Refrigerator (Fridge)        340 L         LG  36613   
7     383   Air Conditioner (AC)        1 Ton         LG  34524   
8     488                   Fans      48 inch    Havells   2860   
9     479        Washing Machine         6 kg  Whirlpool  20203   

   Total(kg CO2e)  refurbished_available  
0          161.71                      1  
1          171.00                      1  
2          311.85                      1  
3          416.30                      1  
4          

In [69]:
# Same user, now with a search query: products of the matched type are listed first.
user_id = 101  # Change as needed
print("\n--- Green Mode Recommendations ---")
# Uncomment to run the same query against the baseline `recommender` instead:
# print(recommender.recommend_for_user(user_id, green_mode=True, search_query='Washing Machine'))
print(recommender_1.recommend_for_user(user_id, green_mode=True, search_query='Washing Machine'))


--- Green Mode Recommendations ---
   itemID          Product Variety/Size      Brand  price  Total(kg CO2e)  \
0     132  Washing Machine         8 kg    Samsung  29627          171.00   
1     479  Washing Machine         6 kg  Whirlpool  20203          115.20   
2     452  Washing Machine         8 kg    Samsung  34021          171.00   
3      34  Washing Machine         9 kg         LG  30855          211.68   
4     397  Washing Machine         8 kg      Bosch  33907          172.80   
5      93  Washing Machine         6 kg         LG  23777          117.60   
6     232  Washing Machine         7 kg        IFB  29185          139.68   
7      59  Washing Machine         8 kg     Godrej  36553          178.20   
8     438  Washing Machine         7 kg  Whirlpool  20944          138.24   
9     147  Washing Machine         7 kg      Bosch  29009          138.24   

   refurbished_available  
0                      1  
1                      1  
2                      1  
3       

In [70]:
# Search-query demo for the 'Fans' product type using the vectorized recommender.
user_id = 101  # Change as needed
print("\n--- Green Mode Recommendations ---")
# Uncomment to run the same query against the baseline `recommender` instead:
# print(recommender.recommend_for_user(user_id, green_mode=True, search_query='Fans'))
print(recommender_1.recommend_for_user(user_id, green_mode=True, search_query='Fans'))


--- Green Mode Recommendations ---
   itemID Product Variety/Size     Brand  price  Total(kg CO2e)  \
0     488    Fans      48 inch   Havells   2860           24.00   
1     478    Fans      48 inch     Bajaj   2663           24.24   
2       5    Fans      48 inch    Orient   2951           26.16   
3     269    Fans      52 inch  Crompton   3251           30.50   
4     386    Fans      48 inch  Atomberg   2294           25.20   
5     463    Fans      48 inch     Bajaj   2767           24.24   
6     237    Fans      48 inch     Bajaj   2335           24.24   
7      69    Fans      52 inch  Atomberg   3081           32.02   
8     177    Fans      48 inch    Orient   2023           26.16   
9     292    Fans      48 inch   Havells   2033           24.00   

   refurbished_available  
0                      1  
1                      1  
2                      1  
3                      0  
4                      1  
5                      1  
6                      1  
7         

In [58]:
# List the distinct values of the catalog's 'Product' column; these are the
# strings a search_query is fuzzy-matched against.
print(recommender.products['Product'].unique())

['Gas Stove/Cooktop' 'Television (TV)' 'Air Conditioner (AC)'
 'Mixer Grinder' 'Fans' 'Cooler' 'Washing Machine' 'Refrigerator (Fridge)'
 'Geyser/Water Heater' 'Water Purifier']


In [59]:
# Baseline recommender with a search query; relies on `user_id` assigned in an earlier cell.
print("\n--- Green Mode Recommendations ---")
print(recommender.recommend_for_user(user_id, green_mode=True, search_query='Water Purifier'))


--- Green Mode Recommendations ---
     itemID         Product Variety/Size      Brand  price  Total(kg CO2e)  \
51       51  Water Purifier     RO+UV+UF        HUL  13222           92.00   
262     262  Water Purifier     RO+UV+UF  Blue Star  10446           92.92   
108     108  Water Purifier     RO+UV+UF     Pureit  19224           90.16   
369     369  Water Purifier        RO+UV        HUL   8296           70.00   
234     234  Water Purifier     RO+UV+UF       Kent  18954           92.00   
462     462  Water Purifier     RO+UV+UF  Aquaguard  13526           93.84   
181     181  Water Purifier     RO+UV+UF        HUL  14616           92.00   
109     109  Water Purifier     RO+UV+UF  Blue Star  12226           92.92   
89       89  Water Purifier     RO+UV+UF  Blue Star  15913           92.92   
319     319  Water Purifier     RO+UV+UF    Livpure  11139           96.60   

     refurbished_available  
51                       0  
262                      0  
108               

In [60]:
# Baseline recommender with a search query; relies on `user_id` assigned in an earlier cell.
print("\n--- Green Mode Recommendations ---")
print(recommender.recommend_for_user(user_id, green_mode=True, search_query='Television (TV)'))


--- Green Mode Recommendations ---
     itemID          Product Variety/Size    Brand  price  Total(kg CO2e)  \
245     245  Television (TV)      43 inch   Xiaomi  32063          161.71   
1         1  Television (TV)      65 inch     Sony  65187          338.40   
106     106  Television (TV)      43 inch       Vu  38538          172.70   
101     101  Television (TV)      65 inch  Samsung  64474          267.90   
214     214  Television (TV)      50 inch   Xiaomi  49242          200.85   
131     131  Television (TV)      32 inch       LG  20524          127.40   
226     226  Television (TV)      50 inch   Xiaomi  36715          200.85   
156     156  Television (TV)      43 inch     Sony  29865          188.40   
442     442  Television (TV)      32 inch   Xiaomi  26068          133.90   
445     445  Television (TV)      50 inch       Vu  46306          214.50   

     refurbished_available  
245                      1  
1                        0  
106                      1  


In [61]:
# Baseline recommender with a search query; relies on `user_id` assigned in an earlier cell.
print("\n--- Green Mode Recommendations ---")
print(recommender.recommend_for_user(user_id, green_mode=True, search_query='Refrigerator (Fridge)'))


--- Green Mode Recommendations ---
     itemID                Product Variety/Size      Brand  price  \
135     135  Refrigerator (Fridge)        500 L     Godrej  55454   
60       60  Refrigerator (Fridge)        340 L         LG  36613   
468     468  Refrigerator (Fridge)        300 L     Godrej  28532   
157     157  Refrigerator (Fridge)        240 L     Godrej  27424   
425     425  Refrigerator (Fridge)        240 L  Panasonic  23022   
79       79  Refrigerator (Fridge)        500 L    Samsung  43541   
144     144  Refrigerator (Fridge)        240 L  Whirlpool  25267   
238     238  Refrigerator (Fridge)        340 L  Whirlpool  31609   
322     322  Refrigerator (Fridge)        500 L  Whirlpool  41721   
291     291  Refrigerator (Fridge)        340 L  Whirlpool  38927   

     Total(kg CO2e)  refurbished_available  
135          311.85                      1  
60           266.56                      1  
468          242.55                      1  
157          215.82     

In [65]:
# Request up to 50 recommendations for a different user from the baseline recommender.
user_id = 1  # Change as needed
print("\n--- Green Mode Recommendations ---")
# Bare last expression: the notebook renders the returned DataFrame as the cell output.
recommender.recommend_for_user(user_id, top_n=50, green_mode=True)


--- Green Mode Recommendations ---


Unnamed: 0,itemID,Product,Variety/Size,Brand,price,Total(kg CO2e),refurbished_available
245,245,Television (TV),43 inch,Xiaomi,32063,161.71,1
132,132,Washing Machine,8 kg,Samsung,29627,171.0,1
135,135,Refrigerator (Fridge),500 L,Godrej,55454,311.85,1
393,393,Air Conditioner (AC),2 Ton,Hitachi,48040,416.3,1
60,60,Refrigerator (Fridge),340 L,LG,36613,266.56,1
383,383,Air Conditioner (AC),1 Ton,LG,34524,240.1,0
1,1,Television (TV),65 inch,Sony,65187,338.4,0
241,241,Geyser/Water Heater,6 L,Racold,3853,49.44,1
468,468,Refrigerator (Fridge),300 L,Godrej,28532,242.55,1
20,20,Cooler,20 L,Symphony,5549,45.1,1
