In [None]:
import duckdb
import glob
import os
import pandas as pd
from scipy.sparse import coo_matrix
from implicit.als import AlternatingLeastSquares

# Train an implicit-ALS recommender on a sample of ratings drawn from every
# "*_merged.parquet" file in CLEANED_DIR, then print the top-5 recommended
# item ids (asin) for the first three users encountered.
CLEANED_DIR = "/root/cleaned_parquets"
all_files = glob.glob(os.path.join(CLEANED_DIR, "*_merged.parquet"))

df_list = []

# Load relevant data in batches: at most 50,000 in-range ratings per file.
# There is no ORDER BY, so which rows are returned is left to DuckDB.
for file in all_files:
    print(f"Loading from {os.path.basename(file)}")
    # Double any single quote so the path cannot terminate the SQL string
    # literal it is interpolated into.
    quoted_path = file.replace("'", "''")
    try:
        df = duckdb.sql(f"""
            SELECT user_id, asin, rating
            FROM '{quoted_path}'
            WHERE rating BETWEEN 1 AND 5
            LIMIT 50000
        """).df()
    except Exception as e:
        # Deliberate best-effort: an unreadable or corrupt file is reported
        # and skipped instead of aborting the whole load.
        print(f"Skipped {file}: {e}")
    else:
        df_list.append(df)

# Fail with a clear message rather than pandas' "No objects to concatenate"
# when no file could be read.
if not df_list:
    raise ValueError(
        f"No readable '*_merged.parquet' files were loaded from {CLEANED_DIR}"
    )

# Combine all batches and drop rows with a missing user, item, or rating.
df = pd.concat(df_list, ignore_index=True).dropna()
if df.empty:
    raise ValueError("No usable (user_id, asin, rating) rows were loaded")

# Encode IDs as dense 0-based indices in order of first appearance.
user_map = {u: i for i, u in enumerate(df['user_id'].unique())}
item_map = {a: j for j, a in enumerate(df['asin'].unique())}
df['user_idx'] = df['user_id'].map(user_map)
df['item_idx'] = df['asin'].map(item_map)

# Reverse lookup (item index -> asin), built once; dicts keep insertion
# order, so position j holds the asin that item_map sends to j.
item_ids_by_idx = list(item_map)

# Create the sparse user-by-item matrix (rows = users, columns = items).
# The shape is given explicitly so it always matches the id maps.
# NOTE: converting COO to CSR sums duplicate (user, item) entries, so a pair
# rated more than once ends up with the sum of its ratings.
ratings_matrix = coo_matrix(
    (df['rating'], (df['user_idx'], df['item_idx'])),
    shape=(len(user_map), len(item_map)),
)
ratings_matrix_csr = ratings_matrix.tocsr()

# Train ALS. implicit >= 0.5 expects the user-by-item matrix in fit();
# passing the transpose would make the model treat items as users and break
# the per-user recommend() calls below.
model = AlternatingLeastSquares(factors=50, iterations=15, regularization=0.1)
model.fit(ratings_matrix_csr)

# Recommend for 3 users
print("Sample Recommendations:")
for user_id in list(user_map)[:3]:
    user_idx = user_map[user_id]
    # recommend() needs exactly one row of user_items for a single user id.
    user_ratings = ratings_matrix_csr[user_idx]
    # implicit >= 0.5 returns two parallel arrays (item indices, scores),
    # not a list of (item, score) pairs.
    rec_idxs, _rec_scores = model.recommend(user_idx, user_ratings, N=5)
    item_ids = [item_ids_by_idx[i] for i in rec_idxs]
    print(f"User {user_id}: {item_ids}")


  from .autonotebook import tqdm as notebook_tqdm


Loading from Amazon_Fashion_merged.parquet
Loading from Software_merged.parquet
Loading from Health_and_Personal_Care_merged.parquet
Loading from Musical_Instruments_merged.parquet
Loading from Home_and_Kitchen_merged.parquet
Skipped /root/cleaned_parquets/Home_and_Kitchen_merged.parquet: Invalid Input Error: No magic bytes found at end of file '/root/cleaned_parquets/Home_and_Kitchen_merged.parquet'
Loading from Handmade_Products_merged.parquet
Loading from Unknown_merged.parquet
Skipped /root/cleaned_parquets/Unknown_merged.parquet: Invalid Input Error: No magic bytes found at end of file '/root/cleaned_parquets/Unknown_merged.parquet'
Loading from Electronics_merged.parquet
Loading from Patio_Lawn_and_Garden_merged.parquet
Loading from Office_Products_merged.parquet
Loading from Kindle_Store_merged.parquet
Skipped /root/cleaned_parquets/Kindle_Store_merged.parquet: Invalid Input Error: No magic bytes found at end of file '/root/cleaned_parquets/Kindle_Store_merged.parquet'
Loading f

  check_blas_config()
100%|██████████| 15/15 [01:01<00:00,  4.08s/it]


🎯 Sample Recommendations:


ValueError: user_items must contain 1 row for every user in userids