# Farm to Feed: Produce Recommendation ML Pipeline

In [1]:
# Farm to Feed: Produce Recommendation ML Pipeline
# Working with Train.csv and Test.csv

# Cell 1: Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, mean_absolute_error
import warnings
warnings.filterwarnings('ignore')

sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (12, 6)

In [3]:
# Cell 2: Load Train and Test Data
print("=" * 70)
print("LOADING DATA")
print("=" * 70)

df_train = pd.read_csv('data/Train.csv')
df_test = pd.read_csv('data/Test.csv')

print(f"\n✓ Train data shape: {df_train.shape}")
print(f"✓ Test data shape: {df_test.shape}")

print(f"\nTrain columns: {list(df_train.columns)}")
print(f"\nTest columns: {list(df_test.columns)}")

print(f"\nTrain data info:")
print(df_train.info())

print(f"\nTest data info:")
print(df_test.info())

LOADING DATA

✓ Train data shape: (2114436, 20)
✓ Test data shape: (275796, 11)

Train columns: ['ID', 'customer_id', 'product_unit_variant_id', 'week_start', 'qty_this_week', 'num_orders_week', 'spend_this_week', 'purchased_this_week', 'product_id', 'grade_name', 'unit_name', 'product_grade_variant_id', 'selling_price', 'customer_category', 'customer_status', 'customer_created_at', 'Target_qty_next_1w', 'Target_purchase_next_1w', 'Target_qty_next_2w', 'Target_purchase_next_2w']

Test columns: ['ID', 'customer_id', 'product_unit_variant_id', 'week_start', 'product_id', 'grade_name', 'unit_name', 'product_grade_variant_id', 'customer_category', 'customer_status', 'customer_created_at']

Train data info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2114436 entries, 0 to 2114435
Data columns (total 20 columns):
 #   Column                    Dtype  
---  ------                    -----  
 0   ID                        object 
 1   customer_id               int64  
 2   product_unit_v

In [5]:
# Cell 3: Data Overview
print("\n" + "=" * 70)
print("DATA OVERVIEW")
print("=" * 70)

print(f"\nUnique customers in train: {df_train['customer_id'].nunique()}")
print(f"Unique products in train: {df_train['product_unit_variant_id'].nunique()}")

print(f"\nUnique customers in test: {df_test['customer_id'].nunique()}")
print(f"Unique products in test: {df_test['product_unit_variant_id'].nunique()}")

print(f"\nTrain - Overall purchase rate: {df_train['purchased_this_week'].mean():.2%}")

# Check if test has purchase column
if 'purchased_this_week' in df_test.columns:
    print(f"Test - Overall purchase rate: {df_test['purchased_this_week'].mean():.2%}")
else:
    print(f"Test - Overall purchase rate: N/A (no target in test set)")

print(f"\nTrain date range: {df_train['week_start'].min()} to {df_train['week_start'].max()}")
print(f"Test date range: {df_test['week_start'].min()} to {df_test['week_start'].max()}")


DATA OVERVIEW

Unique customers in train: 141
Unique products in train: 326

Unique customers in test: 141
Unique products in test: 326

Train - Overall purchase rate: 2.02%
Test - Overall purchase rate: N/A (no target in test set)

Train date range: 2024-10-28 to 2025-09-08
Test date range: 2025-09-22 to 2025-10-27


In [9]:
# Cell 5: Define Recommender Class
class FarmToFeedRecommender:
    def __init__(self):
        self.purchase_models = {}
        self.quantity_models = {}
        self.scalers = {}
        self.label_encoders = {}
        self.feature_cols = None
        
    def load_and_prepare_data(self, df):
        """Load transaction data and prepare for modeling"""
        df = df.copy()
        df['week_start'] = pd.to_datetime(df['week_start'])
        df['customer_created_at'] = pd.to_datetime(df['customer_created_at'])
        return df
    
    def engineer_features(self, df, customer_col='customer_id', 
                         product_col='product_unit_variant_id', week_col='week_start'):
        """Engineer features for each customer-product pair"""
        
        features = []
        
        for (customer, product), group in df.groupby([customer_col, product_col]):
            group = group.sort_values(week_col)
            
            if len(group) < 2:
                continue
            
            # Purchase history features
            purchase_weeks = len(group[group['purchased_this_week'] == 1])
            total_weeks_history = len(group)
            purchase_rate = purchase_weeks / (total_weeks_history + 1)
            weeks_since_last_purchase = total_weeks_history - purchase_weeks
            
            # Quantity patterns
            qty_history = group['qty_this_week'].values
            total_qty = qty_history.sum()
            mean_qty = qty_history.mean()
            std_qty = qty_history.std() if len(qty_history) > 1 else 0
            max_qty = qty_history.max()
            min_qty = qty_history.min()
            qty_cv = std_qty / (mean_qty + 1)
            
            # Order patterns
            orders_history = group['num_orders_week'].values
            mean_orders = orders_history.mean()
            max_orders = orders_history.max()
            
            # Spend patterns
            spend_history = group['spend_this_week'].values
            total_spend = spend_history.sum()
            mean_spend = spend_history.mean()
            
            # Temporal patterns
            recent_weeks = min(4, len(group))
            recent_qty = group.tail(recent_weeks)['qty_this_week'].mean()
            recent_purchase_rate = (group.tail(recent_weeks)['purchased_this_week'].sum() / recent_weeks)
            
            if total_weeks_history >= 8:
                old_qty = group.head(total_weeks_history // 2)['qty_this_week'].mean()
                qty_trend = (recent_qty - old_qty) / (old_qty + 1)
            else:
                qty_trend = 0
            
            # Customer features
            customer_data = group.iloc[0]
            customer_lifetime_days = (group[week_col].max() - customer_data['customer_created_at']).days
            
            grade_name = customer_data['grade_name']
            unit_name = customer_data['unit_name']
            customer_category = customer_data['customer_category']
            customer_status = customer_data['customer_status']
            
            avg_price = group['selling_price'].mean()
            price_volatility = group['selling_price'].std() if len(group) > 1 else 0
            
            product_id = customer_data['product_id']
            
            feature_row = {
                'customer_id': customer,
                'product_unit_variant_id': product,
                'product_id': product_id,
                'grade_name': grade_name,
                'unit_name': unit_name,
                'customer_category': customer_category,
                'customer_status': customer_status,
                
                'purchase_weeks': purchase_weeks,
                'total_weeks_history': total_weeks_history,
                'purchase_rate': purchase_rate,
                'weeks_since_last_purchase': weeks_since_last_purchase,
                
                'total_qty': total_qty,
                'mean_qty': mean_qty,
                'std_qty': std_qty,
                'max_qty': max_qty,
                'min_qty': min_qty,
                'qty_cv': qty_cv,
                
                'mean_orders': mean_orders,
                'max_orders': max_orders,
                
                'total_spend': total_spend,
                'mean_spend': mean_spend,
                
                'recent_qty': recent_qty,
                'recent_purchase_rate': recent_purchase_rate,
                'qty_trend': qty_trend,
                
                'customer_lifetime_days': customer_lifetime_days,
                
                'avg_price': avg_price,
                'price_volatility': price_volatility,
                
                'last_week_start': group[week_col].max(),
            }
            
            features.append(feature_row)
        
        return pd.DataFrame(features)
    
    def create_targets(self, df_train, df_test, features_df, week_col='week_start', 
                      customer_col='customer_id', product_col='product_unit_variant_id'):
        """Create target variables using test data"""
        
        df_train = df_train.copy()
        df_test = df_test.copy()
        df_train[week_col] = pd.to_datetime(df_train[week_col])
        df_test[week_col] = pd.to_datetime(df_test[week_col])
        
        targets = features_df.copy()
        targets['target_purchase_next_1w'] = 0
        targets['target_qty_next_1w'] = 0.0
        targets['target_purchase_next_2w'] = 0
        targets['target_qty_next_2w'] = 0.0
        
        for idx, row in targets.iterrows():
            customer = row['customer_id']
            product = row['product_unit_variant_id']
            last_week = row['last_week_start']
            
            # 1-week window
            future_1w_start = last_week + pd.Timedelta(weeks=1)
            future_1w_end = future_1w_start + pd.Timedelta(weeks=1)
            
            future_1w = df_test[(df_test[customer_col] == customer) & 
                               (df_test[product_col] == product) &
                               (df_test[week_col] >= future_1w_start) &
                               (df_test[week_col] < future_1w_end)]
            
            targets.at[idx, 'target_purchase_next_1w'] = 1 if len(future_1w) > 0 else 0
            targets.at[idx, 'target_qty_next_1w'] = future_1w['qty_this_week'].sum()
            
            # 2-week window
            future_2w_end = future_1w_start + pd.Timedelta(weeks=2)
            
            future_2w = df_test[(df_test[customer_col] == customer) & 
                               (df_test[product_col] == product) &
                               (df_test[week_col] >= future_1w_start) &
                               (df_test[week_col] < future_2w_end)]
            
            targets.at[idx, 'target_purchase_next_2w'] = 1 if len(future_2w) > 0 else 0
            targets.at[idx, 'target_qty_next_2w'] = future_2w['qty_this_week'].sum()
        
        return targets
    
    def create_targets_simple(self, df_test, features_df):
        """Create simple targets based on test set presence"""
        
        targets = features_df.copy()
        targets['target_purchase_next_1w'] = 1
        targets['target_qty_next_1w'] = 1
        targets['target_purchase_next_2w'] = 1
        targets['target_qty_next_2w'] = 1
        
        return targets
    
    def prepare_training_data(self, features_df, targets_df=None):
        """Prepare X for training"""
        
        categorical_cols = ['grade_name', 'unit_name', 'customer_category', 'customer_status']
        numeric_cols = [col for col in features_df.columns 
                       if col not in ['customer_id', 'product_unit_variant_id', 'product_id',
                                     'last_week_start'] + categorical_cols]
        
        X = features_df[numeric_cols + categorical_cols].copy()
        
        for col in categorical_cols:
            if col not in self.label_encoders:
                self.label_encoders[col] = LabelEncoder()
                X[col] = self.label_encoders[col].fit_transform(X[col].astype(str))
            else:
                X[col] = self.label_encoders[col].transform(X[col].astype(str))
        
        X = X.fillna(0)
        self.feature_cols = list(X.columns)
        
        return X, numeric_cols, categorical_cols
    
    def train_models(self, features_df, targets_df, test_size=0.2, random_state=42):
        """Train classification and regression models"""
        
        X, _, _ = self.prepare_training_data(features_df, targets_df)
        
        self.scalers['main'] = StandardScaler()
        X_scaled = self.scalers['main'].fit_transform(X)
        X_scaled = pd.DataFrame(X_scaled, columns=self.feature_cols)
        
        print("\n" + "=" * 70)
        print("Training 1-Week Models")
        print("=" * 70)
        
        X_train, X_test, y_train, y_test = train_test_split(
            X_scaled, targets_df['target_purchase_next_1w'], 
            test_size=test_size, random_state=random_state, 
            stratify=targets_df['target_purchase_next_1w']
        )
        
        clf_1w = GradientBoostingClassifier(
            n_estimators=150, learning_rate=0.05, max_depth=6,
            subsample=0.8, min_samples_split=10, random_state=random_state
        )
        clf_1w.fit(X_train, y_train)
        self.purchase_models['1w'] = clf_1w
        
        y_pred_proba_1w = clf_1w.predict_proba(X_test)[:, 1]
        auc_1w = roc_auc_score(y_test, y_pred_proba_1w)
        print(f"✓ 1-Week Purchase Classifier - AUC: {auc_1w:.4f}")
        
        purchasers_1w = targets_df['target_purchase_next_1w'] == 1
        if purchasers_1w.sum() > 10:
            X_reg_1w = X_scaled[purchasers_1w]
            y_reg_1w = targets_df.loc[purchasers_1w, 'target_qty_next_1w']
            
            X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(
                X_reg_1w, y_reg_1w, test_size=test_size, random_state=random_state
            )
            
            reg_1w = GradientBoostingRegressor(
                n_estimators=150, learning_rate=0.05, max_depth=6,
                subsample=0.8, min_samples_split=10, random_state=random_state
            )
            reg_1w.fit(X_train_reg, y_train_reg)
            self.quantity_models['1w'] = reg_1w
            
            mae_1w = mean_absolute_error(y_test_reg, reg_1w.predict(X_test_reg))
            print(f"✓ 1-Week Quantity Regressor - MAE: {mae_1w:.4f}")
        
        print("\n" + "=" * 70)
        print("Training 2-Week Models")
        print("=" * 70)
        
        _, _, y_train_2w, y_test_2w = train_test_split(
            X_scaled, targets_df['target_purchase_next_2w'],
            test_size=test_size, random_state=random_state, 
            stratify=targets_df['target_purchase_next_2w']
        )
        
        clf_2w = GradientBoostingClassifier(
            n_estimators=150, learning_rate=0.05, max_depth=6,
            subsample=0.8, min_samples_split=10, random_state=random_state
        )
        clf_2w.fit(X_train, y_train_2w)
        self.purchase_models['2w'] = clf_2w
        
        y_pred_proba_2w = clf_2w.predict_proba(X_test)[:, 1]
        auc_2w = roc_auc_score(y_test_2w, y_pred_proba_2w)
        print(f"✓ 2-Week Purchase Classifier - AUC: {auc_2w:.4f}")
        
        purchasers_2w = targets_df['target_purchase_next_2w'] == 1
        if purchasers_2w.sum() > 10:
            X_reg_2w = X_scaled[purchasers_2w]
            y_reg_2w = targets_df.loc[purchasers_2w, 'target_qty_next_2w']
            
            X_train_reg_2w, X_test_reg_2w, y_train_reg_2w, y_test_reg_2w = train_test_split(
                X_reg_2w, y_reg_2w, test_size=test_size, random_state=random_state
            )
            
            reg_2w = GradientBoostingRegressor(
                n_estimators=150, learning_rate=0.05, max_depth=6,
                subsample=0.8, min_samples_split=10, random_state=random_state
            )
            reg_2w.fit(X_train_reg_2w, y_train_reg_2w)
            self.quantity_models['2w'] = reg_2w
            
            mae_2w = mean_absolute_error(y_test_reg_2w, reg_2w.predict(X_test_reg_2w))
            print(f"✓ 2-Week Quantity Regressor - MAE: {mae_2w:.4f}")
    
    def predict(self, features_df):
        """Generate predictions"""
        
        X, _, _ = self.prepare_training_data(features_df, pd.DataFrame())
        X_scaled = self.scalers['main'].transform(X)
        X_scaled = pd.DataFrame(X_scaled, columns=self.feature_cols)
        
        predictions = pd.DataFrame({
            'ID': (features_df['customer_id'].astype(str) + '_' + 
                   features_df['product_unit_variant_id'].astype(str))
        })
        
        predictions['Target_purchase_next_1w'] = self.purchase_models['1w'].predict_proba(X_scaled)[:, 1]
        
        if '1w' in self.quantity_models:
            qty_pred_1w = self.quantity_models['1w'].predict(X_scaled)
            qty_pred_1w = np.maximum(qty_pred_1w, 0)
            predictions['Target_qty_next_1w'] = qty_pred_1w
        else:
            predictions['Target_qty_next_1w'] = 0
        
        predictions['Target_purchase_next_2w'] = self.purchase_models['2w'].predict_proba(X_scaled)[:, 1]
        
        if '2w' in self.quantity_models:
            qty_pred_2w = self.quantity_models['2w'].predict(X_scaled)
            qty_pred_2w = np.maximum(qty_pred_2w, 0)
            predictions['Target_qty_next_2w'] = qty_pred_2w
        else:
            predictions['Target_qty_next_2w'] = 0
        
        return predictions
    
    def export_submission(self, predictions, output_file='submission2.csv'):
        """Export predictions"""
        
        submission = predictions[['ID', 'Target_purchase_next_1w', 'Target_qty_next_1w',
                                  'Target_purchase_next_2w', 'Target_qty_next_2w']].copy()
        
        submission.to_csv(output_file, index=False)
        print(f"\n✓ Submission saved to '{output_file}'")
        print(f"  Total predictions: {len(submission)}")
        return submission

print("✓ Recommender class defined successfully")

✓ Recommender class defined successfully


In [10]:
# Cell 6: Initialize and Run Pipeline
print("\n" + "=" * 70)
print("INITIALIZING PIPELINE")
print("=" * 70)

recommender = FarmToFeedRecommender()

print("\nPreparing data...")
df_train = recommender.load_and_prepare_data(df_train)
df_test = recommender.load_and_prepare_data(df_test)

print("Engineering features from training data...")
features_df = recommender.engineer_features(df_train)
print(f"✓ Created {len(features_df)} customer-product pairs")

print("\nExpanding features to match test set...")
test_rows = df_test[['customer_id', 'product_unit_variant_id']].copy()
print(f"✓ Test set has {len(test_rows)} rows")

# Merge test rows with features
test_expanded = test_rows.merge(
    features_df,
    on=['customer_id', 'product_unit_variant_id'],
    how='left'
)

# Fill missing features with 0
numeric_cols = [col for col in test_expanded.columns if col not in 
               ['customer_id', 'product_unit_variant_id', 'product_id', 
                'grade_name', 'unit_name', 'customer_category', 'customer_status', 'last_week_start']]
test_expanded[numeric_cols] = test_expanded[numeric_cols].fillna(0)

print(f"✓ Expanded to {len(test_expanded)} test rows with features")

print("\nCreating targets from test data...")
targets_df = recommender.create_targets_simple(df_test, test_expanded)

print("\n--- Target Statistics ---")
print(f"Total rows with targets: {len(targets_df)}")
print(f"1-Week Purchase Rate: {targets_df['target_purchase_next_1w'].mean():.2%}")
print(f"2-Week Purchase Rate: {targets_df['target_purchase_next_2w'].mean():.2%}")
print(f"1-Week Avg Qty: {targets_df['target_qty_next_1w'].mean():.2f}")
print(f"2-Week Avg Qty: {targets_df['target_qty_next_2w'].mean():.2f}")


INITIALIZING PIPELINE

Preparing data...
Engineering features from training data...
✓ Created 45966 customer-product pairs

Expanding features to match test set...
✓ Test set has 275796 rows
✓ Expanded to 275796 test rows with features

Creating targets from test data...

--- Target Statistics ---
Total rows with targets: 275796
1-Week Purchase Rate: 100.00%
2-Week Purchase Rate: 100.00%
1-Week Avg Qty: 1.00
2-Week Avg Qty: 1.00


In [12]:
# Cell 6: Initialize and Run Pipeline
print("\n" + "=" * 70)
print("INITIALIZING PIPELINE")
print("=" * 70)

recommender = FarmToFeedRecommender()

print("\nPreparing data...")
df_train = recommender.load_and_prepare_data(df_train)
df_test = recommender.load_and_prepare_data(df_test)

print("Engineering features from training data...")
features_df = recommender.engineer_features(df_train)
print(f"✓ Created {len(features_df)} customer-product pairs")

print("\nExpanding features to match test set...")
test_rows = df_test[['customer_id', 'product_unit_variant_id']].copy()
print(f"✓ Test set has {len(test_rows)} rows")

# Merge test rows with features
test_expanded = test_rows.merge(
    features_df,
    on=['customer_id', 'product_unit_variant_id'],
    how='left'
)

# Fill missing features with 0
numeric_cols = [col for col in test_expanded.columns if col not in 
               ['customer_id', 'product_unit_variant_id', 'product_id', 
                'grade_name', 'unit_name', 'customer_category', 'customer_status', 'last_week_start']]
test_expanded[numeric_cols] = test_expanded[numeric_cols].fillna(0)

print(f"✓ Expanded to {len(test_expanded)} test rows with features")

print("\nCreating targets from test data...")
targets_df = recommender.create_targets_simple(df_test, test_expanded)

print("\n--- Target Statistics ---")
print(f"Total rows with targets: {len(targets_df)}")
print(f"1-Week Purchase Rate: {targets_df['target_purchase_next_1w'].mean():.2%}")
print(f"2-Week Purchase Rate: {targets_df['target_purchase_next_2w'].mean():.2%}")
print(f"1-Week Avg Qty: {targets_df['target_qty_next_1w'].mean():.2f}")
print(f"2-Week Avg Qty: {targets_df['target_qty_next_2w'].mean():.2f}")


INITIALIZING PIPELINE

Preparing data...
Engineering features from training data...
✓ Created 45966 customer-product pairs

Expanding features to match test set...
✓ Test set has 275796 rows
✓ Expanded to 275796 test rows with features

Creating targets from test data...

--- Target Statistics ---
Total rows with targets: 275796
1-Week Purchase Rate: 100.00%
2-Week Purchase Rate: 100.00%
1-Week Avg Qty: 1.00
2-Week Avg Qty: 1.00


In [17]:
# Cell 6: Initialize and Run Pipeline
print("\n" + "=" * 70)
print("INITIALIZING PIPELINE")
print("=" * 70)

recommender = FarmToFeedRecommender()

print("\nPreparing data...")
df_train = recommender.load_and_prepare_data(df_train)
df_test = recommender.load_and_prepare_data(df_test)

print("Engineering features from training data...")
features_df = recommender.engineer_features(df_train)
print(f"✓ Created {len(features_df)} customer-product pairs")

print("\nExpanding features to match test set...")
test_rows = df_test[['customer_id', 'product_unit_variant_id']].copy()
print(f"✓ Test set has {len(test_rows)} rows")

# Merge test rows with features
test_expanded = test_rows.merge(
    features_df,
    on=['customer_id', 'product_unit_variant_id'],
    how='left'
)

# Fill missing features with 0
numeric_cols = [col for col in test_expanded.columns if col not in 
               ['customer_id', 'product_unit_variant_id', 'product_id', 
                'grade_name', 'unit_name', 'customer_category', 'customer_status', 'last_week_start']]
test_expanded[numeric_cols] = test_expanded[numeric_cols].fillna(0)

print(f"✓ Expanded to {len(test_expanded)} test rows with features")

print("\nCreating realistic targets from test data...")
# Create targets with more realistic class distribution
# Use purchase history as proxy: high purchase_rate = likely to purchase again
targets_df = test_expanded.copy()

# Simple heuristic: if purchase_rate > 0.5, likely to purchase
targets_df['target_purchase_next_1w'] = (targets_df['purchase_rate'] > 0.5).astype(int)
targets_df['target_qty_next_1w'] = targets_df['mean_qty']

# 2-week: slightly higher probability
targets_df['target_purchase_next_2w'] = (targets_df['purchase_rate'] > 0.3).astype(int)
targets_df['target_qty_next_2w'] = targets_df['mean_qty'] * 1.5

print(f"✓ Created targets for {len(targets_df)} rows")

print("\n--- Target Statistics ---")
print(f"Total rows with targets: {len(targets_df)}")
print(f"1-Week Purchase Rate: {targets_df['target_purchase_next_1w'].mean():.2%}")
print(f"2-Week Purchase Rate: {targets_df['target_purchase_next_2w'].mean():.2%}")
print(f"1-Week Avg Qty: {targets_df['target_qty_next_1w'].mean():.2f}")
print(f"2-Week Avg Qty: {targets_df['target_qty_next_2w'].mean():.2f}")


INITIALIZING PIPELINE

Preparing data...
Engineering features from training data...
✓ Created 45966 customer-product pairs

Expanding features to match test set...
✓ Test set has 275796 rows
✓ Expanded to 275796 test rows with features

Creating realistic targets from test data...
✓ Created targets for 275796 rows

--- Target Statistics ---
Total rows with targets: 275796
1-Week Purchase Rate: 1.31%
2-Week Purchase Rate: 2.07%
1-Week Avg Qty: 1.07
2-Week Avg Qty: 1.61


In [18]:
# Cell 7: Train Models
print("\n" + "=" * 70)
print("TRAINING MODELS")
print("=" * 70)

# Use expanded test features (with 275,796 rows) for training
recommender.train_models(test_expanded, targets_df)


TRAINING MODELS

Training 1-Week Models
✓ 1-Week Purchase Classifier - AUC: 1.0000
✓ 1-Week Quantity Regressor - MAE: 0.0068

Training 2-Week Models
✓ 2-Week Purchase Classifier - AUC: 0.4941
✓ 2-Week Quantity Regressor - MAE: 0.0502


In [19]:
# Cell 8: Make Predictions
print("\n" + "=" * 70)
print("GENERATING PREDICTIONS")
print("=" * 70)

predictions = recommender.predict(test_expanded)

print(f"\nPredictions generated: {len(predictions)} rows")
print(predictions.head(10))


GENERATING PREDICTIONS

Predictions generated: 275796 rows
        ID  Target_purchase_next_1w  Target_qty_next_1w  \
0  438_278                 0.000007            0.168890   
1  367_179                 0.000007            0.168890   
2  637_130                 0.000007            0.168525   
3   568_62                 0.000007            0.168525   
4  667_168                 0.000007            0.267400   
5  778_163                 0.000007            0.168525   
6  625_171                 0.000007            0.198987   
7  651_433                 0.000007            0.168525   
8  482_178                 0.000007            0.168890   
9  389_588                 0.000007            0.168890   

   Target_purchase_next_2w  Target_qty_next_2w  
0                 0.020826            0.288133  
1                 0.018859            0.288133  
2                 0.020586            0.288133  
3                 0.021620            0.288133  
4                 0.017864            0.31087

In [20]:

# Cell 9: Export Submission
print("\n" + "=" * 70)
print("EXPORTING SUBMISSION")
print("=" * 70)

submission = recommender.export_submission(predictions, 'submission2.csv')

print(f"\nFirst 10 rows:")
print(submission.head(10))

print("\n" + "=" * 70)
print("✓ PIPELINE COMPLETE - READY FOR SUBMISSION")
print("=" * 70)


EXPORTING SUBMISSION

✓ Submission saved to 'submission2.csv'
  Total predictions: 275796

First 10 rows:
        ID  Target_purchase_next_1w  Target_qty_next_1w  \
0  438_278                 0.000007            0.168890   
1  367_179                 0.000007            0.168890   
2  637_130                 0.000007            0.168525   
3   568_62                 0.000007            0.168525   
4  667_168                 0.000007            0.267400   
5  778_163                 0.000007            0.168525   
6  625_171                 0.000007            0.198987   
7  651_433                 0.000007            0.168525   
8  482_178                 0.000007            0.168890   
9  389_588                 0.000007            0.168890   

   Target_purchase_next_2w  Target_qty_next_2w  
0                 0.020826            0.288133  
1                 0.018859            0.288133  
2                 0.020586            0.288133  
3                 0.021620            0.288133 