In [None]:
# Solution implementations for key functions

def preprocess_data(df):
    """Clean the raw e-commerce frame and derive calendar/categorical columns.

    Works on a copy, so the caller's DataFrame is left untouched.

    Steps, in order:
      1. Parse ``date`` with ``pd.to_datetime``.
      2. Fill missing ``age`` with the column mean and missing
         ``time_on_site`` with the column median.
      3. Add ``day_of_week``, ``month`` and ``is_weekend`` (1 when the
         day-of-week value is 5 or 6, else 0).
      4. One-hot encode ``product_category`` and ``customer_segment`` with
         ``drop_first=True``.

    Args:
        df: DataFrame containing at least the columns ``date``, ``age``,
            ``time_on_site``, ``product_category`` and ``customer_segment``.

    Returns:
        A new DataFrame with the transformations above applied.
    """
    frame = df.copy()

    # Parse timestamps first; the calendar features below rely on the .dt accessor.
    frame['date'] = pd.to_datetime(frame['date'])

    # Impute by reassignment rather than inplace=True.
    mean_age = frame['age'].mean()
    frame['age'] = frame['age'].fillna(mean_age)
    median_time = frame['time_on_site'].median()
    frame['time_on_site'] = frame['time_on_site'].fillna(median_time)

    # Calendar features; the weekday series is computed once and reused.
    weekday = frame['date'].dt.dayofweek
    frame['day_of_week'] = weekday
    frame['month'] = frame['date'].dt.month
    frame['is_weekend'] = weekday.isin([5, 6]).astype(int)

    # One-hot encode the categorical columns, dropping the first level of each.
    categorical_columns = ['product_category', 'customer_segment']
    encoded = pd.get_dummies(frame, columns=categorical_columns, drop_first=True)

    return encoded

def _safe_ratio(numerator, denominator):
    """Divide two Series element-wise, yielding 0.0 where the denominator is 0."""
    # Blank out zero denominators before dividing so no inf is ever produced.
    nonzero = denominator.where(denominator != 0)
    # Only rows with a zero denominator are forced to 0.0; NaN inputs stay NaN.
    return (numerator / nonzero).mask(denominator == 0, 0.0)


def engineer_features(df):
    """Create new features for the model.

    Works on a copy, so the caller's DataFrame is left untouched.

    Added columns:
      * ``items_per_minute``: ``items_viewed / time_on_site``
      * ``avg_time_per_item``: ``time_on_site / items_viewed``
      * ``purchase_per_previous``: ``purchase_amount / (previous_purchases + 1)``
      * ``age_purchase_interaction``: ``age * previous_purchases``
      * ``month_sin`` / ``month_cos``: sine and cosine of ``2*pi*month/12``

    The two ratio features are set to 0.0 on rows whose denominator is zero.
    A plain division would produce ``inf`` there, and non-finite values break
    the scaling/model-fitting step that consumes these features.

    Args:
        df: DataFrame containing ``items_viewed``, ``time_on_site``,
            ``purchase_amount``, ``previous_purchases``, ``age`` and ``month``.

    Returns:
        A new DataFrame with the feature columns appended.
    """
    df = df.copy()

    # Customer behavior metrics (guarded against zero denominators).
    df['items_per_minute'] = _safe_ratio(df['items_viewed'], df['time_on_site'])
    df['avg_time_per_item'] = _safe_ratio(df['time_on_site'], df['items_viewed'])

    # Customer value metrics; the +1 keeps the denominator non-zero for
    # customers with no previous purchases.
    # NOTE(review): purchase_amount looks like the prediction target
    # (evaluate_model labels its axes "Purchase Amount"). If so, this feature
    # leaks the target -- confirm before feeding it to the model.
    df['purchase_per_previous'] = df['purchase_amount'] / (df['previous_purchases'] + 1)

    # Interaction terms
    df['age_purchase_interaction'] = df['age'] * df['previous_purchases']

    # Cyclical month encoding so that December and January end up adjacent.
    month_angle = 2 * np.pi * df['month'] / 12
    df['month_sin'] = np.sin(month_angle)
    df['month_cos'] = np.cos(month_angle)

    return df

def train_model(X, y):
    """Fit a random-forest regressor on standardized features.

    The data is split 80/20 (``random_state=42``). A ``StandardScaler`` is
    fitted on the training part only and then applied to both parts. The
    scaled arrays are wrapped back into DataFrames so the model is fitted
    with column names.

    Note that the fitted scaler is local to this function and is not returned.

    Args:
        X: Feature DataFrame (its ``.columns`` attribute is used).
        y: Target values aligned with the rows of ``X``.

    Returns:
        A 3-tuple ``(model, test_score, feature_names)``: the fitted
        ``RandomForestRegressor``, the value of ``model.score`` on the scaled
        held-out split, and the training frame's column index.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
    feature_names = X_train.columns

    # Fit the scaler on the training rows only, then reuse it for the test rows.
    scaler = StandardScaler()
    train_frame = pd.DataFrame(scaler.fit_transform(X_train), columns=feature_names)
    test_frame = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)

    forest = RandomForestRegressor(n_estimators=100, random_state=42)
    forest.fit(train_frame, y_train)

    held_out_score = forest.score(test_frame, y_test)

    return forest, held_out_score, feature_names

def evaluate_model(model, X_test, y_test, feature_names):
    """Score a fitted model on held-out data and plot predicted vs. actual.

    Computes the mean squared error and R^2 of ``model.predict(X_test)``
    against ``y_test``, and ranks ``model.feature_importances_`` from highest
    to lowest. It also draws a predicted-vs-actual scatter plot, with a dashed
    red reference line, on a new matplotlib figure. The figure is neither
    shown nor returned here.

    NOTE(review): ``train_model`` in this file fits the model on standardized
    features; confirm the caller passes ``X_test`` in that same scaled form.

    Args:
        model: Fitted estimator exposing ``predict`` and ``feature_importances_``.
        X_test: Feature matrix to predict on.
        y_test: True target values; must provide ``.min()`` and ``.max()``.
        feature_names: Names matching ``model.feature_importances_`` in order.

    Returns:
        Dict with keys ``'mse'``, ``'r2'`` and ``'feature_importance'`` (a
        DataFrame with ``feature`` and ``importance`` columns, sorted by
        importance in descending order).
    """
    predictions = model.predict(X_test)

    # Headline metrics first; the remaining keys are added as they are built.
    report = {
        'mse': mean_squared_error(y_test, predictions),
        'r2': r2_score(y_test, predictions),
    }

    importance_table = pd.DataFrame(
        {'feature': feature_names, 'importance': model.feature_importances_}
    )
    report['feature_importance'] = importance_table.sort_values(
        'importance', ascending=False
    )

    # Predicted-vs-actual scatter with the y = x reference line.
    plt.figure(figsize=(10, 6))
    plt.scatter(y_test, predictions, alpha=0.5)
    diagonal = [y_test.min(), y_test.max()]
    plt.plot(diagonal, diagonal, 'r--', lw=2)
    plt.xlabel('Actual Purchase Amount')
    plt.ylabel('Predicted Purchase Amount')
    plt.title('Model Predictions vs Actual Values')
    plt.tight_layout()

    return report