# 📈 Regression Analysis: transactions

**Generated:** 2025-12-09 16:01:22  
**Type:** Regression Modeling  
**Dataset:** transactions

## 🎯 Objective
This notebook provides a complete regression modeling workflow to predict continuous numerical values.

## 📋 Workflow Steps
1. **Data Loading & Exploration**
2. **Target Variable Analysis**
3. **Feature Engineering & Preprocessing**
4. **Model Training & Comparison**
5. **Model Evaluation & Metrics**
6. **Residual Analysis**
7. **Predictions & Business Insights**

In [None]:
# Import essential libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.metrics import (
    mean_squared_error, mean_absolute_error, r2_score,
    mean_absolute_percentage_error
)
from sklearn.impute import SimpleImputer
import warnings
# NOTE(review): 'ignore' with no category or module filter silences every warning
# for the rest of the kernel session, including any raised by the libraries used below.
warnings.filterwarnings('ignore')

# Set plotting style
# Matplotlib's 'default' style sheet, with seaborn's "husl" colour palette on top.
plt.style.use('default')
sns.set_palette("husl")
# IPython magic (not Python syntax): render figures inline in the cell output.
%matplotlib inline

# Confirmation shown once the import/configuration cell has run to the end.
print("‚úÖ All libraries imported successfully!")

## 1. 📁 Data Loading & Initial Exploration

In [None]:
# Read the raw data - REPLACE 'transactions.csv' with your actual file path
df = pd.read_csv('transactions.csv')

# Headline numbers: dimensions and deep memory footprint (bytes converted to MB)
total_bytes = df.memory_usage(deep=True).sum()
print("=== DATASET OVERVIEW ===")
print(f"Dataset shape: {df.shape}")
print(f"Memory usage: {total_bytes / 1024**2:.2f} MB")

# Column dtypes as inferred by pandas
print("\n=== DATA TYPES ===")
print(df.dtypes)

# Rich previews: each heading is printed, then the matching frame is built and displayed
for heading, build_preview in (
    ("\n=== FIRST 5 ROWS ===", df.head),
    ("\n=== STATISTICAL SUMMARY ===", df.describe),
):
    print(heading)
    display(build_preview())

# Per-column null counts; only columns that actually have gaps are listed
print("\n=== MISSING VALUES ===")
missing_data = df.isnull().sum()
columns_with_gaps = missing_data[missing_data > 0]
if columns_with_gaps.empty:
    print("No missing values found!")
else:
    print(columns_with_gaps)

## 2. 🎯 Target Variable Analysis

In [None]:
# IMPORTANT: Define your target variable here
# REPLACE 'target_column' with your actual target column name
target_column = 'target_column'  # UPDATE THIS WITH YOUR TARGET COLUMN

# Cell-local import (as in the original cell); only the Q-Q plot needs it
from scipy import stats

if target_column not in df.columns:
    print(f"‚ùå Column '{target_column}' not found!")
    print(f"Available columns: {list(df.columns)}")
    print("\nPlease update the 'target_column' variable above.")
elif df[target_column].dropna().empty:
    # Nothing to summarise or plot: every statistic would be NaN and the
    # outlier percentage would divide by zero.
    print(f"‚úÖ Target variable found: {target_column}")
    print(f"Column '{target_column}' has no non-missing values; nothing to analyze.")
else:
    print(f"‚úÖ Target variable found: {target_column}")

    # Use the non-missing target values everywhere below, so the histogram,
    # box plot, Q-Q plot and outlier count all describe the same observations.
    # (pandas reductions skip NaN by default, so the printed statistics are
    # the same as when computed on the raw column.)
    target_values = df[target_column].dropna()
    target_mean = target_values.mean()
    target_median = target_values.median()
    skewness = target_values.skew()

    # Target statistics
    print("\n=== TARGET VARIABLE STATISTICS ===")
    print(f"Mean: {target_mean:.4f}")
    print(f"Median: {target_median:.4f}")
    print(f"Std Dev: {target_values.std():.4f}")
    print(f"Min: {target_values.min():.4f}")
    print(f"Max: {target_values.max():.4f}")
    print(f"Skewness: {skewness:.4f}")
    print(f"Kurtosis: {target_values.kurtosis():.4f}")

    # Visualize target distribution: histogram, box plot, normal Q-Q plot
    fig, (ax_hist, ax_box, ax_qq) = plt.subplots(1, 3, figsize=(15, 5))

    ax_hist.hist(target_values, bins=50, edgecolor='black', alpha=0.7)
    ax_hist.axvline(target_mean, color='red', linestyle='--', label=f'Mean: {target_mean:.2f}')
    ax_hist.axvline(target_median, color='green', linestyle='--', label=f'Median: {target_median:.2f}')
    ax_hist.set_xlabel(target_column)
    ax_hist.set_ylabel('Frequency')
    ax_hist.set_title(f'Distribution of {target_column}')
    ax_hist.legend()

    ax_box.boxplot(target_values)
    ax_box.set_ylabel(target_column)
    ax_box.set_title(f'Box Plot of {target_column}')

    # probplot draws onto the Axes it is given; its default title is replaced afterwards
    stats.probplot(target_values, dist="norm", plot=ax_qq)
    ax_qq.set_title('Q-Q Plot (Normality Check)')

    fig.tight_layout()
    plt.show()

    # Check for outliers with the 1.5 * IQR rule
    Q1 = target_values.quantile(0.25)
    Q3 = target_values.quantile(0.75)
    IQR = Q3 - Q1
    outliers = ((target_values < Q1 - 1.5*IQR) | (target_values > Q3 + 1.5*IQR)).sum()
    # The percentage is relative to the observed (non-missing) target values
    print(f"\nüìä Outliers detected (IQR method): {outliers} ({outliers/len(target_values)*100:.2f}%)")

    # Skewness check: the thresholds are on |skewness| only
    if abs(skewness) > 1:
        print(f"‚ö†Ô∏è  Target is highly skewed ({skewness:.2f}). Consider log transformation.")
    elif abs(skewness) > 0.5:
        print(f"üìä Target is moderately skewed ({skewness:.2f}).")
    else:
        print(f"‚úÖ Target distribution is approximately normal ({skewness:.2f}).")

## 3. 📊 Feature Analysis & Correlation

In [None]:
# Analyze features and their correlation with target
if target_column in df.columns:

    # Identify feature columns (exclude target and ID columns)
    id_columns = ['id', 'ID', 'index', 'customer_id', 'user_id']
    feature_columns = [col for col in df.columns
                      if col != target_column and col not in id_columns]

    X = df[feature_columns].copy()
    y = df[target_column].copy()

    print(f"‚úÖ Features selected: {len(feature_columns)}")

    # Analyze feature types
    numeric_features = X.select_dtypes(include=[np.number]).columns.tolist()
    categorical_features = X.select_dtypes(include=['object', 'category']).columns.tolist()

    print(f"\nüìä Numeric features ({len(numeric_features)}): {numeric_features}")
    print(f"üìã Categorical features ({len(categorical_features)}): {categorical_features}")

    # Correlation with target
    if len(numeric_features) > 0:
        print("\n=== CORRELATION WITH TARGET ===")
        # One correlation matrix serves both the ranking and the heatmap
        corr_matrix = df[numeric_features + [target_column]].corr()
        correlations = corr_matrix[target_column].drop(target_column)
        # Rank by absolute correlation; the signed values are what gets printed
        correlations_sorted = correlations.abs().sort_values(ascending=False)

        print("\nTop correlated features:")
        for feature in correlations_sorted.head(10).index:
            corr_value = correlations[feature]
            print(f"  {feature}: {corr_value:.4f}")

        # Correlation heatmap
        plt.figure(figsize=(12, 8))
        sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', center=0, fmt='.2f')
        plt.title('Feature Correlation Matrix')
        plt.tight_layout()
        plt.show()

        # Scatter plots with target for the (up to) four strongest features
        top_features = correlations_sorted.head(4).index.tolist()
        if len(top_features) > 0:
            fig, axes = plt.subplots(2, 2, figsize=(12, 10))
            axes = axes.ravel()

            for ax, feature in zip(axes, top_features):
                ax.scatter(df[feature], df[target_column], alpha=0.5)
                ax.set_xlabel(feature)
                ax.set_ylabel(target_column)
                ax.set_title(f'{feature} vs {target_column} (r={correlations[feature]:.3f})')

                # Add trend line. Drop rows where EITHER value is missing so the
                # x/y pairs stay aligned; dropping NaNs from each column separately
                # would pair up unrelated rows or produce arrays of unequal length.
                pairs = df[[feature, target_column]].dropna().sort_values(feature)
                # A straight-line fit needs at least two distinct x values
                if len(pairs) >= 2 and pairs[feature].nunique() > 1:
                    trend = np.poly1d(np.polyfit(pairs[feature], pairs[target_column], 1))
                    ax.plot(pairs[feature], trend(pairs[feature]),
                            "r--", alpha=0.8, label='Trend')
                    ax.legend()

            # Hide grid cells left over when fewer than four features are plotted
            for unused_ax in axes[len(top_features):]:
                unused_ax.set_visible(False)

            plt.tight_layout()
            plt.show()

else:
    print("‚ùå Please define target column first!")

## 4. 🔧 Data Preprocessing

In [None]:
# Data preprocessing pipeline
if target_column in df.columns and 'feature_columns' in locals():
    from sklearn.preprocessing import LabelEncoder

    # Rebuild X and y from the raw frame so this cell is idempotent. Mutating the
    # previous X in place meant a second run label-encoded the already-encoded
    # integer codes as strings ("0", "1", "10", ...) and scrambled the mapping.
    X = df[feature_columns].copy()
    y = df[target_column].copy()

    # SimpleImputer discards columns with no observed values, which would make the
    # assignment back into X fail on a shape mismatch; drop such columns up front.
    empty_columns = [col for col in X.columns if X[col].isna().all()]
    if empty_columns:
        print(f"Dropping columns with no observed values: {empty_columns}")
        X = X.drop(columns=empty_columns)
    numeric_cols = [col for col in numeric_features if col in X.columns]
    categorical_cols = [col for col in categorical_features if col in X.columns]

    # Handle missing values
    # NOTE(review): the imputers and the scaler below are fitted on every row,
    # i.e. before any train/test split, so test-set statistics influence them.
    print("=== HANDLING MISSING VALUES ===")

    # For numeric features: fill with median
    if len(numeric_cols) > 0:
        numeric_imputer = SimpleImputer(strategy='median')
        X[numeric_cols] = numeric_imputer.fit_transform(X[numeric_cols])
        print(f"‚úÖ Filled missing values in numeric features with median")

    # For categorical features: fill with mode and encode
    label_encoders = {}
    if len(categorical_cols) > 0:
        categorical_imputer = SimpleImputer(strategy='most_frequent')
        X[categorical_cols] = categorical_imputer.fit_transform(X[categorical_cols])

        # Encode categorical variables as integer codes.
        # NOTE(review): LabelEncoder assigns codes in sorted-label order, which
        # imposes an arbitrary ordering on nominal categories.
        for col in categorical_cols:
            le = LabelEncoder()
            X[col] = le.fit_transform(X[col].astype(str))
            label_encoders[col] = le
            print(f"‚úÖ Encoded {col}: {len(le.classes_)} unique values")

    # Handle missing values in target: rows without a target cannot be used
    if y.isnull().sum() > 0:
        print(f"\n‚ö†Ô∏è  Dropping {y.isnull().sum()} rows with missing target values")
        valid_idx = y.notna()
        X = X.loc[valid_idx]
        y = y.loc[valid_idx]

    # Feature scaling (zero mean, unit variance per column)
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    X_scaled_df = pd.DataFrame(X_scaled, columns=X.columns, index=X.index)

    print(f"\n‚úÖ Preprocessing completed!")
    print(f"Final feature matrix shape: {X.shape}")
    print(f"Target variable shape: {y.shape}")

else:
    print("‚ùå Please complete previous steps first!")

## 5. 🚂 Train-Test Split

In [None]:
# Split data into training and testing sets
# X exists as soon as the feature-analysis cell has run, so its presence does not
# prove preprocessing happened. X_scaled_df is the last object the preprocessing
# cell creates, so it is used as the "preprocessing finished" signal.
if 'X' in locals() and 'y' in locals() and 'X_scaled_df' in locals():

    test_size = 0.2  # 80% train, 20% test
    random_state = 42

    X_train, X_test, y_train, y_test = train_test_split(
        X, y,
        test_size=test_size,
        random_state=random_state
    )

    # Also create scaled versions. The scaler is fitted on the training rows only
    # and then applied to the test rows, so no test-set statistics leak into the
    # features that scale-sensitive models are trained on. `scaler` is rebound
    # here so it matches the data those models actually see.
    scaler = StandardScaler()
    X_train_scaled = pd.DataFrame(
        scaler.fit_transform(X_train),
        columns=X_train.columns,
        index=X_train.index
    )
    X_test_scaled = pd.DataFrame(
        scaler.transform(X_test),
        columns=X_test.columns,
        index=X_test.index
    )

    print("=== TRAIN-TEST SPLIT COMPLETED ===")
    print(f"Training set: {X_train.shape}")
    print(f"Test set: {X_test.shape}")

    # Sanity check that the two partitions have similar target distributions
    print("\n=== TARGET DISTRIBUTION ===")
    print(f"Training - Mean: {y_train.mean():.4f}, Std: {y_train.std():.4f}")
    print(f"Test - Mean: {y_test.mean():.4f}, Std: {y_test.std():.4f}")

else:
    print("‚ùå Please complete preprocessing first!")

## 6. 🤖 Model Training & Comparison

In [None]:
# Train multiple regression models
if 'X_train' in locals():

    # Define models to compare
    models = {
        'Linear Regression': LinearRegression(),
        'Ridge Regression': Ridge(alpha=1.0),
        'Lasso Regression': Lasso(alpha=0.1),
        'Random Forest': RandomForestRegressor(n_estimators=100, random_state=42),
        'Gradient Boosting': GradientBoostingRegressor(n_estimators=100, random_state=42),
        'K-Nearest Neighbors': KNeighborsRegressor(n_neighbors=5),
        'Support Vector Regression': SVR(kernel='rbf')
    }

    # Distance/kernel-based models are trained on the standardized matrices
    scaled_models = {'Support Vector Regression', 'K-Nearest Neighbors'}

    # Train and evaluate each model
    model_results = {}

    print("=== TRAINING MULTIPLE MODELS ===")

    for name, model in models.items():
        print(f"\nTraining {name}...")

        # Use scaled data for algorithms that need it
        if name in scaled_models:
            train_features, test_features = X_train_scaled, X_test_scaled
        else:
            train_features, test_features = X_train, X_test

        model.fit(train_features, y_train)
        y_pred = model.predict(test_features)
        y_train_pred = model.predict(train_features)

        # Calculate metrics (test set, plus train R^2 to expose overfitting)
        mse = mean_squared_error(y_test, y_pred)
        rmse = np.sqrt(mse)
        mae = mean_absolute_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)
        r2_train = r2_score(y_train, y_train_pred)

        # MAPE: scikit-learn does not raise when y_test contains zeros (it divides
        # by a tiny epsilon and returns a very large value), so only input
        # validation errors are caught here rather than every exception.
        try:
            mape = mean_absolute_percentage_error(y_test, y_pred) * 100
        except ValueError:
            mape = np.nan

        model_results[name] = {
            'model': model,
            'mse': mse,
            'rmse': rmse,
            'mae': mae,
            'r2': r2,
            'r2_train': r2_train,
            'mape': mape,
            'predictions': y_pred
        }

        print(f"‚úÖ {name} - R¬≤: {r2:.4f}, RMSE: {rmse:.4f}, MAE: {mae:.4f}")

    # Display results summary: one row per model, best test R^2 first
    print("\n=== MODEL COMPARISON SUMMARY ===")
    results_df = pd.DataFrame([
        {
            'Model': name,
            'R¬≤ (Test)': results['r2'],
            'R¬≤ (Train)': results['r2_train'],
            'RMSE': results['rmse'],
            'MAE': results['mae'],
            'MAPE (%)': results['mape'],
        }
        for name, results in model_results.items()
    ])

    results_df = results_df.sort_values('R¬≤ (Test)', ascending=False)
    display(results_df)

    # Visualize model comparison
    plt.figure(figsize=(12, 5))

    plt.subplot(1, 2, 1)
    plt.barh(results_df['Model'], results_df['R¬≤ (Test)'])
    plt.xlabel('R¬≤ Score')
    plt.title('Model Comparison - R¬≤ Score')
    # R^2 is negative for a model worse than predicting the mean; extend the
    # axis to the left so such bars are drawn instead of being clipped at 0.
    plt.xlim(min(0, results_df['R¬≤ (Test)'].min()), 1)

    plt.subplot(1, 2, 2)
    plt.barh(results_df['Model'], results_df['RMSE'])
    plt.xlabel('RMSE')
    plt.title('Model Comparison - RMSE (lower is better)')

    plt.tight_layout()
    plt.show()

    # Select best model: first row after sorting by test R^2.
    # NOTE(review): the winner is chosen on the same test set its score is
    # reported on, so that score is an optimistic estimate.
    best_model_name = results_df.iloc[0]['Model']
    best_model = model_results[best_model_name]['model']
    best_predictions = model_results[best_model_name]['predictions']

    print(f"\nüèÜ BEST MODEL: {best_model_name}")
    print(f"   R¬≤ Score: {model_results[best_model_name]['r2']:.4f}")
    print(f"   RMSE: {model_results[best_model_name]['rmse']:.4f}")

else:
    print("‚ùå Please complete train-test split first!")

## 7. 📈 Detailed Model Evaluation

In [None]:
# In-depth look at the winning model: headline metrics plus residual diagnostics
if 'best_model' not in locals():
    print("‚ùå Please complete model training first!")
else:
    from scipy import stats

    print(f"=== DETAILED EVALUATION: {best_model_name} ===")

    # Test-set predictions stored by the training cell
    y_pred = best_predictions

    # Headline metrics on the held-out rows
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    print("\nüìä REGRESSION METRICS:")
    print(f"R¬≤ Score:        {r2:.4f} ({r2*100:.2f}% variance explained)")
    print(f"RMSE:            {rmse:.4f}")
    print(f"MAE:             {mae:.4f}")
    print(f"MSE:             {mse:.4f}")

    # Residual = actual minus predicted
    residuals = y_test - y_pred

    fig, panel_grid = plt.subplots(2, 2, figsize=(15, 10))
    ax_fit, ax_resid, ax_dist, ax_qq = panel_grid.ravel()

    # Panel 1: actual vs predicted, with the identity line as reference
    value_range = [y_test.min(), y_test.max()]
    ax_fit.scatter(y_test, y_pred, alpha=0.5)
    ax_fit.plot(value_range, value_range, 'r--', lw=2)
    ax_fit.set_xlabel('Actual Values')
    ax_fit.set_ylabel('Predicted Values')
    ax_fit.set_title(f'Actual vs Predicted\nR¬≤ = {r2:.4f}')

    # Panel 2: residuals against predictions, with a zero reference line
    ax_resid.scatter(y_pred, residuals, alpha=0.5)
    ax_resid.axhline(y=0, color='r', linestyle='--')
    ax_resid.set_xlabel('Predicted Values')
    ax_resid.set_ylabel('Residuals')
    ax_resid.set_title('Residuals vs Predicted Values')

    # Panel 3: histogram of residuals
    ax_dist.hist(residuals, bins=50, edgecolor='black', alpha=0.7)
    ax_dist.axvline(x=0, color='r', linestyle='--')
    ax_dist.set_xlabel('Residual Value')
    ax_dist.set_ylabel('Frequency')
    ax_dist.set_title(f'Residual Distribution\nMean: {residuals.mean():.4f}, Std: {residuals.std():.4f}')

    # Panel 4: normal Q-Q plot of the residuals
    stats.probplot(residuals, dist="norm", plot=ax_qq)
    ax_qq.set_title('Q-Q Plot of Residuals')

    fig.tight_layout()
    plt.show()

    # Numeric summary of the residuals
    print("\nüìä RESIDUAL ANALYSIS:")
    print(f"Mean Residual:   {residuals.mean():.4f} (should be ~0)")
    print(f"Std Residual:    {residuals.std():.4f}")
    print(f"Min Residual:    {residuals.min():.4f}")
    print(f"Max Residual:    {residuals.max():.4f}")

    # Heteroscedasticity heuristic: correlation between prediction and |residual|
    correlation = np.corrcoef(y_pred, np.abs(residuals))[0, 1]
    if not abs(correlation) > 0.3:
        print(f"\n‚úÖ No significant heteroscedasticity (correlation: {correlation:.3f})")
    else:
        print(f"\n‚ö†Ô∏è  Potential heteroscedasticity detected (correlation: {correlation:.3f})")

## 8. 🔍 Feature Importance Analysis

In [None]:
# Rank the features by how much the selected model relies on them
if 'best_model' not in locals():
    print("‚ùå Please complete model training first!")
else:
    print(f"=== FEATURE IMPORTANCE ANALYSIS ===")

    if hasattr(best_model, 'feature_importances_'):
        # Estimators exposing feature_importances_ (the tree ensembles above)
        raw_importance = best_model.feature_importances_
        importance_df = pd.DataFrame({
            'Feature': X.columns,
            'Importance': raw_importance,
            'Importance_Percentage': raw_importance * 100
        }).sort_values('Importance', ascending=False)

        print("\nüìä TOP 15 MOST IMPORTANT FEATURES:")
        display(importance_df.head(15))

        # Horizontal bars, largest importance at the top
        top_features = importance_df.head(15)
        bar_positions = list(range(len(top_features)))
        fig, ax = plt.subplots(figsize=(12, 8))
        ax.barh(bar_positions, top_features['Importance'])
        ax.set_yticks(bar_positions)
        ax.set_yticklabels(top_features['Feature'])
        ax.set_xlabel('Feature Importance')
        ax.set_title(f'Top 15 Feature Importance - {best_model_name}')
        ax.invert_yaxis()
        fig.tight_layout()
        plt.show()

    elif hasattr(best_model, 'coef_'):
        # Estimators exposing coef_ (the linear models above)
        raw_coefficients = best_model.coef_
        coef_df = pd.DataFrame({
            'Feature': X.columns,
            'Coefficient': raw_coefficients,
            'Abs_Coefficient': np.abs(raw_coefficients)
        }).sort_values('Abs_Coefficient', ascending=False)

        print("\nüìä TOP 15 FEATURES BY COEFFICIENT MAGNITUDE:")
        display(coef_df.head(15))

        # Signed bars: green for strictly positive coefficients, red otherwise
        top_features = coef_df.head(15)
        bar_positions = list(range(len(top_features)))
        colors = top_features['Coefficient'].map(
            lambda coefficient: 'green' if coefficient > 0 else 'red'
        ).tolist()
        fig, ax = plt.subplots(figsize=(12, 8))
        ax.barh(bar_positions, top_features['Coefficient'], color=colors)
        ax.set_yticks(bar_positions)
        ax.set_yticklabels(top_features['Feature'])
        ax.set_xlabel('Coefficient Value')
        ax.set_title(f'Top 15 Feature Coefficients - {best_model_name}')
        ax.axvline(x=0, color='black', linestyle='-', linewidth=0.5)
        ax.invert_yaxis()
        fig.tight_layout()
        plt.show()

        print("\nüí° Interpretation:")
        print("‚Ä¢ GREEN bars: Positive effect on target")
        print("‚Ä¢ RED bars: Negative effect on target")

    else:
        # Neither attribute is present on the selected estimator
        print(f"Feature importance not available for {best_model_name}")

## 9. 🎯 Predictions & Error Analysis

In [None]:
# Analyze predictions and errors
if 'best_model' in locals():

    print(f"=== PREDICTION ANALYSIS ===")

    # Compute the residual once and derive the other error columns from it
    actual_values = y_test.values
    residual_values = actual_values - best_predictions
    abs_error_values = np.abs(residual_values)

    # Percentage error is undefined when the actual value is 0. Leave those rows
    # as NaN instead of dividing by zero, which produced inf/NaN in the table and
    # piled the affected rows into the 100% bin of the histogram.
    nonzero_actual = actual_values != 0
    pct_error_values = np.full(abs_error_values.shape, np.nan)
    pct_error_values[nonzero_actual] = (
        abs_error_values[nonzero_actual] / np.abs(actual_values[nonzero_actual]) * 100
    )

    # Create results dataframe
    # NOTE(review): this rebinds `results_df`, the name the training cell uses for
    # the model-comparison table.
    results_df = pd.DataFrame({
        'Actual': actual_values,
        'Predicted': best_predictions,
        'Residual': residual_values,
        'Abs_Error': abs_error_values,
        'Pct_Error': pct_error_values
    })

    zero_actual_count = int((~nonzero_actual).sum())
    if zero_actual_count > 0:
        print(f"Note: {zero_actual_count} test rows have an actual value of 0; "
              "percentage error is undefined for them and shown as NaN.")

    # Error statistics
    print("\nüìä ERROR DISTRIBUTION:")
    print(f"Mean Absolute Error: {results_df['Abs_Error'].mean():.4f}")
    print(f"Median Absolute Error: {results_df['Abs_Error'].median():.4f}")
    print(f"90th Percentile Error: {results_df['Abs_Error'].quantile(0.9):.4f}")
    print(f"95th Percentile Error: {results_df['Abs_Error'].quantile(0.95):.4f}")

    # Sample predictions
    print("\nüîç SAMPLE PREDICTIONS:")
    sample = results_df.head(15).round(4)
    display(sample)

    # Best and worst predictions
    print("\n‚úÖ BEST PREDICTIONS (lowest error):")
    display(results_df.nsmallest(5, 'Abs_Error').round(4))

    print("\n‚ùå WORST PREDICTIONS (highest error):")
    display(results_df.nlargest(5, 'Abs_Error').round(4))

    # Error distribution visualization
    plt.figure(figsize=(12, 5))

    plt.subplot(1, 2, 1)
    plt.hist(results_df['Abs_Error'], bins=50, edgecolor='black', alpha=0.7)
    plt.axvline(results_df['Abs_Error'].mean(), color='red', linestyle='--',
                label=f'Mean: {results_df["Abs_Error"].mean():.2f}')
    plt.xlabel('Absolute Error')
    plt.ylabel('Frequency')
    plt.title('Distribution of Absolute Errors')
    plt.legend()

    plt.subplot(1, 2, 2)
    # Only rows with a defined percentage error are plotted; values above 100%
    # are capped at 100 so a few extreme rows do not stretch the axis.
    plt.hist(results_df['Pct_Error'].dropna().clip(upper=100), bins=50, edgecolor='black', alpha=0.7)
    plt.xlabel('Percentage Error (%)')
    plt.ylabel('Frequency')
    plt.title('Distribution of Percentage Errors')

    plt.tight_layout()
    plt.show()

else:
    print("‚ùå Please complete model training first!")

## 10. ✅ Summary & Next Steps

In [None]:
# Final summary and recommendations
print("=== üìà REGRESSION ANALYSIS COMPLETE ===")
print()

if 'best_model' in locals():
    r2 = model_results[best_model_name]['r2']
    rmse = model_results[best_model_name]['rmse']
    mae = model_results[best_model_name]['mae']

    # No cell in this notebook assigns `dataset_name`, so referencing it directly
    # raised NameError. Use it if the user defined it, otherwise fall back to the
    # dataset name given in the notebook title.
    summary_dataset_name = globals().get('dataset_name', 'transactions')

    print("üìä ANALYSIS SUMMARY:")
    print(f"‚Ä¢ Dataset: {summary_dataset_name}")
    print(f"‚Ä¢ Target Variable: {target_column}")
    print(f"‚Ä¢ Best Model: {best_model_name}")
    print(f"‚Ä¢ R¬≤ Score: {r2:.4f} ({r2*100:.2f}% variance explained)")
    print(f"‚Ä¢ RMSE: {rmse:.4f}")
    print(f"‚Ä¢ MAE: {mae:.4f}")
    print(f"‚Ä¢ Features Used: {len(X.columns)}")
    print(f"‚Ä¢ Training Samples: {len(X_train)}")
    print(f"‚Ä¢ Test Samples: {len(X_test)}")

    print("\nüöÄ RECOMMENDED NEXT STEPS:")

    # Canned advice keyed on the best model's test R^2
    if r2 >= 0.9:
        print("1. ‚úÖ Excellent model! Ready for production deployment")
        print("2. üîÑ Set up model monitoring and retraining pipeline")
        print("3. üìà Consider A/B testing in production")
    elif r2 >= 0.7:
        print("1. üéØ Good performance! Try hyperparameter tuning")
        print("2. üîß Consider polynomial features for non-linear relationships")
        print("3. üìä Try ensemble methods (stacking, blending)")
    elif r2 >= 0.5:
        print("1. üîß Feature engineering needed")
        print("2. üìä Look for non-linear relationships")
        print("3. üéØ Consider more advanced algorithms (XGBoost, LightGBM)")
        print("4. üìà Collect more relevant features")
    else:
        print("1. üîç Review data quality and target definition")
        print("2. üéØ Significant feature engineering required")
        print("3. üìä Consider if regression is the right approach")
        print("4. ü§ù Consult domain experts for insights")

    print("\nüõ†Ô∏è  TECHNICAL IMPROVEMENTS:")
    print("‚Ä¢ Hyperparameter tuning with GridSearchCV/RandomizedSearchCV")
    print("‚Ä¢ Cross-validation for more robust evaluation")
    print("‚Ä¢ Try XGBoost, LightGBM, or CatBoost")
    print("‚Ä¢ Feature selection (RFE, SelectKBest)")
    print("‚Ä¢ Polynomial features for non-linear relationships")
    print("‚Ä¢ Log transformation if target is skewed")

    # The snippet below is printed as text for the user to copy; nothing is saved here
    print("\nüíæ SAVE YOUR MODEL:")
    print("# Uncomment to save the trained model")
    print("# import joblib")
    print("# joblib.dump(best_model, 'regression_model.pkl')")
    print("# joblib.dump(scaler, 'scaler.pkl')")
    print("# print('Model saved successfully!')")

    # Only announce completion when the workflow actually produced a model
    print("\nüéâ Regression analysis workflow completed!")

else:
    print("‚ö†Ô∏è  Analysis incomplete. Please run all previous cells.")