# In [15]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

def encode_categorical_features(df, exclude_columns=None):
    """Encode object/category columns as 0/1 codes or dummy columns.

    Every column whose dtype is ``object`` or ``category`` is inspected:

    * exactly two distinct non-null values: the column is replaced in place
      by codes 0 and 1, numbered in order of first appearance;
    * three or more distinct non-null values: the column is removed and its
      ``pd.get_dummies`` indicator columns (prefixed with the column name)
      are appended to the right of the frame;
    * fewer than two distinct non-null values: the column is left untouched
      and gets no entry in the returned dictionary.

    Missing values in a two-valued column stay missing (NaN) instead of
    being numbered as an extra class.

    Args:
        df (pd.DataFrame): Input DataFrame containing features to be encoded.
            It is copied, not modified.
        exclude_columns (list): Column names to leave unencoded. ``None``
            means no exclusions; a single string is treated as one name.

    Returns:
        pd.DataFrame: DataFrame with encoded features.
        dict: Maps each handled column name to "Excluded",
            "Binary Encoding" or "Dummy Encoding".
    """
    encoded_df = df.copy()
    encoding_info = {}

    # Normalise the exclusion list to a set: O(1) lookups, and a bare string
    # must match one column name exactly rather than by substring.
    if exclude_columns is None:
        excluded = set()
    elif isinstance(exclude_columns, str):
        excluded = {exclude_columns}
    else:
        excluded = set(exclude_columns)

    categorical_columns = encoded_df.select_dtypes(include=['object', 'category']).columns

    # Dummy frames are collected and attached once after the loop so the
    # frame is not re-copied for every dummy-encoded column.
    dummy_frames = []
    dummy_sources = []

    for column in categorical_columns:
        if column in excluded:
            encoding_info[column] = "Excluded"
            continue

        # nunique() ignores missing values, so the branches below count only
        # real labels.
        unique_values = encoded_df[column].nunique()
        if unique_values == 2:
            # unique() would include NaN; drop it first so the mapping holds
            # exactly the two labels and missing entries map to NaN.
            labels = encoded_df[column].dropna().unique()
            value_mapping = {label: idx for idx, label in enumerate(labels)}
            encoded_df[column] = encoded_df[column].map(value_mapping)
            encoding_info[column] = "Binary Encoding"
        elif unique_values >= 3:
            dummy_frames.append(pd.get_dummies(encoded_df[column], prefix=column))
            dummy_sources.append(column)
            encoding_info[column] = "Dummy Encoding"

    if dummy_frames:
        encoded_df = pd.concat(
            [encoded_df.drop(columns=dummy_sources), *dummy_frames], axis=1
        )

    return encoded_df, encoding_info

def run_model(encoded_df, target_column, model_class, model_name):
    """Fit one model on an 80/20 split and report how it scored.

    The target column is separated from the remaining columns, the rows are
    split with ``train_test_split`` (``test_size=0.2``, ``random_state=42``),
    and ``model_class()`` is instantiated with no arguments and fitted on the
    training part.

    Returns:
        dict with keys "model" (the fitted instance), "accuracy" (the value
        of ``model.score`` on the held-out part), "predictions" (the output
        of ``model.predict`` on the held-out part) and "model_name"; or
        ``None`` if any step raised, in which case the error is printed.
    """
    try:
        features = encoded_df.drop(columns=target_column)
        labels = encoded_df[target_column]

        split = train_test_split(features, labels, test_size=0.2, random_state=42)
        train_features, test_features, train_labels, test_labels = split

        estimator = model_class()
        estimator.fit(train_features, train_labels)

        outcome = {
            "model": estimator,
            "accuracy": estimator.score(test_features, test_labels),
            "predictions": estimator.predict(test_features),
            "model_name": model_name,
        }
    except Exception as e:
        # Best-effort: report the failure and signal it with None.
        print(f"Error running model {model_name}: {e}")
        return None
    return outcome

def run_ml_pipeline(encoded_df, target_column):
    """
    Train and score LogisticRegression and RandomForestClassifier in turn.

    Each model is run through ``run_model``. A model whose run returned
    ``None`` is skipped; for the others the accuracy is printed and the
    result is stored under the model's display name.

    Parameters:
    - encoded_df: The encoded DataFrame.
    - target_column: The name of the target column in the DataFrame.

    Returns:
    - dict: Display name -> result dict from ``run_model``. An empty dict is
      returned if an exception escapes the pipeline itself.
    """
    results = {}
    try:
        # Processed in this order: Logistic Regression, then Random Forest.
        candidates = (
            (LogisticRegression, "Logistic Regression"),
            (RandomForestClassifier, "Random Forest"),
        )
        for estimator_class, label in candidates:
            outcome = run_model(encoded_df, target_column, estimator_class, label)
            if outcome is None:
                continue
            results[label] = outcome
            print(f"Accuracy for {outcome['model_name']}: {outcome['accuracy']:.2f}")
            print("-" * 40)

        print("Final results before return:", results)
        return results
    except Exception as e:
        print(f"Error in run_ml_pipeline: {e}")
        return {}

