# In [None]:
import pandas as pd
from collections import defaultdict

class TransactionConverter:
    """Convert a discretized DataFrame into itemset-style representations.

    Every cell of the input frame becomes an item string of the form
    ``"<column>=<value>"``.  Three layouts are offered: a list of
    transactions, a 0/1 indicator matrix and a dict keyed by row index.
    All three are built from the same per-column item labels, so an item
    is always spelled identically in every layout.
    """

    # Columns with at most this many distinct values get "<col>_<val>"
    # mapping names; columns with more get "<col>_bin_<val>".
    _MAX_CATEGORICAL_VALUES = 10

    # Column that is dropped from the items when include_target is False.
    _TARGET_COLUMN = 'target'

    def __init__(self, discretized_df, include_target=True):
        """
        Initialize the converter

        Parameters:
        -----------
        discretized_df : pandas DataFrame
            The discretized data
        include_target : bool
            Whether to include the 'target' column in transactions
        """
        self.data = discretized_df
        self.include_target = include_target
        self.feature_mappings = {}

    def _create_feature_mappings(self):
        """Create meaningful names for discretized values.

        Fills ``self.feature_mappings`` with one ``{value: name}`` dict per
        column.  The mappings are informational only: the item strings
        produced by the ``convert_to_*`` methods do not use them.
        """
        for column in self.data.columns:
            unique_values = list(self.data[column].unique())
            try:
                unique_values = sorted(unique_values)
            except TypeError:
                # Values without a total order (e.g. strings mixed with
                # numbers or None) cannot be sorted; keep the order of
                # first appearance instead of aborting the conversion.
                pass

            if len(unique_values) <= self._MAX_CATEGORICAL_VALUES:
                infix = "_"      # Discrete/categorical variable
            else:
                infix = "_bin_"  # Continuous variable that was discretized

            self.feature_mappings[column] = {
                val: f"{column}{infix}{val}"
                for val in unique_values
            }

    def _item_columns(self):
        """Return ``(column, labels)`` pairs for every column used as items.

        ``labels`` holds one ``"<column>=<value>"`` string per row.  Values
        are read column by column rather than through ``iterrows()``:
        ``iterrows()`` converts each row to a single common dtype, which
        turns integers into floats (``0`` -> ``0.0``) as soon as the frame
        also contains a float column.
        """
        item_columns = []
        for column in self.data.columns:
            # Skip target if not included
            if not self.include_target and column == self._TARGET_COLUMN:
                continue
            labels = [f"{column}={value}" for value in self.data[column]]
            item_columns.append((column, labels))
        return item_columns

    def _build_transactions(self):
        """Return one list of item strings per row, in row order."""
        label_columns = [labels for _, labels in self._item_columns()]
        if not label_columns:
            # zip() over nothing yields no rows at all; each row must still
            # be represented by an (empty) transaction.
            return [[] for _ in range(len(self.data))]
        return [list(items) for items in zip(*label_columns)]

    def convert_to_transaction_list(self):
        """
        Convert discretized data to list of transactions.
        Each transaction is a list of strings in format "feature_name=value"

        Returns:
        --------
        list of lists, where each inner list contains items in the transaction
        """
        self._create_feature_mappings()
        return self._build_transactions()

    def convert_to_binary_matrix(self):
        """
        Convert discretized data to binary matrix format.
        Creates one 0/1 column for each feature-value combination, ordered
        by source column and then by first appearance of the value.

        Returns:
        --------
        pandas DataFrame with binary columns (default integer index)
        """
        self._create_feature_mappings()
        binary_columns = {}

        for _, labels in self._item_columns():
            # dict.fromkeys keeps first-appearance order and removes
            # duplicates, so every distinct item yields exactly one column.
            for item in dict.fromkeys(labels):
                binary_columns[item] = [
                    1 if label == item else 0 for label in labels
                ]

        return pd.DataFrame(binary_columns)

    def convert_to_sparse_matrix(self):
        """
        Convert discretized data to sparse matrix format.
        Returns dictionary with transaction ID and corresponding items.

        Returns:
        --------
        dict where keys are the DataFrame index labels and values are lists
        of items; if an index label repeats, the last row with it wins
        """
        self._create_feature_mappings()
        return dict(zip(self.data.index, self._build_transactions()))

# Demonstration: run the converter on a small toy dataset
if __name__ == "__main__":
    # Toy frame whose features are already discretized into integer bins
    sample_data = pd.DataFrame(
        {
            'feature1': [0, 1, 2, 1, 0],
            'feature2': [1, 1, 0, 2, 1],
            'feature3': [0, 2, 1, 1, 0],
            'target': [1, 0, 1, 0, 1],
        }
    )

    # Keep the target column as an item in every transaction
    converter = TransactionConverter(sample_data, include_target=True)

    # Build each of the three supported representations
    transactions = converter.convert_to_transaction_list()
    binary_matrix = converter.convert_to_binary_matrix()
    sparse_matrix = converter.convert_to_sparse_matrix()

    # Show the first two entries of every representation
    print("Transaction List Format:")
    for transaction in transactions[:2]:
        print(transaction)

    print("\nBinary Matrix Format (first 2 rows):")
    print(binary_matrix.head(2))

    print("\nSparse Matrix Format (first 2 transactions):")
    for tid in list(sparse_matrix)[:2]:
        print(f"Transaction {tid}: {sparse_matrix[tid]}")