In [23]:
    
# Custom Transformer Leon
class CustomTransformer(BaseEstimator, TransformerMixin):
    """Mean-centre the log or the square of a DataFrame's numerical columns.

    ``fit`` learns, per numerical column, the mean of the transformed values;
    ``transform`` applies the transformation and subtracts that mean;
    ``inverse_transform`` adds the mean back and undoes the transformation.
    Non-numerical columns are not part of any returned frame.

    Parameters
    ----------
    param1, param2
        Stored on the instance; no method of this class reads them.
    transform_type : {'log', 'power'}, default='log'
        'log' applies ``np.log``; 'power' squares the values.
    verbose : bool, default=False
        When true, print the elapsed time of every call.
    """

    def __init__(self, param1=1, param2='default', transform_type='log', verbose=False):
        self.param1 = param1
        self.param2 = param2
        self.transform_type = transform_type
        self.verbose = verbose

    def _check_transform_type(self):
        """Raise ``ValueError`` unless ``transform_type`` is 'log' or 'power'."""
        if self.transform_type not in ('log', 'power'):
            raise ValueError(
                "transform_type must be 'log' or 'power', "
                f"got {self.transform_type!r}"
            )

    def fit(self, X, y=None):
        """Learn the per-column mean of the transformed numerical columns.

        Parameters
        ----------
        X : pandas.DataFrame
            Training data; only columns of a numpy numeric dtype are used.
        y : ignored

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``transform_type`` is neither 'log' nor 'power'.
        """
        # Fail here rather than letting transform() hit an unbound local later.
        self._check_transform_type()
        self.fit_start_time = time.time()

        # X is only read, never written, so no defensive copy is needed.
        numerical_columns = X.select_dtypes(include=[np.number]).columns

        if self.transform_type == 'log':
            self.log_mean = np.mean(np.log(X[numerical_columns]), axis=0)
        else:
            self.power_mean = np.mean(np.power(X[numerical_columns], 2), axis=0)

        if self.verbose:
            elapsed = round(time.time() - self.fit_start_time, 3)
            print(f'INFO: fitting of the custom Transformer finished correctly in {elapsed} seconds.')

        return self

    def transform(self, X):
        """Return the transformed, mean-centred numerical columns of ``X``.

        Parameters
        ----------
        X : pandas.DataFrame

        Returns
        -------
        pandas.DataFrame
            A new frame holding only the numerical columns of ``X``.

        Raises
        ------
        ValueError
            If ``transform_type`` is neither 'log' nor 'power'.
        """
        self._check_transform_type()
        self.transform_start_time = time.time()

        numerical_columns = X.select_dtypes(include=[np.number]).columns

        # np.log / np.power return new frames, so the caller's X is untouched.
        # The in-place subtraction keeps exactly the columns selected above.
        if self.transform_type == 'log':
            X_transformed = np.log(X[numerical_columns])
            X_transformed -= self.log_mean
        else:
            X_transformed = np.power(X[numerical_columns], 2)
            X_transformed -= self.power_mean

        if self.verbose:
            elapsed = round(time.time() - self.transform_start_time, 3)
            print(f'INFO: transforming with the custom Transformer finished correctly in {elapsed} seconds.')

        return X_transformed

    def inverse_transform(self, X):
        """Undo ``transform`` on the numerical columns of ``X``.

        For 'power' the result is the non-negative square root, so the sign of
        the original values is not recovered.

        Parameters
        ----------
        X : pandas.DataFrame
            Data in the transformed space.

        Returns
        -------
        pandas.DataFrame

        Raises
        ------
        ValueError
            If ``transform_type`` is neither 'log' nor 'power'.
        """
        self._check_transform_type()
        self.inverse_transform_start_time = time.time()

        numerical_columns = X.select_dtypes(include=[np.number]).columns

        if self.transform_type == 'log':
            X_inverse_transformed = np.exp(X[numerical_columns] + self.log_mean)
        else:
            X_inverse_transformed = np.sqrt(X[numerical_columns] + self.power_mean)

        if self.verbose:
            elapsed = round(time.time() - self.inverse_transform_start_time, 3)
            print(f'INFO: inverse transforming with the custom inverse Transformer finished correctly in {elapsed} seconds.')

        return X_inverse_transformed

In [2]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
import time

# Custom Transformer
class CustomTransformer(BaseEstimator, TransformerMixin):
    """Mean-centre transformed numerical columns and one-hot encode the rest.

    ``fit`` learns the per-column mean of the log (or square) of every
    numerical column, the list of non-numerical columns and, for the
    object/string/category ones, their category levels. ``transform`` returns
    the centred numerical columns followed by ``pd.get_dummies`` of the
    non-numerical columns (first level dropped).

    Parameters
    ----------
    param1, param2
        Stored on the instance; no method of this class reads them.
    transform_type : {'log', 'power'}, default='log'
        'log' applies ``np.log``; 'power' squares the values.
    verbose : bool, default=False
        When true, print the elapsed time of every call.
    """

    def __init__(self, param1=1, param2='default', transform_type='log', verbose=False):
        self.param1 = param1
        self.param2 = param2
        self.transform_type = transform_type
        self.verbose = verbose
        # Non-numerical column names; filled in by fit().
        self.cat_columns = []

    def _check_transform_type(self):
        """Raise ``ValueError`` unless ``transform_type`` is 'log' or 'power'."""
        if self.transform_type not in ('log', 'power'):
            raise ValueError(
                "transform_type must be 'log' or 'power', "
                f"got {self.transform_type!r}"
            )

    @staticmethod
    def _is_dummy_encodable(column):
        """Tell whether ``column`` has an object, string or category dtype."""
        return (
            isinstance(column.dtype, pd.CategoricalDtype)
            or pd.api.types.is_object_dtype(column)
            or pd.api.types.is_string_dtype(column)
        )

    def fit(self, X, y=None):
        """Learn column means, non-numerical columns and their category levels.

        Parameters
        ----------
        X : pandas.DataFrame
        y : ignored

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``transform_type`` is neither 'log' nor 'power'.
        """
        self._check_transform_type()
        self.fit_start_time = time.time()

        numerical_columns = X.select_dtypes(include=[np.number]).columns
        numeric_names = set(numerical_columns)
        # Keep the frame's own column order (de-duplicated); a plain set
        # difference would make the output column order vary between runs.
        self.cat_columns = list(
            dict.fromkeys(col for col in X.columns if col not in numeric_names)
        )

        # Remember the levels seen during fit so transform() always produces
        # the same dummy columns and always drops the same first level.
        self.category_levels_ = {
            col: pd.Categorical(X[col]).categories
            for col in self.cat_columns
            if self._is_dummy_encodable(X[col])
        }

        if self.transform_type == 'log':
            self.log_mean = np.mean(np.log(X[numerical_columns]), axis=0)
        else:
            self.power_mean = np.mean(np.power(X[numerical_columns], 2), axis=0)

        if self.verbose:
            elapsed = round(time.time() - self.fit_start_time, 3)
            print(f'INFO: fitting of the custom Transformer finished correctly in {elapsed} seconds.')

        return self

    def transform(self, X):
        """Return centred numerical columns plus one-hot encoded categoricals.

        Values of a categorical column that were not seen during ``fit`` are
        treated as missing, i.e. all of that column's dummies are 0/False.

        Parameters
        ----------
        X : pandas.DataFrame

        Returns
        -------
        pandas.DataFrame

        Raises
        ------
        ValueError
            If ``transform_type`` is neither 'log' nor 'power'.
        """
        self._check_transform_type()
        self.transform_start_time = time.time()

        numerical_columns = X.select_dtypes(include=[np.number]).columns

        # The in-place subtraction keeps exactly the columns selected above.
        if self.transform_type == 'log':
            X_transformed = np.log(X[numerical_columns])
            X_transformed -= self.log_mean
        else:
            X_transformed = np.power(X[numerical_columns], 2)
            X_transformed -= self.power_mean

        if len(self.cat_columns) > 0:
            categorical = X[self.cat_columns].copy()
            # getattr keeps instances fitted before category_levels_ existed usable.
            for col, levels in getattr(self, 'category_levels_', {}).items():
                categorical[col] = categorical[col].astype(
                    pd.CategoricalDtype(categories=levels)
                )
            X_cat_encoded = pd.get_dummies(categorical, drop_first=True)
            X_transformed = pd.concat([X_transformed, X_cat_encoded], axis=1)

        if self.verbose:
            elapsed = round(time.time() - self.transform_start_time, 3)
            print(f'INFO: transforming with the custom Transformer finished correctly in {elapsed} seconds.')

        return X_transformed

   

In [3]:
import category_encoders as ce

class CustomTransformer2(BaseEstimator, TransformerMixin):
    """Target-encode categorical columns and mean-centre transformed numerical ones.

    ``fit`` learns one ``ce.TargetEncoder`` per 'category'/'object' column and
    the per-column mean of the log (or square) of every numerical column.
    ``transform`` returns the centred numerical columns followed by the
    target-encoded categorical columns.

    Parameters
    ----------
    param1, param2
        Stored on the instance; no method of this class reads them.
    transform_type : {'log', 'power'}, default='log'
        'log' applies ``np.log``; 'power' squares the values.
    verbose : bool, default=False
        When true, print the elapsed time of every call.
    """

    def __init__(self, param1=1, param2='default', transform_type='log', verbose=False):
        self.param1 = param1
        self.param2 = param2
        self.transform_type = transform_type
        self.verbose = verbose

        # Column name -> fitted encoder; rebuilt from scratch by every fit().
        self.target_encoders = {}

    def _check_transform_type(self):
        """Raise ``ValueError`` unless ``transform_type`` is 'log' or 'power'."""
        if self.transform_type not in ('log', 'power'):
            raise ValueError(
                "transform_type must be 'log' or 'power', "
                f"got {self.transform_type!r}"
            )

    def fit(self, X, y=None):
        """Learn the target encoders and the column means.

        Parameters
        ----------
        X : pandas.DataFrame
        y : array-like, optional
            Passed unchanged to every ``ce.TargetEncoder.fit`` call.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``transform_type`` is neither 'log' nor 'power'.
        """
        self._check_transform_type()
        self.fit_start_time = time.time()

        numerical_columns = X.select_dtypes(include=[np.number]).columns
        categorical_columns = X.select_dtypes(include=['category', 'object']).columns

        # Start from an empty mapping so a refit on a frame with different
        # columns does not keep (and then try to refit) stale encoders.
        self.target_encoders = {}
        for col in categorical_columns:
            encoder = ce.TargetEncoder()
            encoder.fit(X[col], y)
            self.target_encoders[col] = encoder

        if self.transform_type == 'log':
            self.log_mean = np.mean(np.log(X[numerical_columns]), axis=0)
        else:
            self.power_mean = np.mean(np.power(X[numerical_columns], 2), axis=0)

        if self.verbose:
            elapsed = round(time.time() - self.fit_start_time, 3)
            print(f'INFO: fitting of the custom Transformer finished correctly in {elapsed} seconds.')

        return self

    def transform(self, X):
        """Return centred numerical columns plus target-encoded categoricals.

        Parameters
        ----------
        X : pandas.DataFrame

        Returns
        -------
        pandas.DataFrame
            Numerical columns first, then one column per fitted encoder.

        Raises
        ------
        ValueError
            If ``transform_type`` is neither 'log' nor 'power'.
        """
        self._check_transform_type()
        # Encoded values are written into this copy, not the caller's frame.
        X = X.copy()
        self.transform_start_time = time.time()

        # Taken before encoding so encoded columns are not log/power transformed.
        numerical_columns = X.select_dtypes(include=[np.number]).columns

        for col, encoder in self.target_encoders.items():
            X[col] = encoder.transform(X[col])

        if self.transform_type == 'log':
            X_transformed = np.log(X[numerical_columns])
            X_transformed -= self.log_mean
        else:
            X_transformed = np.power(X[numerical_columns], 2)
            X_transformed -= self.power_mean

        # Returning only the numerical selection would throw the encodings away.
        for col in self.target_encoders:
            X_transformed[col] = X[col]

        if self.verbose:
            elapsed = round(time.time() - self.transform_start_time, 3)
            print(f'INFO: transforming with the custom Transformer finished correctly in {elapsed} seconds.')

        return X_transformed

    def inverse_transform(self, X):
        """Undo the numerical part of ``transform``.

        Only columns for which ``fit`` stored a mean are inverted and returned;
        target-encoded columns are left out because the encoding is not
        inverted here. For 'power' the non-negative square root is returned.

        Parameters
        ----------
        X : pandas.DataFrame
            Data in the transformed space.

        Returns
        -------
        pandas.DataFrame

        Raises
        ------
        ValueError
            If ``transform_type`` is neither 'log' nor 'power'.
        """
        self._check_transform_type()
        self.inverse_transform_start_time = time.time()

        use_log = self.transform_type == 'log'
        fitted_mean = self.log_mean if use_log else self.power_mean
        columns = [col for col in X.columns if col in fitted_mean.index]
        shifted = X[columns] + fitted_mean
        X_inverse_transformed = np.exp(shifted) if use_log else np.sqrt(shifted)

        if self.verbose:
            elapsed = round(time.time() - self.inverse_transform_start_time, 3)
            print(f'INFO: inverse transforming with the custom inverse Transformer finished correctly in {elapsed} seconds.')

        return X_inverse_transformed


In [None]:
import category_encoders as ce
import numpy as np
import time
from sklearn.base import BaseEstimator, TransformerMixin

class CustomTransformer4(BaseEstimator, TransformerMixin):
    """Ordinal-encode categorical columns and mean-centre transformed numerical ones.

    ``fit`` learns one ``ce.OrdinalEncoder`` per 'category'/'object' column and
    the per-column mean of the log (or square) of every numerical column.
    ``transform`` returns the centred numerical columns followed by the
    ordinal-encoded categorical columns.

    Parameters
    ----------
    param1, param2
        Stored on the instance; no method of this class reads them.
    transform_type : {'log', 'power'}, default='log'
        'log' applies ``np.log``; 'power' squares the values.
    verbose : bool, default=False
        When true, print the elapsed time of every call.
    """

    def __init__(self, param1=1, param2='default', transform_type='log', verbose=False):
        self.param1 = param1
        self.param2 = param2
        self.transform_type = transform_type
        self.verbose = verbose

        # Column name -> fitted encoder; rebuilt from scratch by every fit().
        self.ordinal_encoders = {}

    def _check_transform_type(self):
        """Raise ``ValueError`` unless ``transform_type`` is 'log' or 'power'."""
        if self.transform_type not in ('log', 'power'):
            raise ValueError(
                "transform_type must be 'log' or 'power', "
                f"got {self.transform_type!r}"
            )

    def fit(self, X, y=None):
        """Learn the ordinal encoders and the column means.

        Parameters
        ----------
        X : pandas.DataFrame
        y : ignored

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``transform_type`` is neither 'log' nor 'power'.
        """
        self._check_transform_type()
        self.fit_start_time = time.time()

        numerical_columns = X.select_dtypes(include=[np.number]).columns
        categorical_columns = X.select_dtypes(include=['category', 'object']).columns

        # Start from an empty mapping so a refit on a frame with different
        # columns does not keep (and then try to refit) stale encoders.
        self.ordinal_encoders = {}
        for col in categorical_columns:
            encoder = ce.OrdinalEncoder()
            encoder.fit(X[col])
            self.ordinal_encoders[col] = encoder

        if self.transform_type == 'log':
            self.log_mean = np.mean(np.log(X[numerical_columns]), axis=0)
        else:
            self.power_mean = np.mean(np.power(X[numerical_columns], 2), axis=0)

        if self.verbose:
            elapsed = round(time.time() - self.fit_start_time, 3)
            print(f'INFO: fitting of the custom Transformer finished correctly in {elapsed} seconds.')

        return self

    def transform(self, X):
        """Return centred numerical columns plus ordinal-encoded categoricals.

        Parameters
        ----------
        X : pandas.DataFrame

        Returns
        -------
        pandas.DataFrame
            Numerical columns first, then one column per fitted encoder.

        Raises
        ------
        ValueError
            If ``transform_type`` is neither 'log' nor 'power'.
        """
        self._check_transform_type()
        # Encoded values are written into this copy, not the caller's frame.
        X = X.copy()
        self.transform_start_time = time.time()

        # Taken before encoding so encoded columns are not log/power transformed.
        numerical_columns = X.select_dtypes(include=[np.number]).columns

        for col, encoder in self.ordinal_encoders.items():
            X[col] = encoder.transform(X[col])

        if self.transform_type == 'log':
            X_transformed = np.log(X[numerical_columns])
            X_transformed -= self.log_mean
        else:
            X_transformed = np.power(X[numerical_columns], 2)
            X_transformed -= self.power_mean

        # Returning only the numerical selection would throw the encodings away.
        for col in self.ordinal_encoders:
            X_transformed[col] = X[col]

        if self.verbose:
            elapsed = round(time.time() - self.transform_start_time, 3)
            print(f'INFO: transformation of the custom Transformer finished correctly in {elapsed} seconds.')

        return X_transformed


In [1]:
import category_encoders as ce
import numpy as np
import time
from sklearn.base import BaseEstimator, TransformerMixin

class CustomTransformer4(BaseEstimator, TransformerMixin):
    """Ordinal-encode categorical columns and mean-centre transformed numerical ones.

    ``fit`` learns one ``ce.OrdinalEncoder`` per 'category'/'object' column and
    the per-column mean of the log (or square) of every numerical column.
    ``transform`` returns the centred numerical columns followed by the
    ordinal-encoded categorical columns.

    Parameters
    ----------
    param1, param2
        Stored on the instance; no method of this class reads them.
    transform_type : {'log', 'power'}, default='log'
        'log' applies ``np.log``; 'power' squares the values.
    verbose : bool, default=False
        When true, print the elapsed time of every call.
    """

    def __init__(self, param1=1, param2='default', transform_type='log', verbose=False):
        self.param1 = param1
        self.param2 = param2
        self.transform_type = transform_type
        self.verbose = verbose

        # Column name -> fitted encoder; rebuilt from scratch by every fit().
        self.ordinal_encoders = {}

    def _check_transform_type(self):
        """Raise ``ValueError`` unless ``transform_type`` is 'log' or 'power'."""
        if self.transform_type not in ('log', 'power'):
            raise ValueError(
                "transform_type must be 'log' or 'power', "
                f"got {self.transform_type!r}"
            )

    def fit(self, X, y=None):
        """Learn the ordinal encoders and the column means.

        Parameters
        ----------
        X : pandas.DataFrame
        y : ignored

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``transform_type`` is neither 'log' nor 'power'.
        """
        self._check_transform_type()
        self.fit_start_time = time.time()

        numerical_columns = X.select_dtypes(include=[np.number]).columns
        categorical_columns = X.select_dtypes(include=['category', 'object']).columns

        # Start from an empty mapping so a refit on a frame with different
        # columns does not keep (and then try to refit) stale encoders.
        self.ordinal_encoders = {}
        for col in categorical_columns:
            encoder = ce.OrdinalEncoder()
            encoder.fit(X[col])
            self.ordinal_encoders[col] = encoder

        if self.transform_type == 'log':
            self.log_mean = np.mean(np.log(X[numerical_columns]), axis=0)
        else:
            self.power_mean = np.mean(np.power(X[numerical_columns], 2), axis=0)

        if self.verbose:
            elapsed = round(time.time() - self.fit_start_time, 3)
            print(f'INFO: fitting of the custom Transformer finished correctly in {elapsed} seconds.')

        return self

    def transform(self, X):
        """Return centred numerical columns plus ordinal-encoded categoricals.

        Parameters
        ----------
        X : pandas.DataFrame

        Returns
        -------
        pandas.DataFrame
            Numerical columns first, then one column per fitted encoder.

        Raises
        ------
        ValueError
            If ``transform_type`` is neither 'log' nor 'power'.
        """
        self._check_transform_type()
        # Encoded values are written into this copy, not the caller's frame.
        X = X.copy()
        self.transform_start_time = time.time()

        # Taken before encoding so encoded columns are not log/power transformed.
        numerical_columns = X.select_dtypes(include=[np.number]).columns

        for col, encoder in self.ordinal_encoders.items():
            X[col] = encoder.transform(X[col])

        if self.transform_type == 'log':
            X_transformed = np.log(X[numerical_columns])
            X_transformed -= self.log_mean
        else:
            X_transformed = np.power(X[numerical_columns], 2)
            X_transformed -= self.power_mean

        # Returning only the numerical selection would throw the encodings away.
        for col in self.ordinal_encoders:
            X_transformed[col] = X[col]

        if self.verbose:
            elapsed = round(time.time() - self.transform_start_time, 3)
            print(f'INFO: transformation of the custom Transformer finished correctly in {elapsed} seconds.')

        return X_transformed


In [None]:
import category_encoders as ce
import numpy as np
import time
from sklearn.base import BaseEstimator, TransformerMixin

class CustomTransformer2(BaseEstimator, TransformerMixin):
    """Ordinal-encode categorical columns and mean-centre transformed numerical ones.

    ``fit`` learns one ``ce.OrdinalEncoder`` per 'category'/'object' column and
    the per-column mean of the log (or square) of every numerical column.
    ``transform`` returns the centred numerical columns followed by the
    ordinal-encoded categorical columns.

    Parameters
    ----------
    param1, param2
        Stored on the instance; no method of this class reads them.
    transform_type : {'log', 'power'}, default='log'
        'log' applies ``np.log``; 'power' squares the values.
    verbose : bool, default=False
        When true, print the elapsed time of every call.
    """

    def __init__(self, param1=1, param2='default', transform_type='log', verbose=False):
        self.param1 = param1
        self.param2 = param2
        self.transform_type = transform_type
        self.verbose = verbose

        # Column name -> fitted encoder; rebuilt from scratch by every fit().
        self.ordinal_encoders = {}

    def _check_transform_type(self):
        """Raise ``ValueError`` unless ``transform_type`` is 'log' or 'power'."""
        if self.transform_type not in ('log', 'power'):
            raise ValueError(
                "transform_type must be 'log' or 'power', "
                f"got {self.transform_type!r}"
            )

    def fit(self, X, y=None):
        """Learn the ordinal encoders and the column means.

        Parameters
        ----------
        X : pandas.DataFrame
        y : ignored

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``transform_type`` is neither 'log' nor 'power'.
        """
        self._check_transform_type()
        self.fit_start_time = time.time()

        numerical_columns = X.select_dtypes(include=[np.number]).columns
        categorical_columns = X.select_dtypes(include=['category', 'object']).columns

        # Start from an empty mapping so a refit on a frame with different
        # columns does not keep (and then try to refit) stale encoders.
        self.ordinal_encoders = {}
        for col in categorical_columns:
            encoder = ce.OrdinalEncoder()
            encoder.fit(X[col])
            self.ordinal_encoders[col] = encoder

        if self.transform_type == 'log':
            self.log_mean = np.mean(np.log(X[numerical_columns]), axis=0)
        else:
            self.power_mean = np.mean(np.power(X[numerical_columns], 2), axis=0)

        if self.verbose:
            elapsed = round(time.time() - self.fit_start_time, 3)
            print(f'INFO: fitting of the custom Transformer finished correctly in {elapsed} seconds.')

        return self

    def transform(self, X):
        """Return centred numerical columns plus ordinal-encoded categoricals.

        Parameters
        ----------
        X : pandas.DataFrame

        Returns
        -------
        pandas.DataFrame
            Numerical columns first, then one column per fitted encoder.

        Raises
        ------
        ValueError
            If ``transform_type`` is neither 'log' nor 'power'.
        """
        self._check_transform_type()
        # Encoded values are written into this copy, not the caller's frame.
        X = X.copy()
        self.transform_start_time = time.time()

        # Taken before encoding so encoded columns are not log/power transformed.
        numerical_columns = X.select_dtypes(include=[np.number]).columns

        for col, encoder in self.ordinal_encoders.items():
            X[col] = encoder.transform(X[col])

        if self.transform_type == 'log':
            X_transformed = np.log(X[numerical_columns])
            X_transformed -= self.log_mean
        else:
            X_transformed = np.power(X[numerical_columns], 2)
            X_transformed -= self.power_mean

        # Returning only the numerical selection would throw the encodings away.
        for col in self.ordinal_encoders:
            X_transformed[col] = X[col]

        if self.verbose:
            elapsed = round(time.time() - self.transform_start_time, 3)
            print(f'INFO: transformation of the custom Transformer finished correctly in {elapsed} seconds.')

        return X_transformed


In [None]:
from sklearn.preprocessing import LabelEncoder 
#from sklearn.preprocessing import labelencoder
import numpy as np
import time
from sklearn.base import BaseEstimator, TransformerMixin

class CustomTransformer2(BaseEstimator, TransformerMixin):
    """Label-encode categorical columns and mean-centre transformed numerical ones.

    ``fit`` learns one scikit-learn ``LabelEncoder`` per 'category'/'object'
    column and the per-column mean of the log (or square) of every numerical
    column. ``transform`` returns the centred numerical columns followed by
    the label-encoded categorical columns.

    Parameters
    ----------
    param1, param2
        Stored on the instance; no method of this class reads them.
    transform_type : {'log', 'power'}, default='log'
        'log' applies ``np.log``; 'power' squares the values.
    verbose : bool, default=False
        When true, print the elapsed time of every call.
    """

    def __init__(self, param1=1, param2='default', transform_type='log', verbose=False):
        self.param1 = param1
        self.param2 = param2
        self.transform_type = transform_type
        self.verbose = verbose

        # Column name -> fitted encoder; rebuilt from scratch by every fit().
        # Note: this instance attribute is a dict, not the LabelEncoder class.
        self.LabelEncoder = {}

    def _check_transform_type(self):
        """Raise ``ValueError`` unless ``transform_type`` is 'log' or 'power'."""
        if self.transform_type not in ('log', 'power'):
            raise ValueError(
                "transform_type must be 'log' or 'power', "
                f"got {self.transform_type!r}"
            )

    def fit(self, X, y=None):
        """Learn the label encoders and the column means.

        Parameters
        ----------
        X : pandas.DataFrame
        y : ignored

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``transform_type`` is neither 'log' nor 'power'.
        """
        self._check_transform_type()
        self.fit_start_time = time.time()

        numerical_columns = X.select_dtypes(include=[np.number]).columns
        categorical_columns = X.select_dtypes(include=['category', 'object']).columns

        # Start from an empty mapping so a refit on a frame with different
        # columns does not keep (and then try to refit) stale encoders.
        self.LabelEncoder = {}
        for col in categorical_columns:
            encoder = LabelEncoder()
            encoder.fit(X[col])
            self.LabelEncoder[col] = encoder

        if self.transform_type == 'log':
            self.log_mean = np.mean(np.log(X[numerical_columns]), axis=0)
        else:
            self.power_mean = np.mean(np.power(X[numerical_columns], 2), axis=0)

        if self.verbose:
            elapsed = round(time.time() - self.fit_start_time, 3)
            print(f'INFO: fitting of the custom Transformer finished correctly in {elapsed} seconds.')

        return self

    def transform(self, X):
        """Return centred numerical columns plus label-encoded categoricals.

        Parameters
        ----------
        X : pandas.DataFrame

        Returns
        -------
        pandas.DataFrame
            Numerical columns first, then one column per fitted encoder.

        Raises
        ------
        ValueError
            If ``transform_type`` is neither 'log' nor 'power'.
        """
        self._check_transform_type()
        # Encoded values are written into this copy, not the caller's frame.
        X = X.copy()
        self.transform_start_time = time.time()

        # Taken before encoding so encoded columns are not log/power transformed.
        numerical_columns = X.select_dtypes(include=[np.number]).columns

        for col, encoder in self.LabelEncoder.items():
            X[col] = encoder.transform(X[col])

        if self.transform_type == 'log':
            X_transformed = np.log(X[numerical_columns])
            X_transformed -= self.log_mean
        else:
            X_transformed = np.power(X[numerical_columns], 2)
            X_transformed -= self.power_mean

        # Returning only the numerical selection would throw the encodings away.
        for col in self.LabelEncoder:
            X_transformed[col] = X[col]

        if self.verbose:
            elapsed = round(time.time() - self.transform_start_time, 3)
            print(f'INFO: transformation of the custom Transformer finished correctly in {elapsed} seconds.')

        return X_transformed

In [None]:
import category_encoders as ce
import numpy as np
import time
from sklearn.base import BaseEstimator, TransformerMixin

class CustomTransformer2(BaseEstimator, TransformerMixin):
    """Ordinal-encode the categorical columns of a DataFrame.

    ``fit`` learns one ``ce.OrdinalEncoder`` per 'category'/'object' column;
    ``transform`` returns a copy of the input in which those columns are
    replaced by their codes. All other columns pass through unchanged: this
    variant applies no log/power transformation.

    Parameters
    ----------
    param1, param2
        Stored on the instance; no method of this class reads them.
    transform_type : str, default='log'
        Stored for interface compatibility; not used by this variant.
    verbose : bool, default=False
        When true, print the elapsed time of every call.
    """

    def __init__(self, param1=1, param2='default', transform_type='log', verbose=False):
        self.param1 = param1
        self.param2 = param2
        self.transform_type = transform_type
        self.verbose = verbose

        # Column name -> fitted encoder; rebuilt from scratch by every fit().
        self.ordinal_encoders = {}

    def fit(self, X, y=None):
        """Fit one ordinal encoder per categorical column of ``X``.

        Parameters
        ----------
        X : pandas.DataFrame
        y : ignored

        Returns
        -------
        self
        """
        self.fit_start_time = time.time()

        categorical_columns = X.select_dtypes(include=['category', 'object']).columns

        # Start from an empty mapping so a refit on a frame with different
        # columns does not keep (and then try to refit) stale encoders.
        self.ordinal_encoders = {}
        for col in categorical_columns:
            encoder = ce.OrdinalEncoder()
            encoder.fit(X[col])
            self.ordinal_encoders[col] = encoder

        # No column means are learned: the previous computation referenced a
        # numerical column selection that no longer exists, and transform()
        # does not apply the mean transformation in this variant.

        if self.verbose:
            elapsed = round(time.time() - self.fit_start_time, 3)
            print(f'INFO: fitting of the custom Transformer finished correctly in {elapsed} seconds.')

        return self

    def transform(self, X):
        """Return a copy of ``X`` with its categorical columns ordinal-encoded.

        Parameters
        ----------
        X : pandas.DataFrame

        Returns
        -------
        pandas.DataFrame
            Same columns as ``X``; the caller's frame is not modified.
        """
        X_transformed = X.copy()
        self.transform_start_time = time.time()

        for col, encoder in self.ordinal_encoders.items():
            X_transformed[col] = encoder.transform(X_transformed[col])

        if self.verbose:
            elapsed = round(time.time() - self.transform_start_time, 3)
            print(f'INFO: transformation of the custom Transformer finished correctly in {elapsed} seconds.')

        return X_transformed
