# In [3]:

# Preparing Data
import numpy as np
import pandas as pd
import math
import warnings
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
from sklearn.preprocessing import*
# Load the sonar "rocks vs. mines" dataset: 60 sensor readings per sample
# followed by a class label ('M' or 'R').
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/undocumented/connectionist-bench/sonar/sonar.all-data"
column_names = ["sensor_" + str(i) for i in range(1, 61)] + ["target"]
# The file has no header row. Without header=None pandas would consume the
# first sample as column labels and that observation would be lost.
df = pd.read_csv(url, header=None, names=column_names)
# Expose the row position as an explicit "id" column.
df = df.reset_index()
df = df.rename(columns={"index": "id"})
# Encode the label numerically: 'M' -> 1, 'R' -> 0.
df['target'] = df['target'].map({'M': 1, 'R': 0})
# Select features by name rather than by position: after reset_index() the
# "id" column sits in front, so a positional slice of the first 60 columns
# would keep "id" and drop sensor_60.
x_data = df.drop(columns=['id', 'target'])
y_data = df['target']
# Binary indicator: 1 where sensor_2 is below 0.02, else 0.
x_data['encoded_column'] = (x_data['sensor_2'] < 0.02).astype(int)
def log_transform(x, base=np.e, y=1):
    """Return the logarithm of ``x + y`` in the given *base*.

    The result is computed with the change-of-base identity
    ``log(x + y) / log(base)``. With the default offset ``y=1`` an input
    of 0 maps to 0.
    """
    shifted = x + y
    numerator = np.log(shifted)
    denominator = np.log(base)
    return numerator / denominator
def power_transform(x, power=2):
    """Raise *x* to *power* (squares by default)."""
    raised = pow(x, power)
    return raised
def logit_transform(x):
    """Return the log-odds ``log(x / (1 - x))`` of *x*.

    No clipping is applied: for NumPy inputs, values of exactly 0 or 1
    yield infinities and values outside [0, 1] yield NaN.
    """
    odds = x / (1 - x)
    return np.log(odds)
def exp_transform(x, base=np.e, y=0):
    """Return *base* raised to ``x + y`` (natural exponential by default)."""
    exponent = x + y
    return pow(base, exponent)
def reciprocal_transform(x):
    """Return the multiplicative inverse ``1 / x``.

    Zeros are not guarded against; for NumPy inputs they produce
    infinities.
    """
    inverse = 1 / x
    return inverse
def arcsine_transform(x):
    """Return ``arcsin(sqrt(x))``, the arcsine square-root transform."""
    root = np.sqrt(x)
    return np.arcsin(root)
def trig_transform(x, func=np.sin):
    """Apply *func* to *x* and return the result (``np.sin`` by default)."""
    result = func(x)
    return result
def arc_trig_transform(x, func=np.sin):
    """Apply *func* to the square root of *x*.

    Despite the name, the default *func* is ``np.sin``; pass an inverse
    function such as ``np.arcsin`` explicitly to get an inverse-trig
    transform.
    """
    root = np.sqrt(x)
    return func(root)
def apply_transformer_and_suffix(dataframe, transformer, suffix, **kwargs):
    """Transform the float64 columns of *dataframe* and tag them with *suffix*.

    Args:
        dataframe: Input table. It is not modified.
        transformer: Callable applied once to the sub-frame of float64
            columns. It should return a DataFrame or an array-like with
            one column per input column.
        suffix: Text appended, after an underscore, to the name of every
            transformed column.
        **kwargs: Extra keyword arguments forwarded to *transformer*
            (for example ``base=2`` or ``power=0.5``).

    Returns:
        A new DataFrame holding the untouched non-float64 columns first,
        followed by the transformed columns renamed ``<name>_<suffix>``.
    """
    # Only float64 columns are treated as continuous; other dtypes (for
    # example integer indicator columns) pass through unchanged.
    continuous_columns = dataframe.select_dtypes(include=['float64']).columns

    # Forward the caller's keyword arguments. Dropping them would make
    # every transformer silently run with its default parameters.
    transformed = transformer(dataframe[continuous_columns], **kwargs)

    new_columns = [f"{col}_{suffix}" for col in continuous_columns]

    if isinstance(transformed, pd.DataFrame):
        # Copy before renaming so a transformer that returns its input
        # unchanged cannot cause the caller's frame to be relabelled.
        transformed_df = transformed.copy()
        transformed_df.columns = new_columns
    else:
        # Array-like result: rebuild a frame aligned to the input rows.
        transformed_df = pd.DataFrame(
            np.asarray(transformed),
            index=dataframe.index,
            columns=new_columns,
        )

    # Re-attach the columns that were not transformed.
    non_continuous_columns = dataframe.drop(columns=continuous_columns)
    return pd.concat([non_continuous_columns, transformed_df], axis=1)


# Build one transformed copy of the feature table per transformation.
# The suffix labels the resulting columns; the keyword arguments are the
# parameters meant for the transformer.
log2_df = apply_transformer_and_suffix(x_data, log_transform, "log2", base=2, y=1)
# np.e rather than a truncated 2.71828, so the natural-log base is exact.
lognat_df = apply_transformer_and_suffix(x_data, log_transform, "lognat", base=np.e, y=1)
log10_df = apply_transformer_and_suffix(x_data, log_transform, "log10", base=10, y=1)
sqrt_df = apply_transformer_and_suffix(x_data, power_transform, "sqrt", power=0.5)
sq_df = apply_transformer_and_suffix(x_data, power_transform, "sq", power=2)
cub_df = apply_transformer_and_suffix(x_data, power_transform, "cub", power=3)
logit_df = apply_transformer_and_suffix(x_data, logit_transform, "logit")
exp_df = apply_transformer_and_suffix(x_data, exp_transform, "exp", base=np.e)
exp2_df = apply_transformer_and_suffix(x_data, exp_transform, "exp2", base=2)
reciprocal_df = apply_transformer_and_suffix(x_data, reciprocal_transform, "reciprocal")
sin_df = apply_transformer_and_suffix(x_data, trig_transform, "sin", func=np.sin)
sinh_df = apply_transformer_and_suffix(x_data, trig_transform, "sinh", func=np.sinh)
cos_df = apply_transformer_and_suffix(x_data, trig_transform, "cos", func=np.cos)
cosh_df = apply_transformer_and_suffix(x_data, trig_transform, "cosh", func=np.cosh)
tan_df = apply_transformer_and_suffix(x_data, trig_transform, "tan", func=np.tan)
tanh_df = apply_transformer_and_suffix(x_data, trig_transform, "tanh", func=np.tanh)
# Inverse-trig variants: request the inverse functions so the computation
# matches the "arc*" column suffixes.
# NOTE(review): arcsin/arccos of sqrt(x) are only defined for x in [0, 1];
# confirm the sensor readings stay within that range.
arc_sin = apply_transformer_and_suffix(x_data, arc_trig_transform, "arcsin", func=np.arcsin)
arc_cos = apply_transformer_and_suffix(x_data, arc_trig_transform, "arccos", func=np.arccos)
arc_tan = apply_transformer_and_suffix(x_data, arc_trig_transform, "arctan", func=np.arctan)

all_dfs = [log2_df, lognat_df, log10_df, sqrt_df, sq_df, cub_df, logit_df, exp_df, exp2_df,
           reciprocal_df, sin_df, sinh_df, cos_df, cosh_df, tan_df, tanh_df, arc_sin, arc_cos, arc_tan]
# Join the transformed tables side by side (column-wise). A column name
# that occurs in several tables is taken only from the first table that
# provides it.
seen_columns = set()
unique_parts = []
# The loop variable is deliberately not called `df`, so the dataset frame
# of that name is not overwritten.
for transformed_frame in all_dfs:
    fresh_columns = [col for col in transformed_frame.columns if col not in seen_columns]
    seen_columns.update(fresh_columns)
    unique_parts.append(transformed_frame[fresh_columns])
# A single concat avoids re-copying the growing result on every iteration.
merged_df = pd.concat(unique_parts, axis=1)
