In [None]:
import pandas as pd
from scipy.io import arff
import os

class DataLoader:
    """
    Load ARFF datasets stored as ``<data_dir>/<dataset_name>.arff`` into pandas.

    The dataset can be fixed when the loader is created
    (``DataLoader("vowel").load_arff_data()``) or chosen per call
    (``DataLoader().load_arff_data("vowel")``), so a single loader instance can
    be reused for several datasets.
    """

    # ARFF encodes a missing value as a bare question mark. loadarff already
    # turns it into NaN for numeric attributes, but keeps the raw token for
    # nominal attributes.
    _MISSING_TOKEN = "?"

    def __init__(self, dataset_name=None, data_dir="data"):
        """
        Initialize the data loader.
        :param dataset_name: Optional default dataset (the filename without
            extension). When omitted, the name must be given to load_arff_data.
        :param data_dir: Directory that contains the ``.arff`` files.
        """
        self.data_dir = data_dir
        # Kept as an attribute for callers that read the resolved path;
        # None means "no default dataset configured".
        self.file_path = None if dataset_name is None else self._path_for(dataset_name)

    def _path_for(self, dataset_name):
        """Return the path of the ARFF file that belongs to ``dataset_name``."""
        return os.path.join(self.data_dir, f"{dataset_name}.arff")

    def load_arff_data(self, dataset_name=None):
        """
        Load an ARFF file and convert it to a pandas DataFrame.
        Missing values are returned as NaN, for numeric and nominal columns alike.
        Categorical data is decoded from bytes to strings.
        :param dataset_name: Optional dataset to load (filename without
            extension). Falls back to the dataset given to the constructor.
        :return: A pandas DataFrame with the loaded data.
        :raises ValueError: If no dataset name was given here or at construction.
        """
        file_path = self.file_path if dataset_name is None else self._path_for(dataset_name)
        if file_path is None:
            raise ValueError(
                "No dataset specified: pass dataset_name to DataLoader(...) "
                "or to load_arff_data(...)."
            )

        data, _meta = arff.loadarff(file_path)
        df = pd.DataFrame(data)

        # arff.loadarff returns nominal values as bytes, so decode them to str.
        for col in df.select_dtypes([object]).columns:
            decoded = df[col].apply(lambda x: x.decode('utf-8') if isinstance(x, bytes) else x)
            # Nominal missing values arrive as the literal "?" token; blank
            # them out so the frame uses NaN consistently.
            df[col] = decoded.mask(decoded == self._MISSING_TOKEN)

        return df


In [7]:
# Create one loader and reuse it for all three datasets; the dataset name is
# passed to load_arff_data() rather than to the constructor.
data_loader = DataLoader()

# Each call's result is bound to a single name here (nothing is unpacked).
df_satimage = data_loader.load_arff_data("satimage")
df_splice   = data_loader.load_arff_data("splice")
df_vowel    = data_loader.load_arff_data("vowel")

In [1]:
from preprocessing import DataLoader, DataProcessor

In [2]:
# Create the loader and the processor that the rest of this cell uses.
data_loader     = DataLoader()
data_processor  = DataProcessor()

# Here load_arff_data() is unpacked into two values per dataset
# (a frame and its labels) — presumably the DataLoader imported from
# `preprocessing` returns a pair; confirm against that module.
df_satimage, labels_satimage= data_loader.load_arff_data("satimage")
df_splice, labels_splice    = data_loader.load_arff_data("splice")
df_vowel, labels_vowel      = data_loader.load_arff_data("vowel")


# Preprocess each frame, rebinding the same name to the result (the raw frame
# is no longer reachable under that name afterwards).
# NOTE(review): the recorded output of this cell is
# "TypeError: __init__() got an unexpected keyword argument 'sparse'". The
# failing constructor call is not visible in this notebook — presumably it is
# inside `preprocessing` (e.g. an encoder built with `sparse=...`); confirm.
df_satimage = data_processor.preprocess_dataset(df_satimage)
df_splice   = data_processor.preprocess_dataset(df_splice)
df_vowel    = data_processor.preprocess_dataset(df_vowel)

TypeError: __init__() got an unexpected keyword argument 'sparse'

In [8]:
# Display the first rows of the splice frame as the cell's output.
df_splice.head()

Unnamed: 0,attribute_1,attribute_2,attribute_3,attribute_4,attribute_5,attribute_6,attribute_7,attribute_8,attribute_9,attribute_10,...,attribute_51,attribute_52,attribute_53,attribute_54,attribute_55,attribute_56,attribute_57,attribute_58,attribute_59,attribute_60
0,C,C,A,G,C,T,G,C,A,T,...,A,G,C,C,A,G,T,C,T,G
1,A,G,A,C,C,C,G,C,C,G,...,G,T,G,C,C,C,C,C,G,C
2,G,A,G,G,T,G,A,A,G,G,...,C,A,C,G,G,G,G,A,T,G
3,G,G,G,C,T,G,C,G,T,T,...,G,G,T,T,T,T,C,C,C,C
4,G,C,T,C,A,G,C,C,C,C,...,C,C,T,T,G,A,C,C,C,T
