# Start

In [6]:
import os
import warnings
import numpy as np
import pandas as pd
from pymfe.mfe import MFE
from sklearn.preprocessing import MinMaxScaler

# Destination file for the meta-features table.
save_path = './metafeatures/meta-features-table.csv'
# Directory that holds the input datasets.
path = "./datasets/"
# os.listdir returns entries in arbitrary order; sort them so the datasets
# (and therefore the rows of the meta table) are processed in the same order
# on every run and on every machine.
files = sorted(os.listdir(path))

# Load datasets

In [4]:
# Index-aligned lists: position k of each list refers to the same dataset.
X_list = []        # feature matrices (NumPy arrays)
y_list = []        # target vectors (NumPy arrays)
dataset_list = []  # file names the data was read from

# Meta features are extracted using the whole dataset
# (not using only the train partition)
for f in files:
    csv_path = os.path.join(path, f)

    # Skip directory entries that are not regular files (such as
    # sub-directories); pd.read_csv cannot open them.
    if not os.path.isfile(csv_path):
        continue

    # Rows containing any missing value are discarded.
    df = pd.read_csv(csv_path).dropna()

    # The last column is taken as the target; pop() removes it from df,
    # so what remains are the feature columns.
    y = df.pop(df.columns[-1])
    X = df

    y_list.append(y.to_numpy())
    X_list.append(X.to_numpy())
    dataset_list.append(f)

# meta_table_index = pd.DataFrame(data=dataset_list, columns=['dataset_name'])
# meta_table_index.to_csv('./metafeatures/meta-features-table-index.csv', index = False)

# Extract meta-features

In [None]:
# Suppress every Python warning from this point on.
warnings.filterwarnings('ignore')

if not X_list:
    raise ValueError("No datasets were loaded; cannot build the meta table.")

# MFE with default settings AND random_state=42
mfe = MFE(random_state=42)

# Meta features are extracted from each dataset and then included
# as a meta instance (one row) in the meta table.
# NOTE(review): 'cat_cols' is passed to extract() here, as in the original
# code; confirm against the pymfe documentation whether it belongs to fit().
meta_rows = []
for X_data, y_data in zip(X_list, y_list):
    mfe.fit(X_data, y_data)
    ft = mfe.extract(cat_cols='auto', suppress_warnings=True)
    # ft[0] holds the meta-feature names and ft[1] the matching values;
    # keying each value by its name keeps columns aligned across datasets.
    meta_rows.append(dict(zip(ft[0], ft[1])))

# Build the table once instead of growing it row by row.
meta_table = pd.DataFrame(meta_rows)


# Largest finite float32 value; some models break on inputs beyond this
# magnitude, so it is used as the clamping bound below.
float32_max = float(np.finfo(np.float32).max)

# NaN values in the meta table are set to zero.
# Infinity values in the meta table are set to a very large number.
# The explicit float64 conversion matters: np.nan_to_num returns non-float
# arrays unchanged. Using the returned array (instead of copy=False) also
# leaves meta_table itself unmodified.
data = np.nan_to_num(meta_table.to_numpy(dtype=np.float64))

# Clamp every value into [-MAX(float32), +MAX(float32)]. The lower bound is
# needed as well: -inf was just mapped to the most negative float64, which is
# as far out of float32 range as the positive case.
np.clip(data, -float32_max, float32_max, out=data)

# Meta features normalization (each column rescaled by MinMaxScaler)
scaler = MinMaxScaler()
data = pd.DataFrame(scaler.fit_transform(data), columns=meta_table.columns)

# Save meta features table
# data.to_csv(save_path, index = False)