In [1]:
from fast_cubic_smpa import FastCubicSMPA
import numpy as np

# Synthetic demo data: 100 samples with 3 features drawn uniformly from [0, 1).
# A seeded generator makes the data identical on every Restart & Run All
# (the printed predictions are then reproducible too, assuming FastCubicSMPA
# itself is deterministic -- TODO confirm).
rng = np.random.default_rng(0)
X = rng.random((100, 3))
# Label is 1 when the first two features sum to more than 1, else 0.
y = (X[:, 0] + X[:, 1] > 1).astype(int)

model = FastCubicSMPA(n_control_points=6, max_iter=50)
model.fit(X, y)
# Predict on the training data itself; this cell is only a smoke test.
preds = model.predict(X)
print(preds)

[0 1 1 1 0 0 0 1 0 1 0 1 1 1 1 1 0 0 0 1 1 1 0 1 0 0 0 0 1 0 0 0 1 0 1 1 1
 0 1 0 1 1 1 1 0 1 0 1 1 1 0 0 1 0 0 0 1 0 1 1 1 1 0 0 1 0 1 0 0 0 0 1 1 1
 0 0 1 0 1 0 0 0 1 0 0 1 1 1 1 1 0 0 0 0 1 1 0 0 1 0]


In [2]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split

In [3]:
# Load the diabetes CSV and hold out 30% of the rows as a test split
# (fixed random_state so the split is the same on every run).
diabetes_df = pd.read_csv("./Datasets/diabetes.csv")
train_df, test_df = train_test_split(
    diabetes_df,
    test_size=0.3,
    random_state=25,
)

# Separate the "Outcome" label column from the remaining feature columns.
X_train, y_train = train_df.drop(columns="Outcome"), train_df["Outcome"]
X_test, y_test = test_df.drop(columns="Outcome"), test_df["Outcome"]

In [4]:
# NOTE(review): DataFrame.to_numpy() returns a new array; it does not convert
# X_train / X_test in place. The first result is discarded and only the second
# (the cell's last expression) is displayed, so both names still refer to
# DataFrames after this cell runs.
X_train.to_numpy()
X_test.to_numpy()

array([[  9.   , 134.   ,  74.   , ...,  25.9  ,   0.46 ,  81.   ],
       [  4.   , 111.   ,  72.   , ...,  37.1  ,   1.39 ,  56.   ],
       [  8.   ,  95.   ,  72.   , ...,  36.8  ,   0.485,  57.   ],
       ...,
       [ 10.   , 162.   ,  84.   , ...,  27.7  ,   0.182,  54.   ],
       [  0.   ,  93.   , 100.   , ...,  43.4  ,   1.021,  35.   ],
       [  6.   , 123.   ,  72.   , ...,  33.6  ,   0.733,  34.   ]])

In [5]:
def reorder_least_correlated_feature(X, standardize=True, feature_order=None):
    """
    Move the feature least correlated with the others to the last column.

    When ``feature_order`` is None, the order is computed from ``X``: the
    absolute correlation matrix of the columns is built, each feature's mean
    absolute correlation with the *other* features is taken, and the feature
    with the smallest mean is moved to the end while the remaining features
    keep their relative order. When ``feature_order`` is given (e.g. the
    order computed on the training set), it is applied unchanged.

    Parameters:
    - X: 2-D numpy array / array-like or pandas DataFrame, one column per feature
    - standardize: bool, scale the columns with StandardScaler before computing
      correlations (only used when feature_order is None)
    - feature_order: sequence of column positions, optional order from the
      training set (default: None, compute from X)

    Returns:
    - X_reordered: DataFrame if X was a DataFrame, otherwise a numpy array
    - feature_order: list of column positions that was applied (always
      returned, whether it was computed here or passed in)
    """
    is_dataframe = isinstance(X, pd.DataFrame)
    X_np = X.to_numpy() if is_dataframe else np.asarray(X)

    if feature_order is None:
        n_features = X_np.shape[1]
        if n_features < 2:
            # With fewer than two features there is nothing to compare:
            # np.corrcoef collapses to a scalar and the average below would
            # divide by zero, so keep the columns where they are.
            feature_order = list(range(n_features))
        else:
            if standardize:
                X_scaled = StandardScaler().fit_transform(X_np)
            else:
                X_scaled = X_np

            # NOTE(review): a constant column produces NaN correlations here;
            # np.argmin then appears to return the first NaN position --
            # confirm whether such columns need explicit handling.
            corr_matrix = np.abs(np.corrcoef(X_scaled, rowvar=False))
            # Subtract the diagonal (self-correlation) so each row averages
            # only over the other n_features - 1 features.
            avg_corr = (corr_matrix.sum(axis=1) - np.diag(corr_matrix)) / (n_features - 1)
            target_idx = int(np.argmin(avg_corr))
            feature_order = [idx for idx in range(n_features) if idx != target_idx]
            feature_order.append(target_idx)
    else:
        # Normalise tuples / arrays to a list so positional indexing treats
        # the value as a column selection for both DataFrames and arrays.
        feature_order = list(feature_order)

    if is_dataframe:
        X_reordered = X.iloc[:, feature_order]
    else:
        X_reordered = X_np[:, feature_order]

    return X_reordered, feature_order

# Usage: compute the column order on the training set only, then apply that
# same order to the test set so both splits share one feature layout.
X_train_reordered, feature_order = reorder_least_correlated_feature(X_train)
# Index the returned tuple instead of unpacking into `_`: assigning `_` at
# notebook top level rebinds the name IPython uses for the last cell output.
X_test_reordered = reorder_least_correlated_feature(X_test, feature_order=feature_order)[0]

In [6]:
# Check which container type the reordering step returned for the training features.
type(X_train_reordered)

pandas.core.frame.DataFrame

In [7]:
from sklearn.metrics import classification_report

In [16]:
# Fit on the reordered training features, then predict the held-out test split.
# NOTE(review): `model` and `preds` rebind the names used by the synthetic demo
# in the first cell; lambda_reg=0 presumably disables regularisation -- confirm
# against FastCubicSMPA's documentation.
model = FastCubicSMPA(n_control_points=10, max_iter=1000, lambda_reg=0)
model.fit(X_train_reordered, y_train)
preds = model.predict(X_test_reordered)
# Summarise the test predictions against the true labels; printed in the next cell.
cr = classification_report(y_test, preds)

In [17]:
# Use print() so the report's line breaks render as a table rather than as an escaped string repr.
print(cr)

              precision    recall  f1-score   support

           0       0.81      0.57      0.67       160
           1       0.42      0.69      0.52        71

    accuracy                           0.61       231
   macro avg       0.61      0.63      0.60       231
weighted avg       0.69      0.61      0.63       231

