In [10]:
from fast_cubic_smpa import FastCubicSMPA
import numpy as np

# Synthetic smoke-test data: 100 samples, 3 features drawn uniformly from [0, 1).
# A seeded generator makes the data (and therefore the labels) reproducible
# across kernel restarts; the unseeded np.random.rand call was not.
rng = np.random.default_rng(42)
X = rng.random((100, 3))
# Label is 1 when the first two features sum to more than 1; the third feature
# does not influence the label.
y = (X[:, 0] + X[:, 1] > 1).astype(int)

model = FastCubicSMPA(n_control_points=6, max_iter=50)
model.fit(X, y)
# Predictions are made on the same rows used for fitting: this only checks that
# fit/predict run end to end, it is not a generalisation estimate.
preds = model.predict(X)
print(preds)

[1 0 1 0 0 0 0 0 1 1 1 0 1 0 0 0 1 0 0 0 0 1 0 1 0 1 1 0 1 0 1 0 1 1 1 1 0
 1 0 0 1 1 1 1 0 1 1 1 0 0 0 1 0 0 0 1 0 1 1 1 0 0 0 1 0 0 1 0 0 1 1 0 1 1
 1 1 0 0 1 0 1 1 0 0 1 1 1 1 0 1 1 0 1 1 0 1 0 1 1 1]


In [11]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split

In [12]:
# Load the diabetes dataset and hold out 30% of the rows for testing.
# random_state is fixed so the split is repeatable.
diabetes_df = pd.read_csv("./Datasets/diabetes.csv")
train_df, test_df = train_test_split(diabetes_df, test_size=0.3, random_state=25)

# Separate the "Outcome" target column from the feature columns of each split.
X_train, y_train = train_df.drop(columns="Outcome"), train_df["Outcome"]
X_test, y_test = test_df.drop(columns="Outcome"), test_df["Outcome"]

In [13]:
# Preview the held-out features as a NumPy array.
# `to_numpy()` returns a new array and nothing is assigned here, so `X_train`
# and `X_test` remain DataFrames for the cells below. The former bare
# `X_train.to_numpy()` line was dropped: its result was neither stored nor shown.
X_test.to_numpy()

array([[  9.   , 134.   ,  74.   , ...,  25.9  ,   0.46 ,  81.   ],
       [  4.   , 111.   ,  72.   , ...,  37.1  ,   1.39 ,  56.   ],
       [  8.   ,  95.   ,  72.   , ...,  36.8  ,   0.485,  57.   ],
       ...,
       [ 10.   , 162.   ,  84.   , ...,  27.7  ,   0.182,  54.   ],
       [  0.   ,  93.   , 100.   , ...,  43.4  ,   1.021,  35.   ],
       [  6.   , 123.   ,  72.   , ...,  33.6  ,   0.733,  34.   ]])

In [14]:
def reorder_least_correlated_feature(X, standardize=True, feature_order=None):
    """
    Move the feature that is least correlated with the others to the last column.

    When `feature_order` is None the order is computed from X: the mean absolute
    Pearson correlation of every feature with all *other* features is calculated,
    and the feature with the smallest mean is moved to the end while the
    remaining features keep their relative order. When `feature_order` is given
    (e.g. the order learned on the training set) it is applied as-is.

    Parameters:
    - X: numpy array or pandas DataFrame of shape (n_samples, n_features)
    - standardize: bool, run StandardScaler on X before computing correlations
      (only used when the order is computed)
    - feature_order: list, optional order from training set (default: None, compute from X)

    Returns:
    - X_reordered: same container type as X (DataFrame in, DataFrame out;
      anything else comes back as a numpy array) with columns permuted
    - feature_order: the column order that was applied (always returned)

    Notes:
    - A constant column has an undefined (NaN) correlation. It is treated as
      having zero correlation with every feature, so it does not poison the
      averages of the other columns.
    - With fewer than two features there is nothing to compare, so the
      identity order is returned.
    """
    is_dataframe = isinstance(X, pd.DataFrame)
    X_np = X.to_numpy() if is_dataframe else np.asarray(X)

    if feature_order is None:
        n_features = X_np.shape[1]

        if n_features < 2:
            # No "other" feature to correlate with; avoids the 0-d corrcoef
            # result and the division by (n_features - 1) == 0.
            feature_order = list(range(n_features))
        else:
            X_scaled = StandardScaler().fit_transform(X_np) if standardize else X_np

            # Zero-variance columns make corrcoef divide 0 by 0; silence the
            # resulting floating-point warnings and handle the NaNs explicitly.
            with np.errstate(divide="ignore", invalid="ignore"):
                corr_matrix = np.abs(np.corrcoef(X_scaled, rowvar=False))
            corr_matrix[np.isnan(corr_matrix)] = 0.0

            # Row sum minus the diagonal (self-correlation) averaged over the
            # other n_features - 1 columns.
            avg_corr = (np.sum(corr_matrix, axis=1) - np.diag(corr_matrix)) / (n_features - 1)
            target_idx = int(np.argmin(avg_corr))  # first minimum wins on ties

            feature_order = [idx for idx in range(n_features) if idx != target_idx]
            feature_order.append(target_idx)

    if is_dataframe:
        X_reordered = X.iloc[:, feature_order]
    else:
        X_reordered = X_np[:, feature_order]

    return X_reordered, feature_order

# Usage: learn the column order on the training features only, then apply that
# same order to the test features so both sets share one column layout.
X_train_reordered, feature_order = reorder_least_correlated_feature(X_train)
# Index the returned tuple rather than unpacking into `_`: in IPython, `_` is
# the last-output variable and rebinding it stops IPython from updating it.
X_test_reordered = reorder_least_correlated_feature(X_test, feature_order=feature_order)[0]

In [15]:
# Sanity check: show which container type the reordering returned for the
# DataFrame that was passed in.
type(X_train_reordered)

pandas.core.frame.DataFrame

In [19]:
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV
import time

In [20]:
def run_grid_search(X, y, param_grid=None, cv=5, scoring='accuracy'):
    """
    Grid-search FastCubicSMPA hyper-parameters with cross-validation and report the result.

    Prints the best parameters, the best mean CV score, the elapsed search time
    and a classification report of the refitted best estimator on the same
    (X, y) that was passed in. That report is therefore computed on data the
    model was fitted on and is optimistic.

    Parameters:
    - X: feature matrix passed to GridSearchCV.fit
    - y: target labels passed to GridSearchCV.fit
    - param_grid: dict, optional grid to search (default: None, use the built-in
      grid over n_control_points, max_iter and lambda_reg)
    - cv: cross-validation setting forwarded to GridSearchCV (default: 5)
    - scoring: scoring setting forwarded to GridSearchCV (default: 'accuracy')

    Returns:
    - the fitted GridSearchCV object

    Raises:
    - TypeError: raised by scikit-learn when it cannot clone the estimator,
      i.e. when FastCubicSMPA does not implement `get_params`.
    """
    if param_grid is None:
        param_grid = {
            'n_control_points': [6, 9, 12, 15],  # number of control points to try
            'max_iter': [200, 500, 1000],        # iteration budgets
            'lambda_reg': [1e-6, 1e-4, 1e-2],    # regularization strengths, incl. very low
        }

    grid_search = GridSearchCV(
        estimator=FastCubicSMPA(verbose=False),
        param_grid=param_grid,
        cv=cv,
        scoring=scoring,
        n_jobs=-1,
        verbose=1,
        return_train_score=True,
    )

    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # system clock adjustments the way time.time() can.
    start_time = time.perf_counter()
    grid_search.fit(X, y)
    fit_time = time.perf_counter() - start_time

    # Keeps the default label ("Accuracy") and stays sensible for callables.
    score_label = scoring.capitalize() if isinstance(scoring, str) else "Score"

    print("\nGrid Search Results:")
    print(f"Best Parameters: {grid_search.best_params_}")
    print(f"Best CV {score_label}: {grid_search.best_score_:.4f}")
    print(f"Total Fit Time: {fit_time:.2f} seconds")

    # Evaluate the refitted best estimator on the data given to the search.
    best_model = grid_search.best_estimator_
    preds = best_model.predict(X)
    print("\nClassification Report on Full Dataset:")
    print(classification_report(y, preds))

    return grid_search

In [21]:
# Keep a handle on the fitted search so best_estimator_ / cv_results_ can be
# inspected in later cells instead of being discarded.
# NOTE: the recorded run of this cell raised TypeError because FastCubicSMPA
# does not implement `get_params`, which GridSearchCV needs to clone it.
grid_search = run_grid_search(X_train_reordered, y_train)

TypeError: Cannot clone object '<fast_cubic_smpa.FastCubicSMPA object at 0x7b35c37a88f0>' (type <class 'fast_cubic_smpa.FastCubicSMPA'>): it does not seem to be a scikit-learn estimator as it does not implement a 'get_params' method.