In [2]:
from sklearn.datasets import load_iris
import pandas as pd

# Fetch the bundled Iris data and expose the feature matrix and label vector
# under the short names used by the cells further down.
data = load_iris()
X = data.data
y = data.target

# Tabular view for easier reading: one column per feature name reported by the
# dataset, with the class label appended as a final `target` column.
df = pd.DataFrame(X, columns=data.feature_names).assign(target=y)

df.head()  # Show the first 5 rows


Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [3]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing.
# - `random_state=42` pins the shuffle so the split is the same on every run.
# - `stratify=y` keeps the class proportions of `y` equal in both parts, so the
#   small test set cannot end up over- or under-representing a class by chance.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Sanity check: the two parts must account for every row of the original data.
assert len(X_train) + len(X_test) == len(X)

print(f"Training set shape: {X_train.shape}")
print(f"Test set shape: {X_test.shape}")


Training set shape: (120, 4)
Test set shape: (30, 4)


In [4]:
from autoopt.preprocessing import standardize_data

# Run the library's scaling helper once per supported `method` value, in the
# same order as the printed sections below.
# NOTE(review): the input is the full `X` (train and test rows together), which
# is fine for a side-by-side preview. Presumably the helper fits its scaler on
# whatever it is given, so if these arrays are ever used for modelling, scale
# from `X_train` only to avoid leaking test-set statistics. Confirm against
# `standardize_data`.
X_standard, X_minmax, X_robust = (
    standardize_data(X, method=scaler_kind)
    for scaler_kind in ("standard", "minmax", "robust")
)

# Show only the first five rows of each result to keep the output short.
print("StandardScaler (first 5 rows):\n", X_standard[:5])
print("MinMaxScaler (first 5 rows):\n", X_minmax[:5])
print("RobustScaler (first 5 rows):\n", X_robust[:5])


StandardScaler (first 5 rows):
 [[-0.90068117  1.01900435 -1.34022653 -1.3154443 ]
 [-1.14301691 -0.13197948 -1.34022653 -1.3154443 ]
 [-1.38535265  0.32841405 -1.39706395 -1.3154443 ]
 [-1.50652052  0.09821729 -1.2833891  -1.3154443 ]
 [-1.02184904  1.24920112 -1.34022653 -1.3154443 ]]
MinMaxScaler (first 5 rows):
 [[0.22222222 0.625      0.06779661 0.04166667]
 [0.16666667 0.41666667 0.06779661 0.04166667]
 [0.11111111 0.5        0.05084746 0.04166667]
 [0.08333333 0.45833333 0.08474576 0.04166667]
 [0.19444444 0.66666667 0.06779661 0.04166667]]
RobustScaler (first 5 rows):
 [[-0.53846154  1.         -0.84285714 -0.73333333]
 [-0.69230769  0.         -0.84285714 -0.73333333]
 [-0.84615385  0.4        -0.87142857 -0.73333333]
 [-0.92307692  0.2        -0.81428571 -0.73333333]
 [-0.61538462  1.2        -0.84285714 -0.73333333]]


In [5]:
from autoopt.preprocessing import normalize_data

# Normalise the full feature matrix with the library helper's default settings.
# The recorded rows each have unit Euclidean length, matching the "L2" label
# used in the printout.
X_normalized = normalize_data(X)

# Preview only the first five rows.
normalized_preview = X_normalized[:5]
print("L2 Normalization (first 5 rows):\n", normalized_preview)


L2 Normalization (first 5 rows):
 [[0.80377277 0.55160877 0.22064351 0.0315205 ]
 [0.82813287 0.50702013 0.23660939 0.03380134]
 [0.80533308 0.54831188 0.2227517  0.03426949]
 [0.80003025 0.53915082 0.26087943 0.03478392]
 [0.790965   0.5694948  0.2214702  0.0316386 ]]


In [7]:
from autoopt.optimizer import run_grid_search

# Grid search for the "random_forest" model, fitted on the training split only.
# The helper hands back a (best estimator, best parameter dict) pair.
# NOTE(review): the recorded output of this cell differs from the recorded
# output of the identical call later in the notebook, so the search is not
# reproducible as written. Presumably the forest is not seeded; check whether
# `run_grid_search` accepts a seed.
grid_search_result = run_grid_search("random_forest", X_train, y_train)
best_model_grid, best_params_grid = grid_search_result

print("Best Model (GridSearchCV):", best_model_grid)
print("Best Parameters (GridSearchCV):", best_params_grid)



Fitting 5 folds for each of 27 candidates, totalling 135 fits
Best Model (GridSearchCV): RandomForestClassifier(min_samples_split=5)
Best Parameters (GridSearchCV): {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 100}


In [9]:
from autoopt.optimizer import run_randomized_search

# Randomised search for the "svm" model on the training split, limited to
# five sampled candidates (`n_iter=5`). Returns the best estimator together
# with the parameter dict that produced it.
random_search_result = run_randomized_search(
    "svm",
    X_train,
    y_train,
    n_iter=5,
)
best_model_random, best_params_random = random_search_result

print("Best Model (RandomizedSearchCV):", best_model_random)
print("Best Parameters (RandomizedSearchCV):", best_params_random)


Fitting 5 folds for each of 5 candidates, totalling 25 fits
Best Model (RandomizedSearchCV): SVC(C=1, gamma='auto', kernel='linear')
Best Parameters (RandomizedSearchCV): {'kernel': 'linear', 'gamma': 'auto', 'C': 1}


In [10]:
from autoopt.pruning import run_pruning_algorithms

# Pruning driven by a "logistic_regression" model with `alpha=0.1`, applied to
# the training split. The helper returns the fitted model and the reduced
# feature matrix.
# NOTE(review): in the recorded run the result still has all 4 columns, i.e.
# nothing was pruned at this `alpha`. Presumably `alpha` controls the L1
# penalty strength; confirm its direction before tuning it.
l1_pruning_result = run_pruning_algorithms(
    "logistic_regression", X_train, y_train, alpha=0.1
)
pruned_model_l1, X_pruned_l1 = l1_pruning_result

print(f"L1 Pruning - Shape after pruning: {X_pruned_l1.shape}")


L1 Pruning - Shape after pruning: (120, 4)


In [12]:
from autoopt.optimizer import run_grid_search

# NOTE(review): this cell repeats the earlier "random_forest" grid-search cell
# verbatim and overwrites `best_model_grid` / `best_params_grid`. The recorded
# result here differs from the earlier one, which shows the search is not
# deterministic as written. Consider removing one of the two cells.
grid_search_result = run_grid_search("random_forest", X_train, y_train)
best_model_grid, best_params_grid = grid_search_result

print("Best Model (GridSearchCV):", best_model_grid)
print("Best Parameters (GridSearchCV):", best_params_grid)


Fitting 5 folds for each of 27 candidates, totalling 135 fits
Best Model (GridSearchCV): RandomForestClassifier(n_estimators=200)
Best Parameters (GridSearchCV): {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 200}


In [13]:
from autoopt.optimizer import run_randomized_search

# NOTE(review): this cell repeats the earlier "svm" randomised-search cell
# verbatim and overwrites `best_model_random` / `best_params_random`.
# Consider removing one of the two cells.
random_search_result = run_randomized_search(
    "svm",
    X_train,
    y_train,
    n_iter=5,
)
best_model_random, best_params_random = random_search_result

print("Best Model (RandomizedSearchCV):", best_model_random)
print("Best Parameters (RandomizedSearchCV):", best_params_random)


Fitting 5 folds for each of 5 candidates, totalling 25 fits
Best Model (RandomizedSearchCV): SVC(C=1, gamma='auto', kernel='linear')
Best Parameters (RandomizedSearchCV): {'kernel': 'linear', 'gamma': 'auto', 'C': 1}


In [15]:
from autoopt.pruning import run_pruning_algorithms

# NOTE(review): this cell repeats the earlier "logistic_regression" pruning
# cell verbatim and overwrites `pruned_model_l1` / `X_pruned_l1`.
# Consider removing one of the two cells.
l1_pruning_result = run_pruning_algorithms(
    "logistic_regression", X_train, y_train, alpha=0.1
)
pruned_model_l1, X_pruned_l1 = l1_pruning_result

print(f"L1 Pruning - Shape after pruning: {X_pruned_l1.shape}")


L1 Pruning - Shape after pruning: (120, 4)


In [17]:
# Pruning driven by a "random_forest" model, asking for 3 features to be kept
# (the recorded result has 3 columns). `run_pruning_algorithms` is not imported
# in this cell; it relies on the import made by an earlier pruning cell.
importance_pruning_result = run_pruning_algorithms(
    "random_forest", X_train, y_train, n_features_to_select=3
)
pruned_model_importance, X_pruned_importance = importance_pruning_result

print(f"Feature Importance Pruning - Shape after pruning: {X_pruned_importance.shape}")


Feature Importance Pruning - Shape after pruning: (120, 3)


In [18]:
from autoopt.pruning import recursive_feature_elimination
from sklearn.ensemble import RandomForestClassifier

# Seeded 100-tree forest handed to the elimination helper as the estimator.
rfe_model = RandomForestClassifier(random_state=42, n_estimators=100)

# Recursive feature elimination on the training split, asking for 3 features.
# The helper returns a model and the reduced feature matrix.
pruned_model_rfe, X_pruned_rfe = recursive_feature_elimination(
    rfe_model,
    X_train,
    y_train,
    n_features_to_select=3,
)

print(f"RFE Pruning - Shape after pruning: {X_pruned_rfe.shape}")


RFE Pruning - Shape after pruning: (120, 3)
