<a href="https://colab.research.google.com/github/Madathanapalleleena/ML_LAB_152/blob/main/ML_wrapper.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from mlxtend.feature_selection import SequentialFeatureSelector as SFS, ExhaustiveFeatureSelector as EFS
from sklearn.feature_selection import RFE
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

In [7]:
wine = load_wine()
X = pd.DataFrame(wine.data, columns=wine.feature_names)
y = wine.target

In [4]:
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

In [5]:
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)
x_train_scaled_df = pd.DataFrame(x_train_scaled, columns=X.columns)
x_test_scaled_df = pd.DataFrame(x_test_scaled, columns=X.columns)

In [6]:
print(x_test_scaled.shape)

(54, 13)


In [10]:
clf = RandomForestClassifier(n_estimators=100, random_state=0, n_jobs=-1)

In [11]:
sfs = SFS(clf,
          k_features=5,
          forward=True,
          floating=False,
          scoring='accuracy',
          cv=3,
          n_jobs=-1)

print("Starting SFS (Forward Feature Selection)...")
sfs = sfs.fit(x_train_scaled, y_train)

print("\nSFS selected features:", sfs.k_feature_names_)
print("SFS best accuracy score: %.4f" % sfs.k_score_)
sfs_features_train = sfs.transform(x_train_scaled)
sfs_features_test = sfs.transform(x_test_scaled)
clf.fit(sfs_features_train, y_train)
print("SFS Test Accuracy: %.4f" % clf.score(sfs_features_test, y_test))

Starting SFS (Forward Feature Selection)...

SFS selected features: ('0', '1', '3', '6', '9')
SFS best accuracy score: 0.9760
SFS Test Accuracy: 0.9074


In [12]:
bfs = SFS(clf,
          k_features=5,
          forward=False,
          floating=False,
          scoring='accuracy',
          cv=3,
          n_jobs=-1)

print("\nStarting BFS (Backward Feature Selection)...")
bfs = bfs.fit(x_train_scaled, y_train)

# Print results
print("\nBFS selected features:", bfs.k_feature_names_)
print("BFS best accuracy score: %.4f" % bfs.k_score_)
bfs_features_train = bfs.transform(x_train_scaled)
bfs_features_test = bfs.transform(x_test_scaled)
clf.fit(bfs_features_train, y_train)
print("BFS Test Accuracy: %.4f" % clf.score(bfs_features_test, y_test))


Starting BFS (Backward Feature Selection)...

BFS selected features: ('0', '2', '4', '6', '9')
BFS best accuracy score: 0.9599
BFS Test Accuracy: 0.9630


In [13]:
efs = EFS(clf,
          min_features=1,
          max_features=4,
          scoring='accuracy',
          cv=3,
          n_jobs=-1)

print("\nStarting Exhaustive Feature Selector...")
efs = efs.fit(x_train_scaled, y_train)

print("\nExhaustive best features:", efs.best_feature_names_)
print("Exhaustive best accuracy score: %.4f" % efs.best_score_)
efs_features_train = efs.transform(x_train_scaled)
efs_features_test = efs.transform(x_test_scaled)
clf.fit(efs_features_train, y_train)
print("EFS Test Accuracy: %.4f" % clf.score(efs_features_test, y_test))


Starting Exhaustive Feature Selector...


Features: 1092/1092


Exhaustive best features: ('0', '4', '6', '10')
Exhaustive best accuracy score: 0.9758
EFS Test Accuracy: 0.9074


In [14]:
rfe = RFE(estimator=clf, n_features_to_select=5)

print("\nStarting RFE...")
rfe.fit(x_train_scaled, y_train)

print("\nRFE selected features:", X.columns[rfe.support_].tolist())
rfe_features_train = rfe.transform(x_train_scaled)
rfe_features_test = rfe.transform(x_test_scaled)
clf.fit(rfe_features_train, y_train)
print("RFE Test Accuracy: %.4f" % clf.score(rfe_features_test, y_test))


Starting RFE...

RFE selected features: ['alcohol', 'flavanoids', 'color_intensity', 'od280/od315_of_diluted_wines', 'proline']
RFE Test Accuracy: 1.0000


In [15]:
pca = PCA(n_components=5)
x_train_pca = pca.fit_transform(x_train_scaled)
x_test_pca = pca.transform(x_test_scaled)

clf.fit(x_train_pca, y_train)

print("\nPCA explained variance ratio:", pca.explained_variance_ratio_)
print("PCA Test Accuracy: %.4f" % clf.score(x_test_pca, y_test))


PCA explained variance ratio: [0.35730453 0.19209164 0.11006755 0.07250719 0.06973166]
PCA Test Accuracy: 0.9444


In [16]:
lda = LDA(n_components=2)
x_train_lda = lda.fit_transform(x_train_scaled, y_train)
x_test_lda = lda.transform(x_test_scaled)

clf.fit(x_train_lda, y_train)

print("\nLDA shape:", x_train_lda.shape)
print("LDA Test Accuracy: %.4f" % clf.score(x_test_lda, y_test))


LDA shape: (124, 2)
LDA Test Accuracy: 0.9630
