In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
import time
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.linear_model import LogisticRegression
import pickle
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

from tabulate import tabulate
import warnings

In [2]:
# Load the wine data from CSV; index_col=None means no column is used as the row index.
dataset1 = pd.read_csv("Wine.csv", index_col=None)

# One-hot encode any categorical columns as integer indicators, dropping the
# first level of each encoded column. The raw frame stays available as dataset1.
df2 = pd.get_dummies(dataset1, dtype=int, drop_first=True)

# Positional split of the encoded frame: columns 0..12 are the independent
# variables, column 13 is the dependent variable.
indep_x = df2.iloc[:, :13].values
dep_y = df2.iloc[:, 13].values

In [3]:
# Hold out 30% of the rows for testing; the fixed random_state keeps the
# partition the same from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    indep_x,
    dep_y,
    test_size=0.3,
    random_state=42,
)

In [4]:
# Learn the scaling statistics from the training split only, then apply that
# same fitted transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [5]:
# Candidate models, keyed by the label under which their results are reported.
# Insertion order is the order in which the models are evaluated.
classifiers = dict([
    ('KNN', KNeighborsClassifier(n_neighbors=3)),
    ('Random Forest', RandomForestClassifier(random_state=42)),
    ('SVM', SVC(kernel='linear', random_state=42)),
    ('SVMNL', SVC(kernel='rbf', random_state=0)),
    ('NAVIE', GaussianNB()),
    ('Decision Tree', DecisionTreeClassifier(random_state=42)),
    ('Logistic Regression', LogisticRegression(random_state=42)),
])


In [16]:
# Compare each classifier's test accuracy on the full scaled feature set with
# its accuracy on the LDA-projected features.
# NOTE: the "LCA" wording in the result keys and printed text refers to the
# LinearDiscriminantAnalysis (LDA) step below; the strings are left unchanged
# so the column names of results_df stay the same.
results = {'Model': [], 'Without LCA Accuracy': [], 'With LCA Accuracy': [], 'Before LCA Columns': [], 'After LCA Columns': [], 'LCA Components': []}

# The projection is fitted on the training data only and does not depend on
# the classifier, so it is computed once here rather than on every iteration.
lda = LDA(n_components=2)
X_train_lca = lda.fit_transform(X_train_scaled, y_train)
X_test_lca = lda.transform(X_test_scaled)

# Name the components after the actual width of the projected matrix.
selected_comp = [f'LDA{i+1}' for i in range(X_train_lca.shape[1])]

for name, clf in classifiers.items():
    # Per-model bookkeeping: label, component names and feature counts.
    results['Model'].append(name)
    results['LCA Components'].append(', '.join(selected_comp))
    results['Before LCA Columns'].append(X_train_scaled.shape[1])
    results['After LCA Columns'].append(X_train_lca.shape[1])

    # Baseline: train and score on all scaled features.
    clf.fit(X_train_scaled, y_train)
    accuracy_before_lca = clf.score(X_test_scaled, y_test)
    results['Without LCA Accuracy'].append(accuracy_before_lca)

    # Refit the same estimator object on the projected features. After the
    # loop, each entry in `classifiers` is therefore left fitted on the
    # LDA-projected data, not on the full feature set.
    clf.fit(X_train_lca, y_train)
    accuracy_after_lca = clf.score(X_test_lca, y_test)
    results['With LCA Accuracy'].append(accuracy_after_lca)

# Collect the per-model lists into one table (one row per classifier).
results_df = pd.DataFrame(results)

# Render the table as plain text via tabulate, without the index column.
print("\nSummary of Model Accuracies with LCA Components:")
print(tabulate(results_df, headers='keys', tablefmt='pretty', showindex=False))


Summary of Model Accuracies with LCA Components:
+---------------------+----------------------+--------------------+--------------------+-------------------+----------------+
|        Model        | Without LCA Accuracy | With LCA Accuracy  | Before LCA Columns | After LCA Columns | LCA Components |
+---------------------+----------------------+--------------------+--------------------+-------------------+----------------+
|         KNN         |  0.9629629629629629  | 0.9814814814814815 |         13         |         2         |   LDA1, LDA2   |
|    Random Forest    |         1.0          | 0.9814814814814815 |         13         |         2         |   LDA1, LDA2   |
|         SVM         |  0.9814814814814815  | 0.9814814814814815 |         13         |         2         |   LDA1, LDA2   |
|        SVMNL        |  0.9814814814814815  | 0.9814814814814815 |         13         |         2         |   LDA1, LDA2   |
|        NAVIE        |         1.0          |        1.0         | 