In [110]:
import pandas as pd
from sklearn.model_selection import train_test_split 
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [111]:
def LDA_features(indep_X, dep_Y):
    sc = StandardScaler()
    indep_X = sc.fit_transform(indep_X)
    
    lda = LDA(n_components=2)   # 👈 Change number here (2, 3, or 4)
    lda_features = lda.fit_transform(indep_X, dep_Y)
    return lda_features

In [112]:
# --- Split Function ---
def split_scalar(indep_X, dep_Y):
    X_train, X_test, y_train, y_test = train_test_split(indep_X, dep_Y, test_size=0.25, random_state=0)
    return X_train, X_test, y_train, y_test

In [113]:
# --- Common Prediction Function ---
def cm_prediction(classifier, X_test):
    y_pred = classifier.predict(X_test)
    cm = confusion_matrix(y_test, y_pred)
    Accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)
    return classifier, Accuracy, report, X_test, y_test, cm

In [114]:
# --- Model Functions ---
def logistic(X_train, y_train, X_test):
    classifier = LogisticRegression(random_state=0, max_iter=2000)
    classifier.fit(X_train, y_train)
    return cm_prediction(classifier, X_test)

def svm_linear(X_train, y_train, X_test):
    classifier = SVC(kernel='linear', random_state=0)
    classifier.fit(X_train, y_train)
    return cm_prediction(classifier, X_test)

def svm_NL(X_train, y_train, X_test):
    classifier = SVC(kernel='rbf', random_state=0)
    classifier.fit(X_train, y_train)
    return cm_prediction(classifier, X_test)

def Navie(X_train, y_train, X_test):
    classifier = GaussianNB()
    classifier.fit(X_train, y_train)
    return cm_prediction(classifier, X_test)

def knn(X_train, y_train, X_test):
    classifier = KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=2)
    classifier.fit(X_train, y_train)
    return cm_prediction(classifier, X_test)

def Decision(X_train, y_train, X_test):
    classifier = DecisionTreeClassifier(criterion='entropy', random_state=0)
    classifier.fit(X_train, y_train)
    return cm_prediction(classifier, X_test)

def random(X_train, y_train, X_test):
    classifier = RandomForestClassifier(n_estimators=10, criterion='entropy', random_state=0)
    classifier.fit(X_train, y_train)
    return cm_prediction(classifier, X_test)

# --- Table Function ---
def LDA_Classification(acclog, accsvml, accsvmnl, accknn, accnav, accdes, accrf):
    dataframe = pd.DataFrame(index=['LDA'], columns=['Logistic','SVMl','SVMnl','KNN','Navie','Decision','Random'])
    for number, idex in enumerate(dataframe.index):      
        dataframe['Logistic'][idex] = acclog[number]       
        dataframe['SVMl'][idex] = accsvml[number]
        dataframe['SVMnl'][idex] = accsvmnl[number]
        dataframe['KNN'][idex] = accknn[number]
        dataframe['Navie'][idex] = accnav[number]
        dataframe['Decision'][idex] = accdes[number]
        dataframe['Random'][idex] = accrf[number]
    return dataframe

In [115]:
import warnings
from sklearn.exceptions import ConvergenceWarning
warnings.filterwarnings("ignore", category=ConvergenceWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

In [116]:
# --- MAIN EXECUTION ---
dataset = pd.read_csv("Wine.csv")
indep_X = dataset.iloc[:, 0:13]
dep_Y = dataset.iloc[:, 13]

lda_features = LDA_features(indep_X, dep_Y)

acclog=[]; accsvml=[]; accsvmnl=[]; accknn=[]; accnav=[]; accdes=[]; accrf=[]

X_train, X_test, y_train, y_test = split_scalar(lda_features, dep_Y)

classifier, Accuracy, report, X_test, y_test, cm = logistic(X_train, y_train, X_test)
acclog.append(Accuracy)

classifier, Accuracy, report, X_test, y_test, cm = svm_linear(X_train, y_train, X_test)
accsvml.append(Accuracy)

classifier, Accuracy, report, X_test, y_test, cm = svm_NL(X_train, y_train, X_test)
accsvmnl.append(Accuracy)

classifier, Accuracy, report, X_test, y_test, cm = knn(X_train, y_train, X_test)
accknn.append(Accuracy)

classifier, Accuracy, report, X_test, y_test, cm = Navie(X_train, y_train, X_test)
accnav.append(Accuracy)

classifier, Accuracy, report, X_test, y_test, cm = Decision(X_train, y_train, X_test)
accdes.append(Accuracy)

classifier, Accuracy, report, X_test, y_test, cm = random(X_train, y_train, X_test)
accrf.append(Accuracy)

result = LDA_Classification(acclog, accsvml, accsvmnl, accknn, accnav, accdes, accrf)

In [101]:
result
#1

Unnamed: 0,Logistic,SVMl,SVMnl,KNN,Navie,Decision,Random
LDA,0.866667,0.844444,0.866667,0.866667,0.866667,0.911111,0.888889


The table shows the accuracy scores for the models when LDA is used as a preprocessing step.

The Decision model achieved the highest accuracy at approximately 91.1%.

The Random Forest model had the second-highest accuracy at approximately 88.9%.

The other models (Logistic Regression, SVMl, SVMnl, KNN, and Naive Bayes) all performed similarly, with accuracies around 84.4% to 86.7%.

This table indicates that for this specific dataset and LDA setup, Decision Trees and Random Forest classifiers were the most effective models.

In [118]:
result
#2

Unnamed: 0,Logistic,SVMl,SVMnl,KNN,Navie,Decision,Random
LDA,1.0,1.0,1.0,1.0,1.0,1.0,1.0


The second table shows that all seven models achieved a perfect accuracy score of 1.0 (100%).

This result suggests that the dataset is highly separable after being transformed by LDA. 