In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.metrics import r2_score
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

In [2]:
# Function for splitting the data and applying LDA
# LDA components: the n_components parameter must not exceed min(n_classes - 1, n_features),
# where n_classes is the number of unique classes in the target variable and n_features is the number of input features.
# The Wine dataset has 3 classes, so the maximum number of components that can be used is 2.
# Data scaling: standard scaling is applied before performing LDA, which is essential since LDA is sensitive to the scale of the data.
def split_lda(indep_X, dep_Y, n_components=1):
    """Split the data, standardize it and project it with LDA.

    The data is split 75/25 with a fixed ``random_state`` of 0. The scaler
    and the LDA transformer are both fitted on the training split only and
    then applied to the test split.

    Parameters
    ----------
    indep_X : array-like
        Feature matrix.
    dep_Y : array-like
        Target values; also used as the class labels LDA is fitted on.
    n_components : int, default 1
        Number of discriminant components to keep. scikit-learn requires
        this to be at most ``min(n_classes - 1, n_features)``.

    Returns
    -------
    tuple
        ``(X_train_lda, X_test_lda, y_train, y_test)``.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        indep_X, dep_Y, test_size=0.25, random_state=0
    )

    # Standardize with statistics learned from the training rows only.
    scaler = StandardScaler().fit(X_train)
    train_scaled = scaler.transform(X_train)
    test_scaled = scaler.transform(X_test)

    # Fit the projection on the scaled training rows, then reuse it for both splits.
    projector = LDA(n_components=n_components).fit(train_scaled, y_train)
    train_projected = projector.transform(train_scaled)
    test_projected = projector.transform(test_scaled)

    return train_projected, test_projected, y_train, y_test

In [3]:
# Function for calculating R2 score
def r2_prediction(regressor, X_test, y_test):
    """Return the R2 score of ``regressor``'s predictions for ``X_test`` against ``y_test``.

    ``regressor`` must already be fitted; only its ``predict`` method is used.
    """
    return r2_score(y_test, regressor.predict(X_test))

In [4]:
# Regression models
def linear(X_train, y_train, X_test, y_test):
    """Fit a ``LinearRegression`` on the training data and return its R2 score on the test data."""
    model = LinearRegression().fit(X_train, y_train)
    return r2_prediction(model, X_test, y_test)

def svm_linear(X_train, y_train, X_test, y_test):
    """Fit an ``SVR`` with a linear kernel on the training data and return its R2 score on the test data."""
    model = SVR(kernel='linear').fit(X_train, y_train)
    return r2_prediction(model, X_test, y_test)

def svm_nl(X_train, y_train, X_test, y_test):
    """Fit an ``SVR`` with an RBF (non-linear) kernel on the training data and return its R2 score on the test data."""
    model = SVR(kernel='rbf').fit(X_train, y_train)
    return r2_prediction(model, X_test, y_test)

def decision_tree(X_train, y_train, X_test, y_test):
    """Fit a ``DecisionTreeRegressor`` (``random_state=0``) on the training data and return its R2 score on the test data."""
    model = DecisionTreeRegressor(random_state=0).fit(X_train, y_train)
    return r2_prediction(model, X_test, y_test)

def random_forest(X_train, y_train, X_test, y_test):
    """Fit a 10-tree ``RandomForestRegressor`` (``random_state=0``) on the training data and return its R2 score on the test data."""
    model = RandomForestRegressor(n_estimators=10, random_state=0).fit(X_train, y_train)
    return r2_prediction(model, X_test, y_test)


In [5]:
# Aggregating the results
def lda_regression(acclin, accsvml, accsvmnl, accdes, accrf, n_components):
    """Collect the per-model scores for one LDA setting into a one-row DataFrame.

    Parameters
    ----------
    acclin, accsvml, accsvmnl, accdes, accrf
        Score for the linear, linear-SVM, non-linear-SVM, decision-tree and
        random-forest models respectively, each placed in its own column.
    n_components
        Number of LDA components; used to build the row label ``LDA-<n>``.

    Returns
    -------
    pandas.DataFrame
        A frame indexed by ``LDA-<n_components>`` with the columns
        ``Linear``, ``SVMl``, ``SVMnl``, ``Decision`` and ``Random``.
    """
    scores_by_model = {
        'Linear': acclin,
        'SVMl': accsvml,
        'SVMnl': accsvmnl,
        'Decision': accdes,
        'Random': accrf,
    }
    row_label = f'LDA-{n_components}'
    return pd.DataFrame(scores_by_model, index=[row_label])


In [6]:
# Load the Wine dataset from a CSV given by relative path and split it
# positionally: columns 0-12 become the feature matrix, column 13 the target.
dataset = pd.read_csv("Wine.csv")
indep_X, dep_Y = dataset.iloc[:, :13].values, dataset.iloc[:, 13].values


In [8]:
# Inspect the feature matrix (columns 0-12 of the CSV); NumPy abbreviates the
# repr, so only the first and last rows/columns are shown.
indep_X

array([[1.423e+01, 1.710e+00, 2.430e+00, ..., 1.040e+00, 3.920e+00,
        1.065e+03],
       [1.320e+01, 1.780e+00, 2.140e+00, ..., 1.050e+00, 3.400e+00,
        1.050e+03],
       [1.316e+01, 2.360e+00, 2.670e+00, ..., 1.030e+00, 3.170e+00,
        1.185e+03],
       ...,
       [1.327e+01, 4.280e+00, 2.260e+00, ..., 5.900e-01, 1.560e+00,
        8.350e+02],
       [1.317e+01, 2.590e+00, 2.370e+00, ..., 6.000e-01, 1.620e+00,
        8.400e+02],
       [1.413e+01, 4.100e+00, 2.740e+00, ..., 6.100e-01, 1.600e+00,
        5.600e+02]])

In [9]:
# Inspect the target vector (column 13 of the CSV); the values are the class
# labels 1, 2 and 3.
dep_Y

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3])

In [7]:
# Evaluate every regressor on LDA projections with 1 and 2 components.
# The Wine target has three classes (labelled 1, 2 and 3), so LDA can
# produce at most 3 - 1 = 2 components.
# NOTE(review): the acc* variables hold R2 scores (see r2_prediction), not
# accuracies, and the class label is being used as a numeric regression
# target - confirm this is intended rather than a classification set-up.
results = []
scorers = (linear, svm_linear, svm_nl, decision_tree, random_forest)

for n in (1, 2):
    X_train_lda, X_test_lda, y_train, y_test = split_lda(indep_X, dep_Y, n_components=n)

    # One single-element list per model, evaluated in the order of `scorers`.
    acclin, accsvml, accsvmnl, accdes, accrf = (
        [score(X_train_lda, y_train, X_test_lda, y_test)] for score in scorers
    )

    result = lda_regression(acclin, accsvml, accsvmnl, accdes, accrf, n_components=n)
    results.append(result)

# Stack the one-row frames into a single table, one row per LDA setting.
final_results = pd.concat(results)
print("LDA Results:\n", final_results)


LDA Results:
          Linear      SVMl     SVMnl  Decision    Random
LDA-1  0.801466  0.800460  0.836602  0.867126  0.843652
LDA-2  0.804353  0.803364  0.966316  0.911417  0.900344
