In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import r2_score
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

In [11]:
# Function for splitting the data and applying PCA
def split_pca(indep_X, dep_Y, n_components=2, test_size=0.25, random_state=0):
    """Split the data, standardise the features and project them with PCA.

    The scaler and the PCA are fitted on the training portion only and then
    applied to the test portion, so no test-set statistics are used when
    learning the transformation.

    Parameters
    ----------
    indep_X : array-like
        Feature matrix, one row per sample.
    dep_Y : array-like
        Target values aligned with the rows of ``indep_X``.
    n_components : int, default=2
        Number of principal components to keep.
    test_size : float or int, default=0.25
        Forwarded to ``train_test_split`` as the size of the test portion.
    random_state : int or None, default=0
        Seed for the train/test shuffle. It is also forwarded to ``PCA`` so the
        projection stays reproducible if a randomized SVD solver is selected.

    Returns
    -------
    tuple
        ``(X_train_pca, X_test_pca, y_train, y_test)``.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        indep_X, dep_Y, test_size=test_size, random_state=random_state
    )

    # Learn the scaling from the training rows only, then reuse it on the test rows.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Same rule for PCA: fit on train, apply to test.
    pca = PCA(n_components=n_components, random_state=random_state)
    X_train_pca = pca.fit_transform(X_train_scaled)
    X_test_pca = pca.transform(X_test_scaled)

    return X_train_pca, X_test_pca, y_train, y_test

In [12]:
# Function for calculating R2 score
def r2_prediction(regressor, X_test, y_test):
    """Return the R2 score of ``regressor``'s predictions on ``X_test``.

    ``regressor`` is expected to be already fitted and to expose ``predict``;
    ``y_test`` holds the true targets the predictions are compared against.
    """
    return r2_score(y_test, regressor.predict(X_test))

In [13]:
# Regression models
def linear(X_train, y_train, X_test, y_test):
    """Fit a LinearRegression on the training data and return its test-set R2."""
    model = LinearRegression().fit(X_train, y_train)
    return r2_prediction(model, X_test, y_test)

In [14]:
def svm_linear(X_train, y_train, X_test, y_test):
    """Fit an SVR with a linear kernel on the training data and return its test-set R2."""
    model = SVR(kernel='linear').fit(X_train, y_train)
    return r2_prediction(model, X_test, y_test)

In [15]:
def svm_nl(X_train, y_train, X_test, y_test):
    """Fit an SVR with an RBF (non-linear) kernel on the training data and return its test-set R2."""
    model = SVR(kernel='rbf').fit(X_train, y_train)
    return r2_prediction(model, X_test, y_test)

In [16]:
def decision_tree(X_train, y_train, X_test, y_test):
    """Fit a DecisionTreeRegressor (random_state=0) on the training data and return its test-set R2."""
    model = DecisionTreeRegressor(random_state=0).fit(X_train, y_train)
    return r2_prediction(model, X_test, y_test)

In [17]:
def random_forest(X_train, y_train, X_test, y_test):
    """Fit a 10-tree RandomForestRegressor (random_state=0) on the training data and return its test-set R2."""
    model = RandomForestRegressor(n_estimators=10, random_state=0).fit(X_train, y_train)
    return r2_prediction(model, X_test, y_test)

In [18]:
# Aggregating the results
def pca_regression(acclin, accsvml, accsvmnl, accdes, accrf, n_components):
    """Build a one-row DataFrame of model scores labelled ``PCA-<n_components>``.

    Each score argument is assigned to its own column, in the order
    Linear, SVMl, SVMnl, Decision, Random. The frame has a single index
    entry, so every argument must fit one row (a scalar or a length-1 sequence).
    """
    scores_by_column = {
        'Linear': acclin,
        'SVMl': accsvml,
        'SVMnl': accsvmnl,
        'Decision': accdes,
        'Random': accrf,
    }

    table = pd.DataFrame(index=[f'PCA-{n_components}'], columns=list(scores_by_column))
    for column, values in scores_by_column.items():
        table[column] = values

    return table


In [19]:
# Loading the Wine dataset
dataset = pd.read_csv("Wine.csv")

# Columns 0-12 are used as the feature matrix, column 13 as the target vector.
# NOTE(review): the recorded output for dep_Y shows only the integers 1, 2 and 3,
# which look like class labels rather than a continuous quantity - confirm that
# regression models scored with R2 are the intended evaluation.
indep_X = dataset.iloc[:, :13].values
dep_Y = dataset.iloc[:, 13].values

In [21]:
# Sanity check: show the feature matrix extracted from the first 13 columns.
print(indep_X)

[[1.423e+01 1.710e+00 2.430e+00 ... 1.040e+00 3.920e+00 1.065e+03]
 [1.320e+01 1.780e+00 2.140e+00 ... 1.050e+00 3.400e+00 1.050e+03]
 [1.316e+01 2.360e+00 2.670e+00 ... 1.030e+00 3.170e+00 1.185e+03]
 ...
 [1.327e+01 4.280e+00 2.260e+00 ... 5.900e-01 1.560e+00 8.350e+02]
 [1.317e+01 2.590e+00 2.370e+00 ... 6.000e-01 1.620e+00 8.400e+02]
 [1.413e+01 4.100e+00 2.740e+00 ... 6.100e-01 1.600e+00 5.600e+02]]


In [22]:
# Sanity check: show the target vector taken from column 13.
print(dep_Y)

[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3]


In [20]:
# Loop over PCA components from 2 to 6
# Order matters: it must match pca_regression's positional score parameters
# (linear, SVM linear kernel, SVM rbf kernel, decision tree, random forest).
model_runners = (linear, svm_linear, svm_nl, decision_tree, random_forest)

results = []
for n in range(2, 7):
    # Each pass re-splits, re-scales and re-projects the data for this component count.
    X_train_pca, X_test_pca, y_train, y_test = split_pca(indep_X, dep_Y, n_components=n)

    # One test-set R2 score per model; plain scalars are enough because each
    # result frame holds a single row.
    scores = [run(X_train_pca, y_train, X_test_pca, y_test) for run in model_runners]
    results.append(pca_regression(*scores, n_components=n))

# Combine all results into a single DataFrame
final_results = pd.concat(results)

# Print the title and the table separately: passing both to a single print()
# inserts the default separator space after the newline, which shifts the
# table's header line one column to the right of its rows.
print("PCA Results:")
print(final_results)


PCA Results:
          Linear      SVMl     SVMnl  Decision    Random
PCA-2  0.728888  0.725487  0.941336  0.955709  0.949951
PCA-3  0.729204  0.725924  0.957257  0.911417  0.940207
PCA-4  0.717897  0.712801  0.945427  0.955709  0.937992
PCA-5  0.745386  0.743062  0.940851  0.955709  0.941093
PCA-6  0.762643  0.763442  0.942069  0.867126  0.939764
