# Kernel PCA in Python

Runs on data from `./Data/all_in_one.csv`, a file that combines the files `train.csv` and `unique_m.csv`.

In [1]:
import numpy as np
import pandas as pd
import statsmodels.api as sm

from sklearn.decomposition import KernelPCA
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Seed NumPy's global random number generator so NumPy-based randomness is repeatable
np.random.seed(42)

### Loading, splitting and standardisation

In [29]:
# Load the combined dataset and display its (rows, columns) shape
data = pd.read_csv('./Data/all_in_one.csv')
data.shape

(21263, 159)

In [3]:
# split: hold out 25% of the rows for testing, then separate features from target
target_col = 'critical_temp'
train, test = train_test_split(data, test_size=0.25, random_state=0)

# features: every column except the target
X_train, X_test = train.drop(columns=target_col), test.drop(columns=target_col)

# target: kept as a single-column DataFrame
y_train, y_test = train[[target_col]], test[[target_col]]

In [4]:
# standardisation: fit the scaler on the training features only,
# then apply that same scaling to both splits
sc = StandardScaler().fit(X_train)
X_train, X_test = sc.transform(X_train), sc.transform(X_test)

### KPCA with all components

In [5]:
# Kernel PCA with an RBF kernel, keeping 158 components
kpca = KernelPCA(
    n_components=158,
    kernel='rbf',
    n_jobs=14,
)

In [7]:
# Fit the kernel PCA on the training features and project both splits.
# NOTE: X_train / X_test are overwritten with their projections, so re-running
# this cell would project already-projected data; the scaled inputs are no
# longer available under these names afterwards.
X_train = kpca.fit_transform(X_train)
X_test = kpca.transform(X_test)

In [8]:
# Create the linear regression estimator (it is fitted in a later cell)
l_model = LinearRegression()

### Predict values

In [20]:
# Fit on the training projections, then predict the held-out split
y_pred = l_model.fit(X_train, y_train).predict(X_test)

# In-sample predictions on the training split
y_tt = l_model.predict(X_train)

In [23]:
# R^2 of the full-component model on the test split
r2_score(y_true=y_test, y_pred=y_pred)

0.7948754392727637

In [25]:
# Mean absolute error of the full-component model on the test split
# (mean_absolute_error is imported in the notebook's top import cell)
mean_absolute_error(y_test, y_pred)

10.739458823358373

# Run with 67 PC

In [30]:
# Kernel PCA with an RBF kernel, keeping only 67 components
kpca_67 = KernelPCA(
    n_components=67,
    kernel='rbf',
    n_jobs=14,
)

In [None]:
# X_train / X_test were overwritten earlier with kernel-PCA projections, so the
# standardised features are rebuilt from the original split with the
# already-fitted scaler before fitting the 67-component model.
X_train_scaled = sc.transform(train.drop('critical_temp', axis=1))
X_test_scaled = sc.transform(test.drop('critical_temp', axis=1))

# Use the 67-component transformer (kpca_67), not the full `kpca` model
X_train_67 = kpca_67.fit_transform(X_train_scaled)
X_test_67 = kpca_67.transform(X_test_scaled)

In [None]:
# Create a separate linear regression estimator for the 67-component features
# (it is fitted in a later cell)
l_model_67 = LinearRegression()

In [None]:
# Fit the 67-component model and predict with that same model
# (previously the prediction was made with the unrelated `l_model`)
l_model_67.fit(X_train_67, y_train)
y_pred_67 = l_model_67.predict(X_test_67)

In [None]:
# R^2 of the 67-component model on the test split
r2_score(y_test, y_pred_67)

In [None]:
# Mean absolute error of the 67-component model on the test split
# (mean_absolute_error is imported in the notebook's top import cell)
mean_absolute_error(y_test, y_pred_67)