# Kernel PCA in Python

This notebook runs on the data in `./Data/all_in_one.csv`, a file that combines `train.csv` and `unique_m.csv`.

In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.decomposition import KernelPCA
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression

import statsmodels.api as sm
from sklearn.metrics import r2_score

# Seed NumPy's global random number generator so repeated runs are reproducible
SEED = 42
np.random.seed(SEED)

### Loading, splitting and standardization

In [2]:
# Load the combined dataset from the local CSV file
csv_path = './Data/all_in_one.csv'
data = pd.read_csv(csv_path)

In [3]:
# Hold out 25% of the rows for testing; the fixed random_state keeps the split reproducible
train, test = train_test_split(data, random_state=0, test_size=0.25)

# Features: every column except the target
target_col = 'critical_temp'
X_train, X_test = (part.drop(columns=target_col) for part in (train, test))

# Targets: kept as single-column DataFrames (hence the double brackets)
y_train, y_test = train[[target_col]], test[[target_col]]

In [4]:
# Standardization: the scaler is fitted on the training features only,
# and the same fitted scaler is then applied to both the train and test sets
sc = StandardScaler().fit(X_train)
X_train, X_test = sc.transform(X_train), sc.transform(X_test)

### KPCA with all components

In [5]:
# Kernel PCA keeping 158 components, with an RBF kernel and 14 parallel jobs
kpca_params = {'n_components': 158, 'kernel': 'rbf', 'n_jobs': 14}
kpca = KernelPCA(**kpca_params)

In [None]:
# Fit the kernel PCA on the training features and project them,
# then project the test features with the already-fitted transformer
train_projected = kpca.fit_transform(X_train)
test_projected = kpca.transform(X_test)
X_train, X_test = train_projected, test_projected

In [None]:
# Create the linear regression model (it is only instantiated here, not fitted yet)
l_model = LinearRegression()

### Predict values

In [None]:
# Fit the regression on the training set, then predict the test-set targets
# (fit() returns the estimator itself, so the two calls can be chained)
y_pred = l_model.fit(X_train, y_train).predict(X_test)

In [None]:
# R^2 on the held-out test set. r2_score expects (y_true, y_pred): the true
# targets first, then the model's predictions for the same rows.
r2_score(y_test, y_pred)

In [None]:
# In-sample predictions, stored under their own name so the true targets in
# y_train are not overwritten (re-running earlier cells stays safe)
y_train_pred = l_model.predict(X_train)

# statsmodels OLS on the same design matrix, to get a full regression summary.
# add_constant adds the intercept column; X_train has to keep its 2-D
# (samples x components) layout, so it must not be flattened.
X = sm.add_constant(X_train)
results = sm.OLS(y_train, X).fit()
results.summary()