# Kernel PCA in python

Run on data from `./Data/all_in_one.csv`. It's a file which combined files `train.csv` and `unique_m.csv`.

In [58]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.decomposition import KernelPCA
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

np.random.seed(42)

### Downloading, spliting and standartisition

In [27]:
# download
data = pd.read_csv('./Data/all_in_one.csv')

In [28]:
# split 
train, test = train_test_split(data, test_size = 0.25, random_state = 0)

X_train = test.drop('critical_temp', axis=1)
X_test = train.drop('critical_temp', axis=1)

y_train = train[['critical_temp']]
y_test = test[['critical_temp']]

In [85]:
# standartisation

sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [101]:
# KPCA with all components

kpca = KernelPCA(n_components = 158, kernel = 'rbf', n_jobs=14)

In [None]:
X_train = kpca.fit_transform(X_train)
X_test = kpca.transform(X_test)

In [87]:
# Run linear regression
l_model = LinearRegression()

In [95]:
# Predict values
l_model.fit(X_train, y_train)

y_pred = l_model.predict(X_test)

In [97]:
import statsmodels.api as sm

y_train = l_model.predict(X_train)

X = sm.add_constant(X_train.ravel())
results = sm.OLS(y_train, x).fit()
results.summary() 

0,1,2,3
Dep. Variable:,y,R-squared:,0.937
Model:,OLS,Adj. R-squared:,0.936
Method:,Least Squares,F-statistic:,3505.0
Date:,"Wed, 08 Dec 2021",Prob (F-statistic):,0.0
Time:,21:47:14,Log-Likelihood:,-39063.0
No. Observations:,15947,AIC:,78260.0
Df Residuals:,15879,BIC:,78780.0
Df Model:,67,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,34.3753,0.022,1545.537,0.000,34.332,34.419
x1,75.7050,0.302,250.929,0.000,75.114,76.296
x2,31.9811,0.400,79.889,0.000,31.196,32.766
x3,56.0551,0.454,123.380,0.000,55.165,56.946
x4,6.5553,0.464,14.118,0.000,5.645,7.465
x5,3.9175,0.496,7.902,0.000,2.946,4.889
x6,46.5324,0.518,89.858,0.000,45.517,47.547
x7,66.4076,0.543,122.279,0.000,65.343,67.472
x8,-5.7262,0.548,-10.445,0.000,-6.801,-4.652

0,1,2,3
Omnibus:,4951.147,Durbin-Watson:,1.991
Prob(Omnibus):,0.0,Jarque-Bera (JB):,893421.044
Skew:,0.213,Prob(JB):,0.0
Kurtosis:,39.666,Cond. No.,40.6
