In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# re-imported before each cell executes, so edits to local source files
# (e.g. the tokamakTK package imported below) take effect without a kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
import sys
sys.path.append('../')
import tokamakTK
from tokamakTK import get_regression

import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import scipy as sp
import statsmodels.api as sm
import matplotlib.patches as mpatches

from collections import Counter

from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from yellowbrick.cluster import KElbowVisualizer
from yellowbrick.cluster import SilhouetteVisualizer

# Never truncate columns when a DataFrame is displayed.
pd.set_option('display.max_columns', None)
# Directory prefix prepended to every CSV file name read by this notebook.
path = "../data/"
# Output directory for figures -- presumably used by plotting cells further
# down; it is not referenced in the cells visible here (TODO confirm).
fig_path = "../../../LATEX/Latex Images/"

# Matplotlib style sheet applied globally to all figures, with a serif font.
sstyle = 'seaborn-v0_8-poster'
plt.style.use(sstyle)
plt.rc('font',family = 'serif')

In [3]:
# Obtained from Optimization
# Each file lists (in its `id` column) the shots of one optimization result;
# the alpha_R value reached is encoded in the file name.
min_subset_ids_6357 = pd.read_csv(path+"R_ids_alpha_0.6357.csv")
min_subset_ids_9998 = pd.read_csv(path+"R_ids_alpha_0.9998.csv")

DB2 = pd.read_csv(path+"DB2P8.csv")
DB5 = pd.read_csv(path+"SELDB5_SVD.csv", low_memory=False)

# Setting ELMy Dataset
DB5 = DB5[DB5["PHASE"].isin(['HGELM', 'HSELM', 'HGELMH', 'HSELMH'])]

# Two shots from DB2P8 are missing in DB5: append them.
# ignore_index=True rebuilds a clean 0..n-1 index after the PHASE filter above.
missing_shots = DB2[~DB2.id.isin(DB5.id.values)].reset_index(drop=True)
DB5 = pd.concat([DB5, missing_shots], axis=0, ignore_index=True)

# Labeling shots that had great impact in decreasing alpha_R.
# One pass per subset: build a boolean membership mask, store it as a 0/1
# column at position 2 (same column order as inserting them one after the
# other by hand), and derive the reported share from the rows actually
# labeled, so the printed figures cannot drift from the label columns when
# the id file holds duplicates or ids that are absent from DB5.
report_sections = []
for alpha_tag, subset_ids in (("6357", min_subset_ids_6357),
                              ("9998", min_subset_ids_9998)):
    in_subset = DB5.id.isin(subset_ids.id)
    DB5.insert(loc=2, column=f"label_{alpha_tag}", value=in_subset.astype("int64"))

    # Plain Python float so the f-strings below format it like any other float.
    affected = int(in_subset.sum()) / len(DB5)
    report_sections.append(
        f"  Subset that decrease alpha-R to 0.{alpha_tag}\n--------\n"
        f"{ round( affected*100       ,2) }% affected alpha_R\n"
        f"{ round( (1 - affected)*100 ,2) }% did not affect alpha_R"
    )

print("\n\n\n".join(report_sections))

  Subset that decrease alpha-R to 0.6357
--------
23.45% affected alpha_R
76.55% did not affect alpha_R


  Subset that decrease alpha-R to 0.9998
--------
9.88% affected alpha_R
90.12% did not affect alpha_R


## Regression of the decreasing dataset

In [4]:
# Fit the regression on the shots labeled as belonging to the alpha_R = 0.6357 subset.
# get_regression comes from tokamakTK; element [1] of its return value is the
# object exposing .summary() (a statsmodels OLS result, judging by the output).
# NOTE(review): unlike the next cell, withDB2 is left at its default here -- confirm this is intended.
decreasing_shots = DB5[DB5["label_6357"].eq(1)]
decreasing_fit = get_regression(decreasing_shots, DB2)[1]
decreasing_fit.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.939
Model:,OLS,Adj. R-squared:,0.939
Method:,Least Squares,F-statistic:,5363.0
Date:,"Sun, 21 May 2023",Prob (F-statistic):,0.0
Time:,19:36:13,Log-Likelihood:,461.74
No. Observations:,2776,AIC:,-905.5
Df Residuals:,2767,BIC:,-852.1
Df Model:,8,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-2.2596,0.037,-60.302,0.000,-2.333,-2.186
x1,1.4219,0.022,64.538,0.000,1.379,1.465
x2,0.0199,0.020,1.006,0.315,-0.019,0.059
x3,-0.1361,0.015,-9.257,0.000,-0.165,-0.107
x4,-0.5290,0.012,-44.915,0.000,-0.552,-0.506
x5,0.6358,0.037,17.157,0.000,0.563,0.708
x6,-0.0017,0.037,-0.044,0.965,-0.075,0.071
x7,-0.5566,0.048,-11.702,0.000,-0.650,-0.463
x8,0.1296,0.026,5.050,0.000,0.079,0.180

0,1,2,3
Omnibus:,29.156,Durbin-Watson:,0.611
Prob(Omnibus):,0.0,Jarque-Bera (JB):,47.712
Skew:,0.026,Prob(JB):,4.36e-11
Kurtosis:,3.64,Cond. No.,53.4


## Regression of the unaffected dataset

In [5]:
# Fit the regression on the shots NOT labeled as part of the alpha_R = 0.6357
# subset, passing withDB2=True to tokamakTK's get_regression.
# Element [1] of the return value is the object exposing .summary().
unaffected_shots = DB5[DB5["label_6357"].eq(0)]
unaffected_fit = get_regression(unaffected_shots, DB2, withDB2=True)[1]
unaffected_fit.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.964
Model:,OLS,Adj. R-squared:,0.963
Method:,Least Squares,F-statistic:,15770.0
Date:,"Sun, 21 May 2023",Prob (F-statistic):,0.0
Time:,19:36:16,Log-Likelihood:,1815.6
No. Observations:,4786,AIC:,-3613.0
Df Residuals:,4777,BIC:,-3555.0
Df Model:,8,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-2.5844,0.026,-99.692,0.000,-2.635,-2.534
x1,0.7819,0.016,48.684,0.000,0.750,0.813
x2,0.2439,0.014,17.334,0.000,0.216,0.271
x3,0.4115,0.009,44.399,0.000,0.393,0.430
x4,-0.7532,0.006,-122.220,0.000,-0.765,-0.741
x5,2.1560,0.025,86.317,0.000,2.107,2.205
x6,0.5003,0.027,18.718,0.000,0.448,0.553
x7,0.7990,0.035,22.530,0.000,0.729,0.869
x8,0.2190,0.016,13.518,0.000,0.187,0.251

0,1,2,3
Omnibus:,166.448,Durbin-Watson:,0.736
Prob(Omnibus):,0.0,Jarque-Bera (JB):,210.895
Skew:,-0.392,Prob(JB):,1.6e-46
Kurtosis:,3.666,Cond. No.,66.8
