In [2]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules
# before each cell executes, so edits to local modules are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
import sys
sys.path.append('../')
import tokamakTK
from tokamakTK import get_ECT_regression

import numpy as np 
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import scipy as sp
import statsmodels.api as sm
import matplotlib.patches as mpatches

from collections import Counter
from scipy.stats import f
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm

# Never truncate the column list when a DataFrame is displayed.
pd.options.display.max_columns = None

# Relative locations used by this notebook: input CSV files and
# (presumably) the folder figures are exported to.
path = "../data/"
fig_path = "../../../LATEX/Latex Images/"

# Serif font family for all matplotlib text.
plt.rcParams['font.family'] = 'serif'

# Column names used as regressors; each one is dropped in turn for the F-tests.
features = [
    'IP', 'BT', 'NEL', 'PLTH',
    'RGEO', 'KAREA', 'EPS', 'MEFF',
]

In [5]:
# Shot-id subsets: two obtained from the optimization, one supplied externally.
min_subset_ids_6357 = pd.read_csv(path + "R_ids_alpha_0.6357.csv")
min_subset_ids_9998 = pd.read_csv(path + "R_ids_alpha_0.9998.csv")
min_subset_ids_joe = pd.read_csv(path + "deviation_id.csv")

DB2 = pd.read_csv(path + "DB2P8.csv")
DB5 = pd.read_csv(path + "SELDB5_SVD.csv", low_memory=False)

# Keep only the ELMy phases and discard the spherical tokamaks.
elmy_phases = ['HGELM', 'HSELM', 'HGELMH', 'HSELMH']
spherical_tokamaks = ['START', 'MAST', 'NSTX']
keep_rows = DB5["PHASE"].isin(elmy_phases) & ~DB5["TOK"].isin(spherical_tokamaks)
DB5 = DB5[keep_rows]

# DB2P8 shots that are absent from DB5 are appended so DB5 contains all of DB2.
missing_shots = DB2[~DB2.id.isin(DB5.id.values)].reset_index(drop=True)
DB5 = pd.concat([DB5, missing_shots], axis=0, ignore_index=True)

# (label column, id subset, report heading) for every subset of interest.
# The order matters: each label is inserted at position 2, so the last one
# inserted ends up first among the label columns.
subset_specs = (
    ("label_6357", min_subset_ids_6357, "  Subset that decrease alpha-R to 0.6357"),
    ("label_9998", min_subset_ids_9998, "  Subset that decrease alpha-R to 0.9998"),
    ("label_joe", min_subset_ids_joe, "  Subset given by Joseph Hall           "),
)

# Flag (1/0) the shots whose id belongs to each subset.
for label_column, id_subset, _ in subset_specs:
    membership = DB5.id.isin(id_subset.id).astype("int64")
    DB5.insert(loc=2, column=label_column, value=membership)

# Constant column of ones, placed first.
DB5.insert(0, 'intercept', np.ones(len(DB5)))

# Report the size of every subset relative to the number of rows in DB5.
report_sections = []
for _, id_subset, heading in subset_specs:
    share = len(id_subset) / len(DB5)
    report_sections.append(
        f"{heading}\n--------\n"
        f"{round(share * 100, 2)}% affected alpha_R\n"
        f"{round((1 - share) * 100, 2)}% did not affect alpha_R"
    )

print("\n\n\n".join(report_sections))

  Subset that decrease alpha-R to 0.6357
--------
24.37% affected alpha_R
75.63% did not affect alpha_R


  Subset that decrease alpha-R to 0.9998
--------
10.27% affected alpha_R
89.73% did not affect alpha_R


  Subset given by Joseph Hall           
--------
19.18% affected alpha_R
80.82% did not affect alpha_R


In [11]:
# Regression summary for the shots flagged by label_joe together with every
# shot whose id also appears in DB2.
flagged_or_in_db2 = (DB5["label_joe"] == 1) | DB5["id"].isin(DB2["id"].values)
get_ECT_regression(DB5[flagged_or_in_db2]).summary()

0,1,2,3
Dep. Variable:,TAUTH,R-squared:,0.937
Model:,OLS,Adj. R-squared:,0.937
Method:,Least Squares,F-statistic:,4456.0
Date:,"Wed, 31 May 2023",Prob (F-statistic):,0.0
Time:,13:50:02,Log-Likelihood:,378.59
No. Observations:,2410,AIC:,-739.2
Df Residuals:,2401,BIC:,-687.1
Df Model:,8,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-2.0541,0.050,-41.335,0.000,-2.152,-1.957
IP,1.3449,0.024,56.414,0.000,1.298,1.392
BT,0.0322,0.026,1.239,0.215,-0.019,0.083
NEL,-0.1059,0.015,-6.964,0.000,-0.136,-0.076
PLTH,-0.6089,0.012,-50.795,0.000,-0.632,-0.585
RGEO,0.8066,0.040,20.231,0.000,0.728,0.885
KAREA,0.1108,0.042,2.664,0.008,0.029,0.192
EPS,-0.2819,0.050,-5.603,0.000,-0.380,-0.183
MEFF,0.1695,0.028,5.989,0.000,0.114,0.225

0,1,2,3
Omnibus:,4.783,Durbin-Watson:,0.616
Prob(Omnibus):,0.091,Jarque-Bera (JB):,4.719
Skew:,-0.106,Prob(JB):,0.0945
Kurtosis:,3.042,Cond. No.,52.1


In [6]:
# Datasets on which the per-feature F-tests are run, in result-column order:
#   0: DB2, 1: full DB5, 2: shots flagged by label_joe plus all DB2 shots,
#   3: DB5 shots not flagged by label_joe.
# NOTE(review): cases 2 and 3 overlap on DB2 shots with label_joe == 0 -
# confirm this is intended.
flagged_by_joe = DB5["label_joe"] == 1
present_in_db2 = DB5["id"].isin(DB2["id"].values)

all_cases = [
    DB2.copy(),
    DB5.copy(),
    DB5[flagged_by_joe | present_in_db2],
    DB5[DB5["label_joe"] == 0],
]

def get_FPvalues(full_model, reduced_model):
    """Return the F statistic and its p-value for a nested-model comparison.

    Parameters
    ----------
    full_model : fitted model accepted by ``anova_lm``
        Regression containing all features.
    reduced_model : fitted model accepted by ``anova_lm``
        Regression fitted without the feature under test.

    Returns
    -------
    tuple
        ``(F, Pr(>F))`` taken from the second row of the ANOVA table, i.e.
        the row of the full model compared against the reduced one.
    """
    # Run the comparison once (reduced model first, then the full one) and
    # read both values from the same table row.
    comparison = anova_lm(reduced_model, full_model)
    full_model_row = comparison.iloc[1]
    return (full_model_row["F"], full_model_row["Pr(>F)"])

In [9]:
# Creating table with F-test and P-test values per case
TESTS = pd.DataFrame(np.zeros((len(features), len(all_cases)*2)), 
                    columns=['FV0', 'PV0', 'FV1', 'PV1', 'FV2', 'PV2', 'FV3', 'PV3'],
                    index=features)

In [12]:
# Partial F-test per feature: for every dataset in all_cases, the full
# regression is compared against the regression refitted without one feature.
# get_ECT_regression takes the regression in log-space with intercept

# One label per entry of all_cases, in the same order.
case_labels = ["db2", "db5", "d0.81", "u0.81"]  # (d): decreasing; (u): unaffected
c = [f"{stat}_{label}" for label in case_labels for stat in ("F", "P")]

# Name the columns before filling them, so re-running this cell writes into
# the same columns instead of adding new ones.
TESTS.columns = c
TESTS.index.name = "removed feature"

for case_label, data_ in zip(case_labels, all_cases):
    # The full model does not depend on the removed feature: fit it once per dataset.
    full_regression = get_ECT_regression(data_)

    # Loop variable is `feature`, not `f`, so the `scipy.stats.f` import is not shadowed.
    for feature in features:
        reduced_features = [name for name in features if name != feature]
        reduced_regression = get_ECT_regression(data_, features=reduced_features)

        # A single comparison yields both the F statistic and its p-value.
        f_value, p_value = get_FPvalues(full_regression, reduced_regression)
        TESTS.loc[feature, f"F_{case_label}"] = np.round(f_value, 5)
        TESTS.loc[feature, f"P_{case_label}"] = np.round(p_value, 5)

In [13]:
# Display the table of F statistics and p-values (rows: removed feature).
TESTS

Unnamed: 0_level_0,F_db2,P_db2,F_db5,P_db5,F_d0.81,P_d0.81,F_u0.81,P_u0.81
removed feature,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
IP,961.58446,0.0,5181.43523,0.0,3182.52889,0.0,2409.83284,0.0
BT,138.99369,0.0,4.7928,0.02862,1.53515,0.21546,119.44474,0.0
NEL,494.27457,0.0,231.23438,0.0,48.49889,0.0,1468.29541,0.0
PLTH,2674.96819,0.0,9305.98576,0.0,2580.158,0.0,12154.63149,0.0
RGEO,2022.42722,0.0,2092.0354,0.0,409.27718,0.0,4509.32788,0.0
KAREA,94.66733,0.0,13.33678,0.00026,7.09798,0.00777,194.02264,0.0
EPS,131.1372,0.0,0.13161,0.71679,31.39499,0.0,246.88814,0.0
MEFF,36.507,0.0,192.29798,0.0,35.87304,0.0,217.23442,0.0
