In [1]:
# Enable IPython's autoreload extension in mode 2, so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import sys
sys.path.append('../')
import tokamakTK

import seaborn as sns
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import scipy as sp
import statsmodels.api as sm
import matplotlib.patches as mpatches

from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_curve, precision_recall_fscore_support

# Do not truncate columns when a DataFrame is displayed.
pd.set_option('display.max_columns', None)
# Relative folder holding the CSV inputs. File names are appended by plain
# string concatenation, so the trailing slash is required.
path = "../data/"

In [3]:
# Obtained from Optimization: ids of the shots in the selected subset
min_subset_ids = pd.read_csv(path + "R_ids_alpha_0.6357.csv")

DB2 = pd.read_csv(path + "DB2P8.csv")
DB5 = pd.read_csv(path + "SELDB5_SVD.csv", low_memory=False)

# Restrict DB5 to the ELMy dataset (selected PHASE codes only)
DB5 = DB5.loc[DB5["PHASE"].isin(['HGELM', 'HSELM', 'HGELMH', 'HSELMH'])]

# Optional: remove spherical tokamaks (left disabled)
#DB5 = DB5[~DB5.TOK.isin(['MAST', 'NSTX', 'START'])]

# Shots of DB2P8 that are missing in DB5 (two, per the original note) are
# appended so that every DB2P8 shot is present; the index is rebuilt afterwards.
missing_shots = DB2.loc[~DB2["id"].isin(DB5["id"].values)].reset_index(drop=True)
DB5 = pd.concat([DB5, missing_shots], axis=0, ignore_index=True)

# Label every shot "unaffected" by default, then flag the shots that had a
# great impact in decreasing alpha_R.
DB5.insert(loc=2, column="label", value="unaffected")
DB5.loc[DB5["id"].isin(min_subset_ids["id"]), "label"] = "decreasing"

In [41]:
# Distinct machine names present in the combined dataset
DB5["TOK"].unique()

array(['ASDEX', 'AUG', 'AUGW', 'CMOD', 'COMPASS', 'D3D', 'JET', 'JETILW',
       'JFT2M', 'JT60U', 'MAST', 'NSTX', 'PBXM', 'PDX', 'START', 'TCV',
       'TDEV', 'TFTR'], dtype=object)

In [37]:
# One sub-frame per machine of interest (exact match on the TOK column)
MAST = DB5[DB5["TOK"] == "MAST"]
JET = DB5[DB5["TOK"] == "JET"]
D3D = DB5[DB5["TOK"] == "D3D"]

In [18]:
# Candidate columns, grouped thematically.
plasma_characteristics = ["QCYL5","BEIMHD","PREMAG","LHTIME","HYBRID",
                          "CONFIG","DWDIA","WMHD","TORQ","KAREA", "EPS","MEFF","VOL","LCOULOMB",
                          "IP","RHOSTAR","NUSTAR","BETASTAR"]
TOK_characteristics = ["TOK","DIVNAME","WALMAT","DIVMAT","LIMMAT","AMIN","BT"]
ELM = ["ELMTYPE","ELMFREQ"]
heating = ["PECRH", "PICRH", "ICSCHEME","AUXHEAT","ECHMODE","PELLET"]
impurities = ["EVAP","ZEFF","ZEFFNEO","PRAD","POHM","ENBI","PNBI"]
power = ["PLTH","PFLOSS"]
temperatures = ["TAV","TEV","TIV"]
fast_particles = ["NESOL","WFFORM","WFICFORM","OMEGACYCL","NEL"]

# NOTE(review): this 16-column list used to be bound to `features` and was
# overwritten a few lines later, so it never had any effect in this cell.
# It is kept under its own name so the information is not lost; confirm
# whether it is still needed.
features_shortlist = ['NUSTAR', 'BETASTAR', 'QCYL5', 'HYBRID', 'ENBI', 'VOL', 'POHM',
                      'PNBI', 'DWDIA', 'BT', 'NEL', 'PFLOSS', 'KAREA', 'MEFF', 'WFICFORM', 'IP']

# Full candidate set: every group above, concatenated in order.
features = (plasma_characteristics + TOK_characteristics + ELM + heating
            + impurities + power + temperatures + fast_particles)

# A column listed in two groups would be selected twice from DB5 below.
assert len(features) == len(set(features)), "duplicate column in feature groups"

# Columns stored with object dtype are treated as the categorical ones.
cat_features = DB5[features].select_dtypes(include=['object']).columns.tolist()

In [19]:
# Names of the object-dtype columns found among `features`.
cat_features

['PREMAG',
 'HYBRID',
 'CONFIG',
 'TOK',
 'DIVNAME',
 'WALMAT',
 'DIVMAT',
 'LIMMAT',
 'ELMTYPE',
 'ICSCHEME',
 'AUXHEAT',
 'ECHMODE',
 'PELLET',
 'EVAP']

## MAST

In [51]:
# Date range of the MAST shots. DATE (YYYYMMDD) is parsed once and the
# extremes are taken with min()/max(), which skip missing dates. Sorting first
# and reading the last row would report NaT as the end date, because NaN
# values are sorted last.
mast_dates = pd.to_datetime(MAST.DATE, format="%Y%m%d")
print(mast_dates.min(), "-", mast_dates.max())

2002-05-08 00:00:00 - 2005-11-11 00:00:00


In [8]:
# Divertor names recorded for MAST
MAST["DIVNAME"].unique()

array(['RIB', 'PLATES1'], dtype=object)

In [15]:
# Wall material codes recorded for MAST
MAST["WALMAT"].unique()

array(['SS', 'C'], dtype=object)

In [16]:
# Divertor material codes recorded for MAST
MAST["DIVMAT"].unique()

array(['C'], dtype=object)

In [20]:
# Limiter material codes recorded for MAST
MAST["LIMMAT"].unique()

array(['C'], dtype=object)

## JET

In [52]:
# Date range of the JET shots. DATE (YYYYMMDD) is parsed once and the
# extremes are taken with min()/max(), which skip missing dates. Sorting first
# and reading the last row would report NaT as the end date, because NaN
# values are sorted last.
jet_dates = pd.to_datetime(JET.DATE, format="%Y%m%d")
print(jet_dates.min(), "-", jet_dates.max())

1988-04-22 00:00:00 - 2004-03-03 00:00:00


In [17]:
# Divertor names recorded for JET
JET["DIVNAME"].unique()

array(['MARK0', 'MARKI', 'MARKIIA', 'MARKIIAP', 'MARKGB', 'MARKGBSR'],
      dtype=object)

In [11]:
# Divertor material codes recorded for JET
JET["DIVMAT"].unique()

array(['C', 'C/BE'], dtype=object)

In [13]:
# Wall material codes recorded for JET
JET["WALMAT"].unique()

array(['IN/C', 'IN'], dtype=object)

In [21]:
# Limiter material codes recorded for JET
JET["LIMMAT"].unique()

array(['C', 'BE'], dtype=object)

## DIII-D * 

[Importance when using Moderate MCL](https://github.com/Chinnasf/Thesis/blob/main/Classification/Sklearn%20Tuning/RF_Moderate_Multicollinearity.ipynb)

In [53]:
# Date range of the DIII-D shots. DATE (YYYYMMDD) is parsed once and the
# extremes are taken with min()/max(), which skip missing dates. Sorting first
# and reading the last row would report NaT as the end date, because NaN
# values are sorted last.
d3d_dates = pd.to_datetime(D3D.DATE, format="%Y%m%d")
print(d3d_dates.min(), "-", d3d_dates.max())

1987-10-09 00:00:00 - 2004-07-29 00:00:00


In [38]:
# Divertor names recorded for DIII-D  ******* RDP ********** (entry flagged by the author)
D3D["DIVNAME"].unique()

array(['OPEN', 'ADP', 'RDP'], dtype=object)

In [39]:
# Divertor material codes recorded for DIII-D
D3D["DIVMAT"].unique()

array(['C'], dtype=object)

In [40]:
# Wall material codes recorded for DIII-D
D3D["WALMAT"].unique()

array(['IN'], dtype=object)

## DB5

In [23]:
# Distinct EVAP codes across the whole dataset (NaN included)
DB5.EVAP.unique()

array(['NONE', 'CARBH', 'BOROA', 'BOROB', 'SILICON', 'BOR', 'BOROX', 'BO',
       'CARB', 'BOROC', 'BE', nan, 'TI', 'DECABORA'], dtype=object)

In [24]:
# Distinct DIVMAT codes across the whole dataset (NaN included)
DB5.DIVMAT.unique()

array(['TI2', 'TI1', 'CC', 'C', 'W', 'C-W', 'MO', 'C/BE', 'SS', 'IN',
       'NONE', nan], dtype=object)

In [None]:
# IN ~ Inconel 625 (a nickel-chromium-based alloy, not a stainless steel; 'SS' appears as a separate material code)

In [34]:
# Machines that report the 'RDP' divertor name
DB5.loc[DB5["DIVNAME"] == "RDP", "TOK"].unique()

array(['D3D'], dtype=object)

In [55]:
# Divertor names recorded for the rows whose TOK is AUGW, ASDEX or AUG
DB5.loc[DB5["TOK"].isin(["AUGW", "ASDEX", "AUG"]), "DIVNAME"].unique()

array(['DV-IPRE', 'DV-IPOST', 'DV-II-C', 'DIV-I', 'DIV-II', 'DIV-IIb',
       'DIV-IIc', 'DIV-IId', 'DIV-III', nan], dtype=object)