In [1]:
%load_ext autoreload
%autoreload 2

With this notebook, I intend to clean DB5 and discern which features are of interest for the follow-up of the analyses. Once the optimization is done, and potential algorithms for clustering and classification are developed, the next step would be to subject these features to examination.

In [2]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import scipy as sp
import statsmodels.api as sm
import matplotlib.patches as mpatches

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, cross_val_score, RepeatedStratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_recall_curve, precision_recall_fscore_support

pd.set_option('display.max_columns', None)
path = "data/"

In [3]:
# Obtained from Optimization
min_subset_ids = pd.read_csv(path+"id_vs_frequency_decreasing_ds.csv")

raw_DB5 = pd.read_excel(path+"HDB5V2.3.xlsx")


DB2 = pd.read_csv(path+"DB2P8.csv")
DB5 = pd.read_csv(path+"SELDB5_SVD.csv", low_memory=False) 
DB5 = DB5[DB5["PHASE"].isin(['HGELM', 'HSELM', 'HGELMH', 'HSELMH'])]

# There are shots missing in DB5 from DB2P8
missing_shots = DB2[~DB2.id.isin( DB5.id.values )].reset_index(drop=True)
DB5 = pd.concat([DB5, missing_shots], axis=0, ignore_index=True)

# Labeling shots that had great impact in decreasing alpha_R
DB5.insert(loc=2,column="label",value=[0]*len(DB5))
DB5.loc[(DB5[DB5.id.isin(min_subset_ids.id)].index), "label"] = 1

print(
    f"{ round( (len(min_subset_ids)/len(DB5))*100     ,2)  }% of the data decreased alpha_R\n" + 
    f"{ round( (1 - len(min_subset_ids)/len(DB5))*100 ,2)  }% of the data did not decrease alpha_R"
)

40.69% of the data decreased alpha_R
59.31% of the data did not decrease alpha_R


In [4]:
raw_DB5.shape

(14153, 192)

In [5]:
raw_DB5.DATE.astype(str).apply(lambda x: x[:4]).unique()

array(['1982', '1983', '1984', '1985', '1986', '1990', '1994', '1995',
       '1996', '1998', '1999', '2000', '2001', '2002', '2003', '2004',
       '2005', '2006', '2008', '2009', '2011', '2012', '2013', '2014',
       '2015', '2016', '2017', '1997', '1987', '1989', '1991', '1992',
       '1993', '1988'], dtype=object)

In [6]:
sorted(DB2.DATE.astype(str).apply(lambda x: x[:4]).unique())

['1982',
 '1983',
 '1984',
 '1985',
 '1986',
 '1987',
 '1988',
 '1989',
 '1990',
 '1991',
 '1992',
 '1993',
 '1994',
 '1995',
 '1996',
 '1997']

In [7]:
sorted(DB5.DATE.astype(str).apply(lambda x: x[:4]).unique())

['1982',
 '1983',
 '1984',
 '1985',
 '1986',
 '1987',
 '1988',
 '1989',
 '1990',
 '1991',
 '1992',
 '1993',
 '1994',
 '1995',
 '1996',
 '1997',
 '1998',
 '1999',
 '2000',
 '2001',
 '2002',
 '2003',
 '2004',
 '2005',
 '2006',
 '2008',
 '2009',
 '2011',
 '2012',
 '2013',
 '2014',
 '2015',
 '2016',
 '2017']

In [8]:
sorted(DB5[~DB5.id.isin(DB2.id.values)].DATE.astype(str).apply(lambda x: x[:4]).unique())

['1989',
 '1990',
 '1994',
 '1995',
 '1996',
 '1997',
 '1998',
 '1999',
 '2000',
 '2001',
 '2002',
 '2003',
 '2004',
 '2005',
 '2006',
 '2008',
 '2009',
 '2011',
 '2012',
 '2013',
 '2014',
 '2015',
 '2016',
 '2017']

In [9]:
num_features = raw_DB5.select_dtypes(include=['int', 'float']).columns.tolist()
cat_features = raw_DB5.select_dtypes(include=['object']).columns.tolist()

In [10]:
len(num_features)

164

In [11]:
len(cat_features)

28

In [12]:
len(num_features) + len(cat_features)

192

In [14]:
DB5["TAV"].describe()

count    6252.000000
mean     1584.812505
std       781.201366
min       154.322552
25%      1015.637989
50%      1395.060888
75%      2042.758782
max      8484.381925
Name: TAV, dtype: float64

In [21]:
DB5[DB5["TAV"]>8484][["TIV","TEV","TIV","TAUTH"]]

Unnamed: 0,TIV,TEV,TIV.1,TAUTH
3247,3129.0,3408.0,3129.0,0.508


In [16]:
raw_DB5["TIV"].describe()

count    4.369000e+03
mean     2.353812e+03
std      2.006984e+03
min      2.681000e-07
25%      1.198000e+03
50%      1.911000e+03
75%      2.842000e+03
max      2.086000e+04
Name: TIV, dtype: float64

In [20]:
raw_DB5[raw_DB5["TIV"]>2e+04][["TIV","TEV","TAUTH"]]

Unnamed: 0,TIV,TEV,TAUTH
6223,20860.0,4749.0,0.7791
6379,20300.0,4365.0,0.6214


In [23]:
DB5["TAUTH"].describe()

count    6252.000000
mean        0.177301
std         0.147450
min         0.002236
25%         0.069788
50%         0.125950
75%         0.244868
max         1.321000
Name: TAUTH, dtype: float64

In [26]:
DB5[DB5["TAUTH"]>1][["TIV","TEV","TIV","TAUTH","TOK","DATE"]]

Unnamed: 0,TIV,TEV,TIV.1,TAUTH,TOK,DATE
3020,2963.0,3193.0,2963.0,1.011,JET,19880506
3021,2726.0,3142.0,2726.0,1.005,JET,19880506
3029,,2617.0,,1.008,JET,19880715
3120,4293.0,2910.0,4293.0,1.195,JET,19901009
3121,5650.0,3470.0,5650.0,1.321,JET,19901009
3123,4711.0,2923.0,4711.0,1.198,JET,19901009
3124,2784.0,2814.0,2784.0,1.208,JET,19901009
3290,2535.0,2944.0,2535.0,1.07,JET,19960920


# INFORMATION IN THE DATABASE

[Description of Features](https://docs.google.com/document/d/19iVcGGwQGzrTmRmv6QWqR_BRhZhJrsfM/edit?usp=sharing&ouid=108518602132499005107&rtpof=true&sd=true)

IMPORTANT

* For all registers, there is no internal transport barrier registered.
* Not all columns might have the same units even if the represent the same; z.B.: Power. 


**IDEA** Make a note on the TOKAMAKS that use a Limiter and the ones that don't. I think not all Tokamaks have a Divertor as well. Check that. :P 

* Apparently, `TFTR` doesn't have a Divertor; but, it is hard to tell. 

#### NOTE ON HEATING AND DIVERTORS

Obtained from: **ADX: a high field, high power density, advanced divertor and RF tokamak** 

[[B. LaBombard, E. Marmar, J. Irby, T. Rognlien, M.Umansky, 2017]](https://www.osti.gov/pages/biblio/1463827)

<img src="data/Images/ADX: a high field-high power_ICRH.png" class="center" width=600>

In [13]:
# DWMHD = DWDIA
plasma_characteristics = ["QCYL5","BEIMHD","PREMAG","LHTIME","HYBRID",
                          "CONFIG","DWDIA","WMHD","TORQ"
                         ] 
TOK_characteristics = ["TOK","DIVNAME","WALMAT","DIVMAT","LIMMAT"]
ELM = ["ELMTYPE","ELMFREQ"]
heating = ["PECRH", "PICRH", "ICSCHEME","AUXHEAT","ENBI"]
impurities = ["EVAP","ZEFF","ZEFFNEO","PRAD",]
 # corrections on power loss | NBI Power lost by unconfined orbits
power = ["PLTH","PFLOSS","POHM"]
temperatures = ["TAV","TEV","TIV"]
# e-density in SOL | total due to NBI| total due to ICRH
fast_particles = ["NESOL","WFFORM","WFICFORM"] 

interesting_features = plasma_characteristics + TOK_characteristics + ELM + heating + \
                       impurities + power + temperatures + fast_particles

In [14]:
physical_variables = ["RHOSTAR","BETASTAR","NUSTAR"]
engineering_variables = ["KAREA","EPS","NEL","IP","MEFF","BT","PLTH","RGEO"]

entropy_features = ['RHOSTAR', 'BETASTAR', 'KAREA', 'EPS', 'NEL', 'TAV', 'QCYL5', 'NUSTAR']
research_features = ['TAUTH','NEL','TAV','BT','RHOSTAR','NUSTAR','BETASTAR']

In [15]:
categorical = ["PREMAG","HYBRID","CONFIG","ELMTYPE",
               "ICSCHEME","AUXHEAT","EVAP"] + TOK_characteristics 

In [16]:
# Removing: HYBRID, LHTIME DIVNAME
plasma_characteristics = ["BEIMHD", "BETASTAR", "CONFIG", "DWDIA", "EPS", "IP", "KAREA",
                          "LCOULOMB", "MEFF", "NUSTAR", "PREMAG", "Q95", "RHOSTAR",
                          "TORQ", "VOL", "WMHD"]
TOK_characteristics = ["AMIN", "BT", "DIVMAT", "LIMMAT", "TOK", "WALMAT"]
ELM = ["ELMFREQ", "ELMTYPE"]
heating = ["AUXHEAT", "ECHMODE", "ICSCHEME", "PECRH", "PELLET", "PICRH"]
impurities = ["ENBI", "EVAP", "PNBI", "POHM", "PRAD", "ZEFF", "ZEFFNEO"]
power = ["PFLOSS", "PLTH"]
temperatures = ["TAV", "TEV", "TIV"]
fast_particles = ["NEL", "NESOL", "OMEGACYCL", "WFFORM", "WFICFORM"]

features = (plasma_characteristics + TOK_characteristics + ELM + heating + 
                       impurities + power + temperatures  + fast_particles)

len(features)

47

In [17]:
sorted(impurities)

['ENBI', 'EVAP', 'PNBI', 'POHM', 'PRAD', 'ZEFF', 'ZEFFNEO']

In [18]:
"Acknowledgements".upper()

'ACKNOWLEDGEMENTS'

### TREATMENT TO CATEGORICAL DATA

#### Replacement of NaN to UNKNOWN

In [19]:
DB5[categorical] = DB5[categorical].fillna('UNKNOWN')
DB5["ICSCHEME"]   = DB5["ICSCHEME"].str.replace("OFF","NONE",regex=False)

In [20]:
DB5.TOK.unique()

array(['ASDEX', 'AUG', 'AUGW', 'CMOD', 'COMPASS', 'D3D', 'JET', 'JETILW',
       'JFT2M', 'JT60U', 'MAST', 'NSTX', 'PBXM', 'PDX', 'START', 'TCV',
       'TDEV', 'TFTR'], dtype=object)

In [21]:
DB5["DIVNAME"].unique()

array(['DV-IPRE', 'DV-IPOST', 'DV-II-C', 'DIV-I', 'DIV-II', 'DIV-IIb',
       'DIV-IIc', 'DIV-IId', 'DIV-III', 'NONAME', 'DIV1', 'OPEN', 'ADP',
       'RDP', 'MARK0', 'MARKI', 'MARKIIA', 'MARKIIAP', 'MARKGB',
       'MARKGBSR', 'ILW', 'RIB', 'PLATES1', 'UNKNOWN'], dtype=object)

In [22]:
DB5["DIVNAME"] = DB5["DIVNAME"].str.replace("(DIV-I)|(DV-IPRE)|(DV-IPOST)",
                                            "DV-I",regex=True)
DB5["DIVNAME"] = DB5["DIVNAME"].str.replace("(DIV-II)|(DV-IIc)|(DV-II-C)|(DV-IIb)|(DV-IIc)|(DV-IId)|(DV-IId)",
                                            "DV-II",regex=True)
DB5["DIVNAME"] = DB5["DIVNAME"].str.replace("(MARK0)|(MARKI)|(MARKIIA)|(MARKGB)|(MARKGBSR)|"+
                                            "(MARKIA)|(MARKIAP)|(MARKSR)|(MARKA)|(MARKP)",
                                            "MARK",regex=True)
DB5["DIVNAME"].unique()

array(['DV-I', 'DV-II', 'DV-III', 'NONAME', 'DIV1', 'OPEN', 'ADP', 'RDP',
       'MARK', 'MARKIA', 'MARKIAP', 'MARKSR', 'ILW', 'RIB', 'PLATES1',
       'UNKNOWN'], dtype=object)

In [23]:
DB5["DIVNAME"].unique()

array(['DV-I', 'DV-II', 'DV-III', 'NONAME', 'DIV1', 'OPEN', 'ADP', 'RDP',
       'MARK', 'MARKIA', 'MARKIAP', 'MARKSR', 'ILW', 'RIB', 'PLATES1',
       'UNKNOWN'], dtype=object)

In [24]:
power = ["PLTH","PFLOSS"]
temperatures = ["TAV","TEV","TIV"]
fast_particles = ["NESOL","WFFORM","WFICFORM","OMEGACYCL","NEL"] 

In [25]:
sorted(fast_particles)

['NEL', 'NESOL', 'OMEGACYCL', 'WFFORM', 'WFICFORM']

In [26]:
len(fast_particles)

5

In [27]:
strrr = "'WFICFORM', 'WFFORM', 'ZEFFNEO', 'DWDIA', 'RHOSTAR', 'POHM', 'BETASTAR', 'NEL', 'WMHD', 'TAV', 'NUSTAR', 'PFLOSS', 'EPS', 'PLTH', 'QCYL5'"

In [28]:
strrr.replace("'","")

'WFICFORM, WFFORM, ZEFFNEO, DWDIA, RHOSTAR, POHM, BETASTAR, NEL, WMHD, TAV, NUSTAR, PFLOSS, EPS, PLTH, QCYL5'

In [29]:
"INVESTIGATING THE DEPENDENCE ON MACHINE SIZE OF THE ENERGY CONFINEMENT IN TOKAMAKS USING DATA-DRIVEN METHODS".title()

'Investigating The Dependence On Machine Size Of The Energy Confinement In Tokamaks Using Data-Driven Methods'

In [30]:
len(DB5[features].select_dtypes(include=['int', 'float']).columns.tolist())

35

In [31]:
len(DB5[features].select_dtypes(include=['object']).columns.tolist())

12

In [32]:
DB5

Unnamed: 0,ind,id,label,TOK,TOK_ID,DIVNAME,LCUPDATE,DATE,SHOT,TIME,TIME_ID,T1,T2,AUXHEAT,PHASE,HYBRID,ITB,ITBTYPE,ELMTYPE,ELMFREQ,ELMMAX,ELMDUR,ELMINT,OLTIME,LHTIME,TPI,ISEQ,MEFF,PGASA,PGASZ,BGASA,BGASZ,BGASA2,BGASZ2,PELLET,FUELRATE,XGASZ,XGASA,CONFIG,RGEO,RMAG,AMIN,KAPPA,KAPPAA,KAREA,DELTA,DELTAU,DELTAL,INDENT,AREA,VOL,SURFFORM,SEPLIM,XPLIM,WALMAT,DIVMAT,LIMMAT,EVAP,DALFMP,DALFDV,IGRADB,BT,IEML,PREMAG,IP,VSURF,Q95,SH95,BEILI2,BEIMHD,BEPMHD,BETMHD,BEPDIA,BMHDMDIA,TAUCR,FBS,RHOQ2,RHOINV,NEL,NELFORM,DNELDT,NEV,NE0,NE0TSC,ZEFF,ZEFFNEO,PRAD,POHM,ENBI,PINJ,BSOURCE,PINJ2,BSOURCE2,COCTR,PNBI,PFLOSS,ECHFREQ,ECHMODE,ECHLOC,PECRHC,PECRH,ICFREQ,ICSCHEME,ICANTEN,PICRHC,PICRH,PALPHA,DWDIA,DWDIAPAR,DWMHD,DWHC,TEV,TE0,TE0TSC,TIV,TI0,TICX0,WDIA,WMHD,WKIN,WEKIN,WIKIN,WROT,WFPER,WFPAR,WFFORM,WFANI,WFICRH,WFICRHP,WFICFORM,ICFORM,WFANIIC,TAUDIA,TAUMHD,TAUTH1,TAUTH2,WTOT,WTH,PL,PLTH,TAUTOT],TAUTH,TAUC92,TAUC93,H89,HITER96L,H93,HITER92Y,HEPS97,HIPB98Y,HIPB98Y1,HIPB98Y2,HIPB98Y3,HIPB98Y4,OMGAIMP0,OMGAIMPH,OMGAM0,OMGAMH,SPIN,TORQ,TORQBM,TORQIN,VTOR0,VTORV,VTORIMP,STANDARD,SELDB1,SELDB2,SELDB2X,IAEA92,DB2P5,DB2P8,DB3IS,DB3V5,IAE2000N,IAE2000X,HMWS2003,IAE2004S,IAE2004I,DB3DONLY,HMWS2005,OJK2006,SELDB3,SELDB3X,SELDB4,AAREA,STDDB4V5,NEGW,NEL/NEGW,NUSTAREAV,QCYL,NUSTARECYL,RHOSTAR,BETATHT,TI0/TE0,BETAN,NESEP,NESOL,PMAIN,PDIV,GP_MAIN,GP_DIV,SELDB5,STD3,TAV,BETATHTP,BETATHNP,NUSTARAVP,NUSTARAVPNOQ,RHOSTARAVP,TE0/TAV,NEO/NAV,H98y2 calc,ABS IP,EPSILON,LTAUTH,LIP,LBT,LNEL,LKAPPA,L(1+DELTA),LRGEO,LEPSILON,LMEFF,LPLTH,LTAUTH/NEL^0.2,LBTAUTH,LNUSTAR,LNUSTARNOQ,LRHOSTAR,LQ95,LBETATH,database_index,EPS,LCOULOMB,OMEGACYCL,QCYL5,TAUBOHM,BETASTAR,NUSTAR,IP_error,BT_error,NEL_error,PLTH_error,POHM_error,PNBI_error,DWDIA_error,DWMHD_error,PICRH_error,PECRH_error,PL_error,PFLOSS_error,RGEO_error,AMIN_error,EPS_error,VOL_error,KAREA_error,MEFF_error
0,12405,HDULEH,0,ASDEX,1.0,DV-I,19940125.0,19820622,5980,1.205,1205.0,1.203,1.207,NB,HGELM,UNKNOWN,UNKNOWN,,UNKNOWN,,,,,1.110,1.166,6.0,NONE,1.5,2.0,1.0,1.0,1.0,0.0,0.0,NONE,,0.0,0.0,DN,1.694,1.8030,0.4101,0.9752,0.975092,0.9748,0.0000,0.0000,0.00000,0.0,0.5152,5.4820,27.088080,0.1564,,SS,TI2,NONE,NONE,,,,2.205,0.0,UNKNOWN,0.2959,0.23200,4.563,,2.649,2.0190,2.019,0.009265,1.6670,0.4481,5.423056,,,0.277121,3.789,0.0,-1.959000e+19,3.017000e+19,5.100000e+19,,,,0.0,68660.0,42000.00,2620000.0,404020.0,0.0,0.0,1.0000,2599000.0,585700.0,0.000000e+00,NONE,NONE,0.0,0.0,0.0,NONE,NONE,0.0,0.0,0.0,212000.0,620500.0,212000.0,1.0,,,,,,,116400.0,141100.0,,,,,,,24260.0,0.5780,0.0,0.0,0.0,0.0,0.0,0.04746,0.05753,,0.05100,119600.0,95370.0,2456000.0,1.8700,0.04871,0.05100,0.6667,0.6667,1.612,1.413,0.9272,1.103,1.126,1.150,1.119,1.132,1.130,1.0110,,,,,,,,,,,,1.0,111.0,1.111111e+09,1.111111e+09,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.111111e+09,111100.0,0.0,,1.0,5.600360,0.676564,,3.608531,,0.006430,0.002989,,2.831384,,,,,,,1.0,1.0,1086.986716,0.005725,1.749560,0.524442,0.114934,0.004561,,1.690421,2.049516,295900.0,0.242090,-2.975930,12.597777,0.790728,45.081219,-0.025113,0.000000,0.527093,-1.418447,0.405465,14.441449,-11.992173,-2.185202,-0.645420,-2.163401,-5.390209,1.517980,-5.162913,2.0,0.242090,15.406717,1.470000,3.606992,0.074970,0.601125,0.223030,,,,,,,,,,,,,,,,,,
1,12406,NAC6N1,0,ASDEX,1.0,DV-I,19940125.0,19820622,5980,1.224,1224.0,1.222,1.226,NB,HGELM,UNKNOWN,UNKNOWN,,UNKNOWN,,,,,1.110,1.166,7.0,NONE,1.5,2.0,1.0,1.0,1.0,0.0,0.0,NONE,,0.0,0.0,DN,1.684,1.7970,0.4113,0.9724,0.972233,0.9724,0.0000,0.0000,0.00000,0.0,0.5167,5.4680,26.969199,0.1643,,SS,TI2,NONE,NONE,,,,2.205,0.0,UNKNOWN,0.2952,0.07778,4.671,,2.720,2.0900,2.090,0.009540,1.7510,0.4348,16.042211,,,0.273998,3.734,0.0,-4.468000e+19,2.940000e+19,5.200000e+19,,,,0.0,22960.0,42000.00,2620000.0,404020.0,0.0,0.0,1.0000,2598000.0,596700.0,0.000000e+00,NONE,NONE,0.0,0.0,0.0,NONE,NONE,0.0,0.0,0.0,0.0,-485.4,0.0,1.0,,,,,,,121100.0,144500.0,,,,,,,25220.0,0.5782,0.0,0.0,0.0,0.0,0.0,0.04628,0.05522,,0.04902,124400.0,99230.0,2621000.0,2.0240,0.04748,0.04902,0.6667,0.6667,1.641,1.470,0.9572,1.139,1.165,1.185,1.152,1.169,1.167,1.0440,,,,,,,,,,,,1.0,111.0,1.111111e+09,1.111111e+09,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.111111e+09,111100.0,0.0,,1.0,5.554557,0.672241,,3.649662,,0.006596,0.003119,,2.930889,,,,,,,1.0,1.0,1150.578521,0.005972,1.834713,0.464241,0.099388,0.004679,,1.768707,2.094945,295200.0,0.244240,-3.015527,12.595408,0.790728,45.066597,-0.027988,0.000000,0.521172,-1.409604,0.405465,14.520586,-12.028846,-2.224799,-0.767352,-2.308725,-5.364703,1.541373,-5.120679,2.0,0.244240,15.470883,1.470000,3.647604,0.072059,0.627056,0.195418,,,,,,,,,,,,,,,,,,
2,12411,U2T1C7,0,ASDEX,1.0,DV-I,19940125.0,19820622,5982,1.188,1188.0,1.186,1.190,NB,HSELM,UNKNOWN,UNKNOWN,,UNKNOWN,,,,,1.115,1.165,5.0,NONE,1.5,2.0,1.0,1.0,1.0,0.0,0.0,NONE,,0.0,0.0,DN,1.693,1.7870,0.4043,0.9895,0.989248,0.9895,0.0000,0.0000,0.00000,0.0,0.5080,5.4050,26.880691,0.1623,,SS,TI2,NONE,NONE,,,,2.205,0.0,UNKNOWN,0.2971,-0.05525,4.275,,2.289,1.6600,1.660,0.007679,1.3130,0.4427,22.850781,,,0.282055,3.410,0.0,1.926000e+20,2.830000e+19,4.500000e+19,,,,0.0,0.0,42000.00,2620000.0,404020.0,0.0,0.0,1.0000,2590000.0,558000.0,0.000000e+00,NONE,NONE,0.0,0.0,0.0,NONE,NONE,0.0,0.0,0.0,815400.0,1187000.0,942200.0,1.0,,,,,,,92490.0,117000.0,,,,,,,23670.0,0.5724,0.0,0.0,0.0,0.0,0.0,0.05230,0.07126,,0.06375,95840.0,72170.0,1690000.0,1.1320,0.05671,0.06375,0.6667,0.6667,1.564,1.261,0.8309,1.016,1.040,1.084,1.051,1.039,1.037,0.9231,,,,,,,,,,,,0.0,1.0,1.111101e+09,1.111011e+09,0.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.111111e+09,111100.0,0.0,,1.0,5.785563,0.589398,,3.545410,,0.006023,0.002295,,2.304169,,,,,,,1.0,1.0,927.006370,0.004394,1.318480,0.617943,0.144548,0.004272,,1.590106,2.053766,297100.0,0.238807,-2.752786,12.601824,0.790728,44.975829,-0.010556,0.000000,0.526502,-1.432100,0.405465,13.939497,-11.747952,-1.962059,-0.481360,-1.934144,-5.455567,1.452784,-5.427507,2.0,0.238807,15.300207,1.470000,3.544311,0.093713,0.461374,0.274718,,,,,,,,,,,,,,,,,,
3,12412,422XQB,0,ASDEX,1.0,DV-I,19940125.0,19820622,5982,1.216,1216.0,1.214,1.218,NB,HGELM,UNKNOWN,UNKNOWN,,UNKNOWN,,,,,1.115,1.165,6.0,NONE,1.5,2.0,1.0,1.0,1.0,0.0,0.0,NONE,,0.0,0.0,DN,1.693,1.8020,0.4103,0.9748,0.974708,0.9744,0.0000,0.0000,0.00000,0.0,0.5155,5.4820,27.079877,0.1570,,SS,TI2,NONE,NONE,,,,2.205,0.0,UNKNOWN,0.2959,0.10690,4.574,,2.658,2.0280,2.028,0.009310,1.6810,0.4425,11.762454,,,0.276798,3.775,0.0,6.995000e+19,3.082000e+19,4.900000e+19,,,,0.0,31630.0,42000.00,2620000.0,404020.0,0.0,0.0,1.0000,2599000.0,584900.0,0.000000e+00,NONE,NONE,0.0,0.0,0.0,NONE,NONE,0.0,0.0,0.0,683900.0,659500.0,662500.0,1.0,,,,,,,117400.0,141600.0,,,,,,,24490.0,0.5772,0.0,0.0,0.0,0.0,0.0,0.06040,0.07206,,0.06990,120700.0,96200.0,1961000.0,1.3760,0.06154,0.06991,0.6667,0.6667,1.821,1.553,1.0370,1.242,1.264,1.303,1.260,1.259,1.257,1.1210,,,,,,,,,,,,1.0,111.0,1.111111e+09,1.111011e+09,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.111111e+09,111100.0,0.0,,1.0,5.594901,0.674721,,3.612740,,0.006467,0.003015,,2.846524,,,,,,,1.0,1.0,1100.512996,0.005775,1.765647,0.510089,0.111519,0.004587,,1.589877,2.411039,295900.0,0.242351,-2.660547,12.597777,0.790728,45.077517,-0.025523,0.000000,0.526502,-1.417369,0.405465,14.134691,-11.676050,-1.869819,-0.673170,-2.193558,-5.384513,1.520388,-5.154247,2.0,0.242351,15.420935,1.470000,3.611224,0.102768,0.606357,0.216753,,,,,,,,,,,,,,,,,,
4,12413,WZ9FED,0,ASDEX,1.0,DV-I,19940125.0,19820622,5982,1.244,1244.0,1.242,1.246,NB,HGELM,UNKNOWN,UNKNOWN,,UNKNOWN,,,,,1.115,1.165,7.0,NONE,1.5,2.0,1.0,1.0,1.0,0.0,0.0,NONE,,0.0,0.0,DN,1.685,1.8020,0.4127,0.9690,0.969012,0.9691,0.0000,0.0000,0.00000,0.0,0.5185,5.4900,27.031096,0.1619,,SS,TI2,NONE,NONE,,,,2.204,0.0,UNKNOWN,0.2942,0.12590,4.758,,2.809,2.1780,2.178,0.009887,1.8420,0.4313,9.883040,,,0.272429,3.847,0.0,-3.992000e+19,2.993000e+19,5.400000e+19,,,,0.0,37030.0,42000.00,2620000.0,404020.0,0.0,0.0,1.0000,2600000.0,604000.0,0.000000e+00,NONE,NONE,0.0,0.0,0.0,NONE,NONE,0.0,0.0,0.0,0.0,140300.0,0.0,1.0,,,,,,,126600.0,149700.0,,,,,,,25150.0,0.5800,0.0,0.0,0.0,0.0,0.0,0.04806,0.05683,,0.05151,129900.0,104700.0,2637000.0,2.0330,0.04925,0.05151,0.6667,0.6667,1.707,1.538,1.0090,1.194,1.218,1.238,1.202,1.221,1.219,1.0900,,,,,,,,,,,,1.0,111.0,1.111111e+09,1.111111e+09,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.111111e+09,111100.0,0.0,,1.0,5.498247,0.699678,,3.670684,,0.006642,0.003281,,3.056806,,,,,,,1.0,1.0,1173.621999,0.006282,1.942113,0.466644,0.098076,0.004712,,1.804210,2.190500,294200.0,0.244926,-2.965979,12.592015,0.790274,45.096410,-0.031491,0.000000,0.521766,-1.406800,0.405465,14.525023,-11.985261,-2.175705,-0.762188,-2.322016,-5.357733,1.559827,-5.070129,2.0,0.244926,15.475806,1.469333,3.668909,0.075685,0.659569,0.193993,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6247,24964,K19A5H,0,TDEV,14.0,NONAME,19990916.0,19980408,33171,0.800,800.0,,,EC,HSELM,UNKNOWN,UNKNOWN,,UNKNOWN,,4.180000e+19,0.000285,1.530000e+16,,,0.0,NONE,2.0,2.0,1.0,0.0,0.0,0.0,0.0,NONE,,0.0,0.0,SN,0.835,0.8539,0.2153,1.0810,1.119306,1.1030,0.1447,0.2282,0.06121,0.0,0.1630,0.8426,7.390283,0.0502,0.07441,CSS,CC,C,BOROC,,,1.0,1.951,0.0,NO,-0.2198,-0.76200,3.131,,1.188,0.6438,,0.006527,0.3408,,0.604551,,,0.374332,5.337,0.0,-6.362000e+18,4.513000e+19,6.300000e+19,,,,84760.0,167500.0,0.00,0.0,0.0,0.0,0.0,,0.0,0.0,1.100000e+11,X,OUT,207200.0,207200.0,0.0,NONE,NONE,0.0,0.0,0.0,-1447.0,,-1447.0,0.0,689.6,1928.0,1618.0,238.4,397.4,,6481.0,12240.0,9567.0,7276.0,2291.0,,0.0,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.0,0.01759,0.03267,0.02553,,9567.0,9567.0,376200.0,0.3762,0.02543,0.02543,1.0000,1.0000,1.469,1.235,0.9445,1.236,1.217,1.248,1.228,1.092,1.204,1.0390,,,,,,,,,,,,0.0,0.0,0.000000e+00,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.111111e+09,1000.0,0.0,,1.0,15.093506,0.353596,,2.671424,0.534630,0.010880,0.002543,0.206120,1.247347,,,,,,,1.0,1.0,503.655066,0.004773,0.912081,1.024655,0.327261,0.007718,3.828017,1.395967,1.091508,219800.0,0.257844,-3.671826,12.300473,0.668342,45.423780,0.077887,0.135143,-0.180324,-1.355399,0.693147,12.837876,-12.756582,-3.003484,0.024356,-1.116996,-4.864251,1.141352,-5.344853,2.0,0.257844,14.466163,0.975500,2.757718,0.024807,0.501128,0.471050,,,,,,,,,,,,,,,,,,
6248,26411,D26CA1,1,TFTR,13.0,NONAME,19980115.0,19900124,45980,4.600,4600.0,,,NB,HSELM,UNKNOWN,UNKNOWN,,UNKNOWN,,,,,,,0.0,NONE,2.0,2.0,1.0,2.0,1.0,0.0,0.0,NONE,,0.0,0.0,LIM,2.454,2.6720,0.8033,0.9339,0.933779,0.9997,0.0000,0.0000,0.00000,0.0,1.8930,31.2500,75.295603,0.0000,,C,NONE,C,NONE,,,,4.795,0.0,UNKNOWN,0.9794,-0.52440,8.050,,2.535,1.3070,1.488,0.001809,0.9427,,11.503916,,,0.165961,3.500,0.0,1.387000e+19,2.847000e+19,4.900000e+19,,,,,-732600.0,90.07,11400000.0,622711.0,0.0,0.0,0.5011,10310000.0,4297000.0,0.000000e+00,NONE,NONE,0.0,0.0,0.0,NONE,NONE,0.0,0.0,0.0,162800.0,,162800.0,0.0,1797.0,4517.0,,2198.0,6579.0,,1046000.0,1449000.0,885200.0,466400.0,418800.0,,186600.0,184900.0,371500.0,0.5023,0.0,0.0,0.0,0.0,0.0,0.09956,0.12670,0.14460,0.14460,1449000.0,885200.0,9410000.0,5.1120,0.15400,0.17310,1.0000,1.0000,2.164,2.403,1.0020,1.445,1.465,1.488,1.266,1.309,1.351,1.0880,,,,,,,,,,,,0.0,0.0,0.000000e+00,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.111111e+09,100.0,0.0,,1.0,4.831200,0.724458,,6.025521,0.249074,0.002314,0.001102,1.456498,0.711451,,,,,,,1.0,1.0,1916.017316,0.001971,0.775248,0.258689,0.032135,0.001642,2.357494,1.721110,1.993508,979400.0,0.327343,-1.753886,13.794695,1.567574,45.001880,-0.068386,0.000000,0.897719,-1.116746,0.693147,15.447101,-10.754262,-0.186312,-1.352128,-3.437800,-6.412122,2.085672,-6.229105,2.0,0.327343,16.013226,2.397500,6.010681,0.415007,0.206978,0.105809,,,,,,,,,,,,,,,,,,
6249,26412,UPV67I,1,TFTR,13.0,NONAME,19980115.0,19900124,45984,4.200,4200.0,,,NB,HSELM,UNKNOWN,UNKNOWN,,UNKNOWN,,,,,,,0.0,NONE,2.0,2.0,1.0,2.0,1.0,0.0,0.0,NONE,,0.0,0.0,LIM,2.456,2.6860,0.8052,0.9201,0.920049,0.9984,0.0000,0.0000,0.00000,0.0,1.8740,31.3800,75.017295,0.0000,,C,NONE,C,NONE,,,,4.791,0.0,UNKNOWN,0.9720,-0.64350,8.188,,2.697,1.4460,1.691,0.001995,0.9552,,9.311503,,,0.166476,3.426,0.0,3.110000e+19,2.759000e+19,4.900000e+19,,,,,-777100.0,90.07,11300000.0,622711.0,0.0,0.0,0.5055,10280000.0,3971000.0,0.000000e+00,NONE,NONE,0.0,0.0,0.0,NONE,NONE,0.0,0.0,0.0,536100.0,,536100.0,0.0,1922.0,4376.0,,2293.0,7233.0,,1045000.0,1581000.0,908800.0,478800.0,430000.0,,201000.0,196000.0,397100.0,0.5063,0.0,0.0,0.0,0.0,0.0,0.10460,0.14280,0.13630,0.13630,1581000.0,908800.0,8964000.0,4.9930,0.17640,0.18200,1.0000,1.0000,2.454,2.544,1.0590,1.532,1.558,1.579,1.330,1.374,1.417,1.1400,,,,,,,,,,,,0.0,0.0,0.000000e+00,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.111111e+09,100.0,0.0,,1.0,4.772097,0.717923,,6.006869,0.212704,0.002362,0.001144,1.652879,0.791784,,,,,,,1.0,1.0,2001.262746,0.002019,0.801211,0.236159,0.028842,0.001675,2.186619,1.776006,2.056451,972000.0,0.327850,-1.703749,13.787111,1.566739,44.980510,-0.083273,0.000000,0.898534,-1.115199,0.693147,15.423547,-10.699851,-0.137009,-1.443250,-3.545920,-6.391885,2.102670,-6.205276,2.0,0.327850,16.067440,2.395500,5.985773,0.435981,0.211969,0.094721,,,,,,,,,,,,,,,,,,
6250,14027,9GJ6AF,0,AUG,,UNKNOWN,,19960521,8175,1.502,,,,UNKNOWN,HGELM,UNKNOWN,,,UNKNOWN,,,,,,,,,2.0,,,,,,,,,,,UNKNOWN,1.651,,0.4918,,,1.5910,,,,,,12.5400,,,,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,,,,-2.506,,UNKNOWN,1.0010,,4.023,,,,,,,,,,,,6.095,,,,,,0.0,,,192800.0,,,,,,,7204000.0,1354000.0,,,,,0.0,,UNKNOWN,,,0.0,,274800.0,,292600.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,7050000.0,5.7000,,0.09657,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.006822,,,,,,,,,,,,1704.528852,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.297880,15.618913,1.253000,2.952384,0.121002,1.173949,0.086448,"(0.9909899999999999, 1.01101)","(-2.48094, -2.5310599999999996)","(5.91215, 6.27785)","(5.586, 5.814)","(163880.0, 221720.0)","(6.4836e6, 7.9244e6)","(192360.0, 357240.0)","(204820.0, 380380.0)","(0.0, 0.0)","(0.0, 0.0)","(6.44266e6, 7.76574e6)","(1.0832e6, 1.6248e6)","(1.6427450000000001, 1.659255)","(0.48688200000000004, 0.496718)","(0.2934341014491444, 0.3023707270452809)","(12.163799999999998, 12.9162)","(1.5052429379008063, 1.6803023243244037)","(1.6, 2.4)"


In [39]:
DB5[["IP","BT","MEFF","EPS","TAUTH"]]

Unnamed: 0,IP,BT,MEFF,EPS,TAUTH
0,0.2959,2.205,1.5,0.242090,0.05100
1,0.2952,2.205,1.5,0.244240,0.04902
2,0.2971,2.205,1.5,0.238807,0.06375
3,0.2959,2.205,1.5,0.242351,0.06991
4,0.2942,2.204,1.5,0.244926,0.05151
...,...,...,...,...,...
6247,-0.2198,1.951,2.0,0.257844,0.02543
6248,0.9794,4.795,2.0,0.327343,0.17310
6249,0.9720,4.791,2.0,0.327850,0.18200
6250,1.0010,-2.506,2.0,0.297880,0.09657


In [35]:
DB5[["IP","BT","MEFF","TAUTH"]].describe()

Unnamed: 0,IP,BT,MEFF,TAUTH
count,6252.0,6252.0,6252.0,6252.0
mean,-0.400828,-1.272335,1.916213,0.177301
std,1.555441,1.84576,0.286293,0.14745
min,-5.134,-5.821,1.0,0.002236
25%,-1.968693,-2.482,1.981666,0.069788
50%,0.3718,-2.011,2.0,0.12595
75%,0.998,-0.929075,2.0,0.244868
max,2.472853,4.795,3.890214,1.321


In [38]:
df = DB5[["IP","BT","MEFF","EPS","TAUTH"]]
df[df.MEFF < 1.7]

Unnamed: 0,IP,BT,MEFF,EPS,TAUTH
0,0.2959,2.205,1.5,0.242090,0.05100
1,0.2952,2.205,1.5,0.244240,0.04902
2,0.2971,2.205,1.5,0.238807,0.06375
3,0.2959,2.205,1.5,0.242351,0.06991
4,0.2942,2.204,1.5,0.244926,0.05151
...,...,...,...,...,...
6161,0.3876,1.768,1.5,0.285714,0.03944
6162,0.3846,1.758,1.5,0.285714,0.03815
6163,0.3764,1.767,1.5,0.285714,0.03310
6164,0.3747,1.767,1.5,0.285714,0.03299
