# Frequency of IDs that Decrease $\alpha_R$

In [1]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import scipy as sp
import statsmodels.api as sm

import matplotlib.patches as mpatches

from scipy.optimize import curve_fit
from sklearn.linear_model import LinearRegression

from IPython.core.debugger import Pdb #Pdb().set_trace()


# Show every column when a DataFrame is displayed (no truncation of wide frames).
pd.options.display.max_columns = None

# Column names used as the (log-transformed) regressors in get_regression.
coeffs = ["IP", "BT", "NEL", "PLTH", "RGEO", "KAREA", "EPS", "MEFF"]
# Directory prefix, relative to the notebook, prepended to every CSV file name.
path = "../data/"

In [2]:
# Load the random-sampling results table from the data directory.
random_sampling = pd.read_csv(
    f"{path}decreased_dataset_random_sampling_500_decreasing_points.csv"
)

In [3]:
# Per-column summary statistics, one row per column of random_sampling.
df = random_sampling.describe().T

# Cut-off applied to each column's minimum to pick the first subset size of interest.
threshold = 0.75

# First column (in column order) whose minimum falls below the threshold; its
# name ends in "_<size>", which is parsed into the subset size.
below_threshold = df.index[df["min"] < threshold]
if below_threshold.empty:
    raise ValueError(f"No column of random_sampling has a minimum below {threshold}")
first_subset_col = below_threshold[0]
min_subset_size = int(first_subset_col.split("_")[-1])

# Locate that column by label instead of assuming it sits at position
# `min_subset_size - 1`. The positional form breaks as soon as this cell is
# re-run, because random_sampling is overwritten with the trimmed frame below.
start = random_sampling.columns.get_loc(first_subset_col)
amounts = random_sampling.columns[start:]

# Largest value over the kept columns, excluding the `seed` column by name
# rather than relying on it having the largest column maximum.
alpha_cols = amounts.drop("seed", errors="ignore")
max_alpha_R = round(random_sampling[alpha_cols].describe().T["max"].max(), 4)

# Keep only the columns from the first qualifying subset size onwards.
random_sampling = random_sampling[amounts]
print(f"After sampling size of {min_subset_size}, all alpha-R in ramdon sampling will be < {max_alpha_R}")

After sampling size of 1061, all alpha-R in ramdon sampling will be < 0.8631


In [4]:
# Raw tables.
DB2P8 = pd.read_csv(path + "DB2P8.csv")
DB5 = pd.read_csv(path + "DB5.csv")
# DB2P8 has more columns than DB5; keep only the columns DB5 has.
DB2 = DB2P8[DB5.columns]

# Append to DB5 the DB2P8 shots whose id is absent from it
# (two shots, according to the original author's note).
in_DB5 = DB2["id"].isin(DB5["id"].to_numpy())
missing_shots = DB2.loc[~in_DB5].reset_index(drop=True)
DB5 = pd.concat([DB5, missing_shots], axis=0, ignore_index=True)

decreasing_ds = pd.read_csv(path + "decreasing_dataset_info.csv")
# R_dec: DB5 rows whose id is listed in decreasing_ds — per the original note,
# the entries new in DB5 that decrease αR.
R_dec = DB5.loc[DB5["id"].isin(decreasing_ds["id"])].reset_index(drop=True)

# Attach the per-id `decreased` and `weights` values from decreasing_ds.
id_to_decreased = dict(zip(decreasing_ds["id"], decreasing_ds["decreased"]))
id_to_weight = dict(zip(decreasing_ds["id"], decreasing_ds["weights"]))
R_dec["decreasing_pts"] = R_dec["id"].map(id_to_decreased)
R_dec["decreasing_weights"] = R_dec["id"].map(id_to_weight)

In [5]:
def get_regression(_R):
    """
    Fit a log-log OLS model of TAUTH on the `coeffs` columns for DB2 plus `_R`.

    The data must be given in linear scale (NOT already log-transformed):
    logarithms are taken here, after taking absolute values of the regressors.

    Parameters
    ----------
    _R : pandas.DataFrame
        Rows stacked below the module-level `DB2` before fitting; an empty
        frame fits on `DB2` alone.

    Returns
    -------
    tuple
        `(data, regression, (n_, p_))`: the stacked frame, the fitted
        statsmodels OLS results, and the design-matrix shape (rows, columns
        including the intercept column).
    """
    stacked = pd.concat([DB2, _R], axis=0, ignore_index=True)

    # Response: log(TAUTH), kept 2-D as a single column.
    log_response = np.log(stacked[["TAUTH"]]).to_numpy()

    # Regressors: log|x| for each column in `coeffs`, with a leading column of
    # ones so the fit includes an intercept.
    log_regressors = np.log(stacked[coeffs].abs())
    log_regressors.insert(loc=0, column="intercept", value=np.ones(len(log_regressors)))
    design = log_regressors.to_numpy()

    fitted = sm.OLS(log_response, design).fit()
    return stacked, fitted, design.shape

# Baseline regression on DB2 only: pass an explicitly empty slice of R_dec so
# no extra rows are stacked. The previous `R_dec.id.isin([0])` filter was empty
# only because no id happens to equal 0.
empty_R = R_dec.iloc[:0]
regression_DB2 = get_regression(empty_R)[1]
# regression_DB2.summary()  # uncomment to inspect the baseline fit

In [6]:
# Inspect the first row of random_sampling as a Series indexed by column name.
random_sampling.iloc[0]

subset_1061      0.809802
subset_1062      0.812110
subset_1063      0.813625
subset_1064      0.812508
subset_1065      0.815533
                  ...    
subset_1483      0.656096
subset_1484      0.656539
subset_1485      0.656929
subset_1486      0.655801
seed           213.000000
Name: 0, Length: 427, dtype: float64

In [14]:
# Draw one weighted sample from R_dec and fit DB2 + sample.
# NOTE(review): 1065 / 213 presumably correspond to the `subset_1065` column and
# the `seed` value of the first random_sampling row — confirm.
sample_size, sample_seed = 1065, 213
R = R_dec.sample(n=sample_size, weights="decreasing_pts", random_state=sample_seed)
regression_R = get_regression(R)[1]
regression_R.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.942
Model:,OLS,Adj. R-squared:,0.942
Method:,Least Squares,F-statistic:,4800.0
Date:,"Mon, 27 Mar 2023",Prob (F-statistic):,0.0
Time:,15:35:08,Log-Likelihood:,376.33
No. Observations:,2375,AIC:,-734.7
Df Residuals:,2366,BIC:,-682.7
Df Model:,8,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-2.3104,0.040,-57.387,0.000,-2.389,-2.231
x1,1.3269,0.024,55.987,0.000,1.280,1.373
x2,0.0563,0.022,2.581,0.010,0.014,0.099
x3,-0.0723,0.016,-4.446,0.000,-0.104,-0.040
x4,-0.5470,0.013,-43.259,0.000,-0.572,-0.522
x5,0.8318,0.040,20.705,0.000,0.753,0.911
x6,0.0635,0.040,1.600,0.110,-0.014,0.141
x7,-0.3788,0.051,-7.469,0.000,-0.478,-0.279
x8,0.1449,0.028,5.089,0.000,0.089,0.201

0,1,2,3
Omnibus:,15.591,Durbin-Watson:,1.329
Prob(Omnibus):,0.0,Jarque-Bera (JB):,22.418
Skew:,0.001,Prob(JB):,1.35e-05
Kurtosis:,3.476,Cond. No.,51.9


In [8]:
# Display R_dec: the DB5 rows whose id appears in decreasing_ds, together with
# the mapped `decreasing_pts` and `decreasing_weights` columns.
R_dec

Unnamed: 0,ind,id,PHASE,TOK,IP,BT,NEL,PLTH,RGEO,KAREA,EPS,MEFF,TAUTH,DATE,SHOT,TIME,Q95,ZEFF,AMIN,VOL,POHM,PNBI,DWDIA,DWMHD,PICRH,PECRH,PL,PFLOSS,TAV,LCOULOMB,QCYL5,TAUBOHM,RHOSTAR,BETASTAR,NUSTAR,OMEGACYCL,decreasing_pts,decreasing_weights
0,13947,ZUEQ42,HGELM,AUG,0.9971,-2.5080,7.120,4.4300,1.6650,1.5920,0.299159,2.0,0.089890,19960502,8006,3.400,3.977,,0.4981,12.9800,400100.0,4981000.0,3578.0,17380.0,0.0,0.0,5320000.0,887000.0,1020.565150,15.028261,3.013448,0.112722,0.005208,0.819783,0.277214,1.254000,1,1.000393
1,13956,X1F9P2,HSELM,AUG,0.7961,-1.9230,7.492,6.0300,1.6640,1.6100,0.297416,1.5,0.051740,19960503,8030,2.550,3.839,,0.4949,12.9500,425000.0,7078000.0,559.5,-34700.0,0.0,0.0,7450000.0,1415000.0,761.312828,14.709729,2.888794,0.066331,0.005113,1.094550,0.495887,1.282000,1,1.000393
2,13966,4RQGW9,HGELM,AUG,0.9988,-2.5070,7.040,4.3600,1.6570,1.5950,0.298431,2.0,0.091890,19960507,8045,2.000,3.950,,0.4945,12.7600,373600.0,4918000.0,36200.0,29690.0,0.0,0.0,5230000.0,876600.0,1054.965718,15.067062,2.981822,0.115184,0.005336,0.838563,0.254183,1.253500,0,1.000000
3,13975,SCOKNW,HGELM,AUG,1.1990,-2.9740,8.741,4.7300,1.6610,1.6040,0.296026,2.0,0.113000,19960509,8092,1.800,3.858,,0.4917,12.7100,398300.0,4979000.0,,-112500.0,0.0,0.0,5470000.0,734600.0,1139.835077,15.036230,2.924405,0.168031,0.004702,0.799382,0.268479,1.487000,0,1.000000
4,13977,NIJVMQ,HSELM,AUG,1.1970,-2.9740,9.431,3.3300,1.6700,1.6160,0.291916,2.0,0.124400,19960509,8092,2.250,3.731,,0.4875,12.6600,757200.0,2907000.0,,-85120.0,0.0,0.0,3750000.0,426000.0,820.096900,14.669024,2.879855,0.184983,0.004022,0.620546,0.551965,1.487000,1,1.000393
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2539,24915,AEWB71,HSELM,START,0.2141,0.2831,4.031,0.3316,0.2804,1.5010,0.764622,1.7,0.004111,19980303,36080,0.043,5.162,,0.2144,0.3818,331600.0,0.0,,0.0,0.0,0.0,331600.0,0.0,209.663690,13.730101,1.734460,0.000685,0.044790,7.483237,0.080593,0.166529,1,1.000393
2540,24916,9OQ1PX,HSELM,START,0.2722,0.2964,5.708,0.5206,0.3008,1.3730,0.780585,1.7,0.004107,19980327,36429,0.038,5.538,,0.2348,0.4493,520600.0,0.0,,0.0,0.0,0.0,520600.0,0.0,197.361941,13.495708,1.474930,0.000716,0.037900,9.099639,0.111958,0.174353,0,1.000000
2541,24917,JKURI6,HSELM,START,0.2634,0.2909,5.101,0.7729,0.3103,1.2920,0.783113,1.7,0.002859,19980327,36437,0.039,5.721,,0.2430,0.4670,322000.0,785000.0,,25000.0,0.0,0.0,1082000.0,309100.0,219.532894,13.658387,1.507336,0.000489,0.039354,9.390759,0.085860,0.171118,0,1.000000
2542,26411,D26CA1,HSELM,TFTR,0.9794,4.7950,3.500,5.1120,2.4540,0.9997,0.327343,2.0,0.173100,19900124,45980,4.600,8.050,,0.8033,31.2500,-732600.0,10310000.0,162800.0,162800.0,0.0,0.0,9410000.0,4297000.0,1916.017316,16.013226,6.010681,0.415007,0.002314,0.206978,0.105809,2.397500,0,1.000000
