In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import scipy as sp
import statsmodels.api as sm

import matplotlib.patches as mpatches

from scipy.optimize import curve_fit
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import DBSCAN
from collections import Counter

# Feature column names: selected alongside TAUTH when the frames are built,
# and standardized as the input matrix for clustering.
coeffs = [
    'IP', 'BT', 'NEL', 'PLTH',
    'RGEO', 'KAREA', 'EPS', 'MEFF',
]

In [3]:
# REMARK: no spherical TOKAMAKs are included in these analyses.

# Read the two databases; DB2P8 has more columns than DB5, so it is cut down
# to DB5's columns (in DB5's order).
DB5 = pd.read_csv("../data/DB5.csv")
DB2P8 = pd.read_csv("../data/DB2P8.csv")
DB2P8 = DB2P8[DB5.columns]

# Dataset to reintroduce. NOTE(review): an earlier version apparently derived
# it as DB5[DB5.id.isin(new_ids.id.values)]; it is now read from file — confirm
# that R.csv holds the same rows.
R = pd.read_csv("../data/R.csv")

# log(|value|) of TAUTH and the feature columns of DB2P8, with the row ids
# carried over untouched.
DB2 = (
    DB2P8[["TAUTH", *coeffs]]
    .apply(np.abs)
    .apply(np.log)
    .assign(id=DB2P8["id"])
)

# Ids that pick out the subset of R used in the next step.
subset_ids = pd.read_csv("../data/R_ids_alpha_0.9576.csv")

In [4]:
# Keep only the rows of R whose id is listed in subset_ids, restricted to
# TAUTH, the feature columns and the id.
# NOTE(review): DB2 is log-transformed above while R is used as loaded —
# presumably R.csv already stores log values; confirm.
R_ = R.loc[R["id"].isin(subset_ids["id"].values), ["TAUTH", *coeffs, "id"]]

# Stack DB2 on top of the selected R rows under a fresh 0..n-1 index.
data = pd.concat([DB2, R_], ignore_index=True)
data

Unnamed: 0,TAUTH,IP,BT,NEL,PLTH,RGEO,KAREA,EPS,MEFF,id
0,-2.975930,-1.217734,0.790728,1.332102,0.625938,0.527093,-0.025523,-1.418447,0.405465,HDULEH
1,-3.015527,-1.220102,0.790728,1.317480,0.705076,0.521172,-0.027988,-1.409604,0.405465,NAC6N1
2,-2.752786,-1.213686,0.790728,1.226712,0.123986,0.526502,-0.010556,-1.432100,0.405465,U2T1C7
3,-2.660547,-1.217734,0.790728,1.328400,0.319181,0.526502,-0.025933,-1.417369,0.405465,422XQB
4,-2.965979,-1.223495,0.790274,1.347294,0.709513,0.521766,-0.031387,-1.406800,0.405465,WZ9FED
...,...,...,...,...,...,...,...,...,...,...
2617,-3.344779,0.019763,-0.822047,2.085501,1.823686,-0.153079,0.665889,-0.330286,0.693147,ZTMEE9
2618,-3.295009,0.020087,-0.822058,2.013420,1.727942,-0.146571,0.625362,-0.323739,0.693147,N42QKF
2619,-3.236997,0.019214,-0.822040,1.998102,1.756837,-0.153918,0.682124,-0.333793,0.693147,53LQB9
2620,-1.753886,-0.020815,1.567574,1.252763,1.631591,0.897719,-0.000300,-1.116746,0.693147,D26CA1


In [5]:
# Standardize features by removing the mean and scaling to unit variance.
# X is the array consumed by the clustering step.
X = StandardScaler().fit_transform(data[coeffs])

# Label the preview with the feature names themselves instead of a positional
# slice of data.columns: that slice only lines up while `data` is exactly
# [TAUTH, <features>, id], and yields the wrong number of names (raising on
# construction) once another column has been added to `data` and this cell
# is re-run.
pd.DataFrame(X, columns=coeffs)

Unnamed: 0,IP,BT,NEL,PLTH,RGEO,KAREA,EPS,MEFF
0,-1.601102,0.225235,-0.758309,-1.192241,-0.384296,-1.956620,-0.991710,-1.304231
1,-1.604245,0.225235,-0.791789,-1.095460,-0.401233,-1.968772,-0.954993,-1.304231
2,-1.595731,0.225235,-0.999622,-1.806107,-0.385985,-1.882838,-1.048403,-1.304231
3,-1.601102,0.225235,-0.766785,-1.567392,-0.385985,-1.958644,-0.987233,-1.304231
4,-1.608749,0.224114,-0.723524,-1.090034,-0.399534,-1.985529,-0.943348,-1.304231
...,...,...,...,...,...,...,...,...
2617,0.041298,-3.758452,0.966766,0.272551,-2.329948,1.451690,3.526686,0.396604
2618,0.041727,-3.758480,0.801721,0.155461,-2.311334,1.251915,3.553868,0.396604
2619,0.040569,-3.758435,0.766646,0.190798,-2.332350,1.531720,3.512122,0.396604
2620,-0.012558,2.144109,-0.939973,0.037627,0.675893,-1.832284,0.261048,0.396604


In [6]:
# Cluster the standardized feature matrix with DBSCAN.
# Points assigned the label -1 are noise (they belong to no cluster).
DB = DBSCAN(eps=1, min_samples=3)
DB.fit(X)
labels = DB.labels_

# Cluster sizes, keyed by label.
Counter(labels)

Counter({0: 410,
         1: 118,
         2: 843,
         3: 37,
         4: 7,
         5: 13,
         6: 16,
         -1: 13,
         7: 7,
         8: 876,
         9: 9,
         10: 52,
         11: 7,
         12: 15,
         13: 59,
         14: 3,
         15: 6,
         16: 17,
         17: 15,
         18: 99})

In [7]:
# Store each row's cluster label (-1 = noise) next to its measurements.
data.loc[:, "labels"] = labels
data

Unnamed: 0,TAUTH,IP,BT,NEL,PLTH,RGEO,KAREA,EPS,MEFF,id,labels
0,-2.975930,-1.217734,0.790728,1.332102,0.625938,0.527093,-0.025523,-1.418447,0.405465,HDULEH,0
1,-3.015527,-1.220102,0.790728,1.317480,0.705076,0.521172,-0.027988,-1.409604,0.405465,NAC6N1,0
2,-2.752786,-1.213686,0.790728,1.226712,0.123986,0.526502,-0.010556,-1.432100,0.405465,U2T1C7,0
3,-2.660547,-1.217734,0.790728,1.328400,0.319181,0.526502,-0.025933,-1.417369,0.405465,422XQB,0
4,-2.965979,-1.223495,0.790274,1.347294,0.709513,0.521766,-0.031387,-1.406800,0.405465,WZ9FED,0
...,...,...,...,...,...,...,...,...,...,...,...
2617,-3.344779,0.019763,-0.822047,2.085501,1.823686,-0.153079,0.665889,-0.330286,0.693147,ZTMEE9,18
2618,-3.295009,0.020087,-0.822058,2.013420,1.727942,-0.146571,0.625362,-0.323739,0.693147,N42QKF,18
2619,-3.236997,0.019214,-0.822040,1.998102,1.756837,-0.153918,0.682124,-0.333793,0.693147,53LQB9,18
2620,-1.753886,-0.020815,1.567574,1.252763,1.631591,0.897719,-0.000300,-1.116746,0.693147,D26CA1,-1
