In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from scipy.optimize import curve_fit
from sklearn.linear_model import LinearRegression

### Using SciPy

[`curve_fit` documentation](https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.curve_fit.html)

Use non-linear least squares to fit a function, f, to data.

Assumes `ydata = f(xdata, *params) + eps`.



In [2]:
def τ_lth(X, lα0, αI, αB, αn, αP, αR, αk, αε, αM):
    """Linear model: an additive constant plus a weighted sum of eight regressors.

    Parameters
    ----------
    X : sequence of length 8
        The regressors, in the order (I, B, n, P, R, k, ε, M). Each entry may
        be a scalar or an array; unpacking raises if there are not exactly
        eight entries.
    lα0 : float
        Additive constant of the model.
    αI, αB, αn, αP, αR, αk, αε, αM : float
        Coefficient multiplying the regressor with the matching suffix.

    Returns
    -------
    scalar or array
        lα0 + I*αI + B*αB + n*αn + P*αP + R*αR + k*αk + ε*αε + M*αM
    """
    I, B, n, P, R, k, ε, M = X
    weighted_terms = (
        (I, αI), (B, αB), (n, αn), (P, αP),
        (R, αR), (k, αk), (ε, αε), (M, αM),
    )
    # Accumulate left to right, out of place, so the floating-point summation
    # order matches the written-out formula and no input is mutated.
    prediction = lα0
    for regressor, coefficient in weighted_terms:
        prediction = prediction + regressor * coefficient
    return prediction

In [3]:
# Load the dataset; per the original note, the values in this file are
# logarithmic.
data = pd.read_csv(filepath_or_buffer="data/O.csv")
data.head(n=5)  # preview the first five rows

Unnamed: 0,TAUTH,IP,BT,NEL,PLTH,RGEO,KAREA,EPS,MEFF,TOK,SHOT,DATE,TIME,ind
0,-2.97593,-1.217734,0.790728,1.332102,0.625938,0.527093,-0.025523,-1.418447,0.405465,ASDEX,5980,19820622,1.205,6
1,-3.015527,-1.220102,0.790728,1.31748,0.705076,0.521172,-0.027988,-1.409604,0.405465,ASDEX,5980,19820622,1.224,7
2,-2.752786,-1.213686,0.790728,1.226712,0.123986,0.526502,-0.010556,-1.4321,0.405465,ASDEX,5982,19820622,1.188,12
3,-2.660547,-1.217734,0.790728,1.3284,0.319181,0.526502,-0.025933,-1.417369,0.405465,ASDEX,5982,19820622,1.216,13
4,-2.965979,-1.223495,0.790274,1.347294,0.709513,0.521766,-0.031387,-1.4068,0.405465,ASDEX,5982,19820622,1.244,14


In [4]:
# Optional filter: drop the spherical tokamaks (MAST, NSTX) before fitting.
# Off by default, so every row loaded from the CSV is used; set the flag to
# True to run the fits without those machines.
EXCLUDE_SPHERICAL_TOKAMAKS = False

if EXCLUDE_SPHERICAL_TOKAMAKS:
    data = data[~data.TOK.isin(['MAST', 'NSTX'])]

In [5]:
# Target vector for the fit.
y = data["TAUTH"].values
# Regressors as a tuple of 1-D arrays. The order must match the unpacking
# inside τ_lth: I, B, n, P, R, k, ε, M.
X = tuple(
    data[column].values
    for column in ("IP", "BT", "NEL", "PLTH", "RGEO", "KAREA", "EPS", "MEFF")
)

`popt`: Optimal values for the parameters, chosen so that the sum of the squared residuals `f(xdata, *popt) - ydata` is minimized.

`pcov`: The estimated covariance matrix of `popt`. Its diagonal entries are the variances of the individual parameter estimates, so `np.sqrt(np.diag(pcov))` gives their one-standard-deviation errors.

In [6]:
# Least-squares fit of the nine τ_lth parameters (lα0 plus eight coefficients).
# No initial guess is passed, so curve_fit falls back to its default start.
popt, pcov = curve_fit(f=τ_lth, xdata=X, ydata=y)

In [7]:
# Reference coefficients that the fitted values are compared against.
baseline = pd.read_csv(filepath_or_buffer="data/Baseline_regression_values.csv")
baseline

Unnamed: 0,α0,αI,αB,αP,αn,αM,αR,αe,αk
0,0.105675,0.770171,0.238516,-0.608801,0.383818,0.184438,1.99426,0.715733,0.432187


In [8]:
# Intercept: popt[0] is the fitted lα0, the additive constant of τ_lth. The
# input data are described as logarithmic, so it is exponentiated here.
# NOTE(review): the displayed result (≈0.0712) does not match the baseline α0
# (0.105675) even though every exponent agrees with the baseline; the cause is
# not visible in this notebook — confirm (units / normalisation of the data?).
np.exp(popt[0])

0.07116439643743085

In [9]:
# Exponents: the remaining fitted parameters, in τ_lth's signature order
# (αI, αB, αn, αP, αR, αk, αε, αM). The baseline table lists its columns in a
# different order (α0, αI, αB, αP, αn, αM, αR, αe, αk), so compare by name,
# not by position.
popt[1:]

array([ 0.77017136,  0.2385164 ,  0.38381842, -0.60880113,  1.9942594 ,
        0.43218689,  0.7157326 ,  0.18443849])

### Using scikit-learn

In [10]:
# Cross-check: fit the same linear model with ordinary least squares.
# NOTE: this cell rebinds `X` and `y` (a tuple of arrays and a 1-D array in the
# SciPy section) to DataFrames; re-run the cell that builds the SciPy inputs
# before calling curve_fit again.
y = data[["TAUTH"]]  # one-column DataFrame, so coef_ comes back 2-D
# Pick the regressors by name instead of by position (data.columns[1:9]) so the
# fit cannot silently change if the CSV column order does.
X = data[["IP", "BT", "NEL", "PLTH", "RGEO", "KAREA", "EPS", "MEFF"]]

regressor = LinearRegression()
regressor.fit(X, y)

# One row per regressor; flatten coef_ (one row of n_features values here)
# into a single column aligned with the regressor names.
coeff_df = pd.DataFrame(
    {"Attribute": X.columns, "Coefficient": regressor.coef_.ravel()}
)
coeff_df

Unnamed: 0,Attribute,Coefficient
0,IP,0.770171
1,BT,0.238516
2,NEL,0.383818
3,PLTH,-0.608801
4,RGEO,1.994259
5,KAREA,0.432187
6,EPS,0.715733
7,MEFF,0.184438


In [11]:
# Exponentiate the scikit-learn intercept so it can be compared with the
# exponentiated SciPy intercept. It displays as a one-element array, presumably
# because the target was passed as a one-column DataFrame — confirm.
np.exp(regressor.intercept_)

array([0.0711644])

In [12]:
# Show the baseline coefficients again for comparison with the
# scikit-learn results above.
baseline

Unnamed: 0,α0,αI,αB,αP,αn,αM,αR,αe,αk
0,0.105675,0.770171,0.238516,-0.608801,0.383818,0.184438,1.99426,0.715733,0.432187
