In [1]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt

from scipy.optimize import curve_fit

### Using SciPy

[`curve_fit` documentation](https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.curve_fit.html)

Use non-linear least squares to fit a function, f, to data.

Assumes `ydata = f(xdata, *params) + eps`.



In [2]:
def τ_lth(X, lα0, αI, αB, αn, αP, αR, αk, αε, αM):
    """Evaluate the linear model ``lα0 + Σ_i X[i] * α_i`` for eight regressors.

    Parameters
    ----------
    X : sequence of length 8
        The regressors, in the order that pairs them with
        (αI, αB, αn, αP, αR, αk, αε, αM). Each entry may be a scalar or an
        array; arrays are combined element-wise.
    lα0 : scalar
        Additive constant term.
    αI, αB, αn, αP, αR, αk, αε, αM : scalar
        Coefficient multiplying the regressor at the matching position of ``X``.

    Returns
    -------
    The constant plus the coefficient-weighted sum of the regressors.

    Raises
    ------
    ValueError
        If ``X`` does not unpack into exactly eight items.
    """
    # Unpack explicitly so a wrongly sized X fails loudly instead of being
    # silently truncated.
    ip, bt, nel, plth, rgeo, karea, eps, meff = X
    regressors = (ip, bt, nel, plth, rgeo, karea, eps, meff)
    coefficients = (αI, αB, αn, αP, αR, αk, αε, αM)

    # Accumulate left to right, starting from the constant term.
    total = lα0
    for regressor, coefficient in zip(regressors, coefficients):
        total = total + regressor * coefficient
    return total

In [3]:
# Load the regression dataset. Per the original note the columns are already
# log-transformed (NOTE(review): confirm against how data/R.csv was produced).
r = pd.read_csv("data/R.csv")
r.head()  # preview the first rows

Unnamed: 0,TAUTH,IP,BT,NEL,PLTH,RGEO,KAREA,EPS,MEFF,TOK,SHOT,DATE,TIME,ind
0,-2.378171,0.001998,0.737164,2.294855,1.515786,0.500169,0.491191,-1.202761,0.693147,AUG,6483,19950518,4.0,1515
1,-2.254748,-0.002303,0.915891,1.904642,1.525839,0.499562,0.48789,-1.210263,0.693147,AUG,7555,19960223,2.498,1529
2,-2.546186,-0.22803,0.749055,1.835138,1.521481,0.498955,0.480396,-1.184928,0.693147,AUG,7634,19960301,3.499,1532
3,-2.486628,-0.226273,0.701115,1.852384,1.527143,0.500169,0.484045,-1.189324,0.693147,AUG,7639,19960301,3.5,1533
4,-2.511949,-0.228156,0.701611,1.853482,1.554348,0.500169,0.48285,-1.188727,0.693147,AUG,7640,19960301,3.501,1534


In [4]:
# Response variable: the TAUTH column as a NumPy array.
y = r["TAUTH"].values

# Regressors as a tuple of per-column arrays. The order must match the order in
# which τ_lth unpacks X (I, B, n, P, R, k, ε, M).
X = tuple(
    r[column].values
    for column in ("IP", "BT", "NEL", "PLTH", "RGEO", "KAREA", "EPS", "MEFF")
)

`popt`: Optimal values for the parameters, chosen so that the sum of the squared residuals `f(xdata, *popt) - ydata` is minimized.

`pcov`: The estimated covariance matrix of `popt`. Its diagonal entries are the variances of the individual parameter estimates, so `np.sqrt(np.diag(pcov))` gives their one-standard-deviation errors.

In [5]:
# Least-squares fit of τ_lth to (X, y). No initial guess is passed, so curve_fit
# infers the number of parameters from τ_lth's signature.
popt, pcov = curve_fit(f=τ_lth, xdata=X, ydata=y)

In [6]:
# Baseline regression values, presumably the reference the fit is compared to.
# Note: the displayed column order (α0, αI, αB, αP, αn, αM, αR, αe, αk) differs
# from the parameter order of τ_lth, so compare by name, not by position.
baseline = pd.read_csv("data/Baseline_regression_values.csv")
baseline

Unnamed: 0,α0,αI,αB,αP,αn,αM,αR,αe,αk
0,0.105675,0.770171,0.238516,-0.608801,0.383818,0.184438,1.99426,0.715733,0.432187


In [7]:
# Intercept: popt[0] is the fitted lα0 (first parameter of τ_lth). Exponentiating
# it gives the constant on the original scale, presumably α0 if lα0 = log(α0).
np.exp(popt[0])

0.03759822711978881

In [8]:
# Exponents: the remaining fitted parameters, in τ_lth's signature order
# (αI, αB, αn, αP, αR, αk, αε, αM).
popt[1:]

array([ 1.55162488, -0.22349474,  0.07325017, -0.73538245,  0.89097562,
        0.2366993 , -1.30103608,  0.22162034])

### Using scikit-learn

In [9]:
# NOTE(review): ideally this import lives in the notebook's top import cell.
from sklearn.linear_model import LinearRegression

# Cross-check of the curve_fit result with an ordinary least-squares fit.
# Features are selected by name rather than by position (r.columns[1:9]), so the
# cell cannot silently pick different columns if the CSV layout changes.
feature_cols = ["IP", "BT", "NEL", "PLTH", "RGEO", "KAREA", "EPS", "MEFF"]

# Distinct names on purpose: the tuple `X` and array `y` built for curve_fit are
# left untouched, so the curve_fit cell still works when re-run after this one.
X_df = r[feature_cols]
# Kept as a one-column DataFrame so coef_ has one row and intercept_ has length 1.
y_df = r[["TAUTH"]]

regressor = LinearRegression()
regressor.fit(X_df, y_df)

# One row per feature, pairing each column name with its fitted coefficient.
coeff_df = pd.DataFrame(
    {"Attribute": feature_cols, "Coefficient": regressor.coef_.ravel()}
)
coeff_df

Unnamed: 0,Attribute,Coefficient
0,IP,1.551625
1,BT,-0.223495
2,NEL,0.07325
3,PLTH,-0.735382
4,RGEO,0.890976
5,KAREA,0.236699
6,EPS,-1.301036
7,MEFF,0.22162


In [10]:
# Exponentiated intercept of the LinearRegression fit, for comparison with
# np.exp(popt[0]). It is a length-1 array because the target was passed as a
# one-column DataFrame.
np.exp(regressor.intercept_)

array([0.03759823])