In [1]:
import numpy as np
import sklearn
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
import pandas as pd

In [2]:
# Variables to be regressed against each other, one unordered pair at a time.
var_names = ["demand", "trans", "storage", "inertia", "solar"]

# One output row per unordered pair of distinct variables: n * (n - 1) / 2.
n_vars = len(var_names)
n_pairs = (n_vars * (n_vars - 1)) // 2

# Columns: the two variable names, their coefficients (bx, by), the
# interaction coefficient (bxy), the intercept (a) and the fit's R^2.
result_columns = ["x", "y", "bx", "by", "bxy", "a", "R^2"]

# Pre-allocated, initially empty (all-NaN) results table.
regression_out = pd.DataFrame(
    index=pd.RangeIndex(stop=n_pairs),
    columns=result_columns,
)
regression_out

Unnamed: 0,x,y,bx,by,bxy,a,R^2
0,,,,,,,
1,,,,,,,
2,,,,,,,
3,,,,,,,
4,,,,,,,
5,,,,,,,
6,,,,,,,
7,,,,,,,
8,,,,,,,
9,,,,,,,


In [3]:
# Build the regression pipeline in two named steps:
#   "poly"       - degree-2, interaction-only expansion without a bias column
#                  (for two input columns x and y this yields x, y and x*y)
#   "regression" - ordinary least squares on the expanded features
poly_step = ("poly", PolynomialFeatures(degree=2, interaction_only=True, include_bias=False))
regression_step = ("regression", LinearRegression())
model = Pipeline([poly_step, regression_step])

In [5]:
# Fit the interaction model once for every unordered pair of variables and
# store the fitted coefficients, intercept and in-sample R^2 in the matching
# row of regression_out.
#
# Each pair (x, y) is read from "../Data/<x>-<y>.csv". The last column of the
# file is used as the response and the preceding columns as the predictors.
variable_pairs = [(x_name, y_name)
                  for k, x_name in enumerate(var_names)
                  for y_name in var_names[k + 1:]]

for row, (x_name, y_name) in enumerate(variable_pairs):
    # Get data from Data folder
    csv_path = "../Data/" + x_name + "-" + y_name + ".csv"
    # ndmin=2 keeps the array two-dimensional even for a single-row file,
    # so the column slicing below cannot fail with an IndexError.
    raw_data = np.loadtxt(csv_path, delimiter=",", ndmin=2)

    # The bx/by/bxy columns only make sense for exactly two predictors plus
    # one response; fail early with a clear message instead of letting a
    # wrong-width file surface later as an opaque pandas assignment error or
    # as coefficients stored under the wrong labels.
    if raw_data.shape[1] != 3:
        raise ValueError(
            "{}: expected 3 columns (two predictors and a response), found {}".format(
                csv_path, raw_data.shape[1]))

    # Set first two columns of DataFrame to names of variables
    regression_out.loc[row, ["x", "y"]] = [x_name, y_name]

    # Process and perform regression on data
    x_data = raw_data[:, :-1]
    y_data = raw_data[:, -1]
    model.fit(x_data, y_data)  # fit() updates the pipeline in place

    # Store regression output in regression_out DataFrame
    fitted_regression = model["regression"]
    regression_out.loc[row, ["bx", "by", "bxy"]] = fitted_regression.coef_
    regression_out.loc[row, "a"] = fitted_regression.intercept_
    # R^2 is evaluated on the same data used for fitting (in-sample score).
    regression_out.loc[row, "R^2"] = model.score(x_data, y_data)

regression_out

Unnamed: 0,x,y,bx,by,bxy,a,R^2
0,demand,trans,1.794423,-0.000162,0.000176,-1.585963,0.972838
1,demand,storage,1.746607,0.000129,-0.000134,-1.532123,0.933272
2,demand,inertia,1.321498,1.306123,-1.354885,-1.122578,0.749004
3,demand,solar,17.10434,15.306285,-15.864064,-16.35116,0.670159
4,trans,storage,1.5e-05,-3e-06,-0.0,0.209086,0.935218
5,trans,inertia,1.7e-05,-0.049131,-1.5e-05,0.193451,0.930992
6,trans,solar,0.000148,-0.631745,-0.000134,0.825271,0.8619
7,storage,inertia,-4e-06,-0.051049,4e-06,0.204258,0.72297
8,storage,solar,-5.7e-05,-0.627557,5.2e-05,0.829071,0.715082
9,inertia,solar,-0.523853,-0.474077,0.485887,0.663205,0.60285


In [6]:
# Write the results table to a CSV file in the working directory; the
# positional row index is omitted because it carries no information.
output_path = "regression-output.csv"
regression_out.to_csv(output_path, index=False)