In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR

excel_loc = "results.xlsx"
frame_raw = pd.read_excel(excel_loc, sheet_name=0, index_col=0).drop(["Run"], axis=1)

In [7]:
from sklearn.metrics import mean_squared_error

def train(target_col, effect_cols):
    # read the frame
    # minmax scale
    scaler = MinMaxScaler()
    scaler.fit(frame_raw)
    frame_scaled = frame_raw[0:0].copy()
    frame_scaled[:] = scaler.transform(frame_raw)
    frame_scaled[effect_cols] = 0 # zero out consequences
    
    # split
    x, y = frame_scaled.loc[:, frame_raw.columns != target_col], frame_scaled.loc[:, frame_raw.columns == target_col] 
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
    
    # train
    svr = SVR(kernel='linear')
    svr.fit(x_train, y_train)
    
    # r2
    y_pred = svr.predict(x_test)
    mse = mean_squared_error(y_test, y_pred)
    
    # return weights
    return svr.coef_, svr.intercept_, mse

In [32]:
cor = ["peak_r0", "length", "peak_infection", "peak_infection_loc", "peak_infection_pct", "peak_r0_loc", "cleans_per_day"]
for x in cor:
    if x == "cleans_per_day":
        continue # constant throughout runs so 0 correlation
    other = [y for y in cor if y != x]
    res, intercept, mse = train(x, other)
    res_abs = np.abs(res) / np.sum(np.abs(res))
    res_abs_df = pd.DataFrame([res_abs[res_abs != 0]], columns=[y for y in frame_raw.columns if y not in cor and x != y])
    print(f"=== {x} ===")
    print(res_abs_df)

=== peak_r0 ===
   num_infec_agents  num_uninfec_agents  num_rec_agents  hours_per_day  \
0          0.280666            0.225278        0.094345        0.08187   

   mask_efficacy  num_humans  
0       0.087162    0.230679  
=== length ===
   num_infec_agents  num_uninfec_agents  num_rec_agents  hours_per_day  \
0          0.018576            0.150425        0.046191       0.640335   

   mask_efficacy  num_humans  
0       0.001377    0.143095  
=== peak_infection ===
   num_infec_agents  num_uninfec_agents  num_rec_agents  hours_per_day  \
0          0.008331            0.480484        0.032215       0.000052   

   mask_efficacy  num_humans  
0       0.000285    0.478633  
=== peak_infection_loc ===
   num_infec_agents  num_uninfec_agents  num_rec_agents  hours_per_day  \
0          0.027737            0.353868        0.117409       0.133875   

   mask_efficacy  num_humans  
0       0.002775    0.364337  
=== peak_infection_pct ===
   num_infec_agents  num_uninfec_agents  num_rec

  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)


In [9]:
res

(array([[-0.0184147 ,  0.01625341,  0.00456528,  0.00602206,  0.        ,
          0.0037991 ,  0.        ,  0.01638902,  0.        ,  0.        ,
          0.00950753,  0.        ]]),
 array([0.22936991]),
 0.008777221032183473)