In [1]:
import Graph_feat as graph
from sklearn.linear_model import LinearRegression
from Gen_data import SimulationStudy
import numpy as np
import pandas as pd

In [2]:
# One seed value drives both NumPy's global RNG and the two simulation studies.
SEED = 220924
np.random.seed(SEED)

# Keyword arguments that the linear and the quadratic study have in common.
shared_sim_settings = dict(
    p=20,
    mean_correlation=0.5,
    cor_variance=0.2,
    n=1500,
    no_feat_cate=3,
    seed=SEED,
)

# Linear setting: build the study, draw the dataset, keep 'CATE' as a one-column frame.
sim1: SimulationStudy = SimulationStudy(**shared_sim_settings)
simulation_linear = sim1.create_dataset()
cate_linear = simulation_linear[['CATE']]

# Quadratic setting: identical settings apart from non_linear='quadratic'.
sim2: SimulationStudy = SimulationStudy(**shared_sim_settings, non_linear='quadratic')
simulation_quad = sim2.create_dataset()
cate_quadratic = simulation_quad[['CATE']]

In [3]:
# Fetch the covariance matrix of the linear study; the second return value
# (`mean`) is not used anywhere in the visible cells.
cov_lin, mean = sim1.get_covariance_matrix()
# Average over all entries of cov_lin.
# NOTE(review): if cov_lin is the full square matrix, this average includes the
# diagonal. When comparing against mean_correlation=0.5, the off-diagonal mean
# may be the more relevant figure - confirm the intent.
np.mean(cov_lin)

0.544633788394955

In [4]:
# Report the mean and the variance (np.var, i.e. its default ddof=0) of every
# column whose name starts with 'X' in the linear dataset.
columns = [name for name in simulation_linear.columns if name.startswith('X')]
for col in columns:
    # Convert once and reuse the array for both statistics.
    feature_values = simulation_linear[col].to_numpy()
    print(f'Mean of {col} is:', np.mean(feature_values))
    print(f'Variance of {col} is:', np.var(feature_values))

Mean of X0 is: 2.960594732333751e-17
Variance of X0 is: 1.0000000000000009
Mean of X1 is: 6.631732200427602e-17
Variance of X1 is: 0.9999999999999996
Mean of X2 is: 5.033011044967376e-17
Variance of X2 is: 1.0000000000000007
Mean of X3 is: 2.6053233644537006e-17
Variance of X3 is: 1.0
Mean of X4 is: 2.3684757858670008e-17
Variance of X4 is: 1.0000000000000002
Mean of X5 is: 2.3684757858670008e-17
Variance of X5 is: 0.9999999999999994
Mean of X6 is: 3.671137468093851e-17
Variance of X6 is: 0.9999999999999997
Mean of X7 is: 4.500103993147301e-17
Variance of X7 is: 0.9999999999999988
Mean of X8 is: 1.9539925233402754e-17
Variance of X8 is: 1.0000000000000018
Mean of X9 is: 3.907985046680551e-17
Variance of X9 is: 1.0000000000000007
Mean of X10 is: 1.0658141036401503e-17
Variance of X10 is: 1.000000000000001
Mean of X11 is: 2.960594732333751e-17
Variance of X11 is: 1.0
Mean of X12 is: 2.6349293117770383e-17
Variance of X12 is: 1.0000000000000002
Mean of X13 is: 3.0790185216271007e-17
Varia

Linear Setting

In [5]:
# Pairwise Pearson correlations between CATE and the features X0, X1, X2
# in the linear setting.
simulation_linear_rel = simulation_linear.loc[:, ['CATE', 'X0', 'X1', 'X2']]
correlation_pairwise_lin = simulation_linear_rel.corr(method='pearson')
correlation_pairwise_lin

Unnamed: 0,CATE,X0,X1,X2
CATE,1.0,0.518266,0.937289,0.933951
X0,0.518266,1.0,0.266787,0.446802
X1,0.937289,0.266787,1.0,0.802635
X2,0.933951,0.446802,0.802635,1.0


In [6]:
# Recover the feature weights by regressing CATE on X0, X1, X2 with ordinary
# least squares. The target is a one-column frame, so coef_ comes back 2-D.
design_linear = simulation_linear[['X0', 'X1', 'X2']].to_numpy()
target_linear = cate_linear.to_numpy()

lr = LinearRegression()
lr.fit(design_linear, target_linear)

feat_weights_linear = lr.coef_
feat_weights_linear

array([[1., 3., 2.]])

In [7]:
# Keyword arguments common to both figures of the linear setting.
# NOTE(review): cor_variance and seed are not passed to the plotting helper,
# unlike for sim1 - confirm that its defaults match the study above.
linear_plot_settings = dict(p=20, n=1500, mean_correlation=0.5, no_feat_cate=3, function='linear')

# Relationship between all features and CATE.
graph.plot_cate_feat_val(**linear_plot_settings)

# Relationship between the CATE features only and CATE (total=False).
graph.plot_cate_feat_val(**linear_plot_settings, total=False)

In [8]:
# Call the Graph_feat helper for the CATE distribution in the linear setting
# (presumably draws a distribution plot - see Graph_feat.distr_cate).
# NOTE(review): cor_variance, no_feat_cate and seed are not passed here, unlike
# for sim1 - confirm that the helper's defaults match the study above.
graph.distr_cate(p=20, mean_correlation=0.5, n=1500, function='linear')


Quadratic Setting

In [9]:
# Pairwise Pearson correlations between CATE and the features X0, X1, X2
# in the quadratic setting. Pearson correlation measures linear association
# only, so it can be near zero even when CATE depends on the features.
simulation_quad_rel = simulation_quad.loc[:, ['CATE', 'X0', 'X1', 'X2']]
correlation_pairwise_quad = simulation_quad_rel.corr(method='pearson')
correlation_pairwise_quad

Unnamed: 0,CATE,X0,X1,X2
CATE,1.0,-0.00597,-0.004075,-0.029099
X0,-0.00597,1.0,0.463251,0.499174
X1,-0.004075,0.463251,1.0,0.856851
X2,-0.029099,0.499174,0.856851,1.0


In [10]:
# Recover the feature weights in the quadratic setting: regress CATE on the
# element-wise squares of X0, X1, X2 with ordinary least squares.
squared_design_quad = np.square(simulation_quad[['X0', 'X1', 'X2']].to_numpy())
target_quad = cate_quadratic.to_numpy()

lr = LinearRegression()
lr.fit(squared_design_quad, target_quad)

feat_weights_quad = lr.coef_
feat_weights_quad

array([[1., 3., 2.]])

In [11]:
# Keyword arguments common to both figures of the quadratic setting.
# NOTE(review): cor_variance and seed are not passed to the plotting helper,
# unlike for sim2 - confirm that its defaults match the study above.
quadratic_plot_settings = dict(p=20, n=1500, mean_correlation=0.5, no_feat_cate=3, function='quadratic')

# Relationship between all features and CATE.
graph.plot_cate_feat_val(**quadratic_plot_settings)

# Relationship between the CATE features only and CATE (total=False).
graph.plot_cate_feat_val(**quadratic_plot_settings, total=False)

In [12]:
# Call the Graph_feat helper for the CATE distribution in the quadratic setting
# (presumably draws a distribution plot - see Graph_feat.distr_cate).
# NOTE(review): cor_variance, no_feat_cate and seed are not passed here, unlike
# for sim2 - confirm that the helper's defaults match the study above.
graph.distr_cate(p=20, mean_correlation=0.5, n=1500, function='quadratic')