In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

### Package Install

In [None]:
!pip install causaltensor
import causaltensor

### Data Generation

In [10]:
from causaltensor.matlib import low_rank_M0_normal
from causaltensor.matlib import iid_treatment

import causaltensor.cauest.MCNNM as MC 

def generate(n1=100, n2=100, r=2, treat_prob=0.1, noise_scale=0.1, treat_scale=0.2, seed=42, covariates=False, missing_data=False, num_covariates=5, missing_prob=0.1):
    """Simulate one synthetic panel with a constant treatment effect.

    The observation matrix is built as

        O = M0 + a + b.T + tau * Z + noise  (+ sum_k X[:, :, k] * beta[k])

    Parameters
    ----------
    n1, n2 : int
        Number of rows / columns of the panel.
    r : int
        Forwarded to ``low_rank_M0_normal`` (presumably the rank of ``M0``).
    treat_prob : float
        Forwarded to ``iid_treatment`` as ``prob``.
    noise_scale : float
        The Gaussian noise has standard deviation ``noise_scale * tau``.
    treat_scale : float
        ``tau`` is ``treat_scale`` times the mean absolute entry of ``M0``.
    seed : int
        Seed passed to ``np.random.seed`` before anything is drawn.
    covariates : bool
        When true, draw covariates ``X`` and coefficients ``beta`` and add
        their contribution to ``O``.
    missing_data : bool
        When true, also draw a boolean mask ``Omega``.
    num_covariates : int
        Number of covariates drawn when ``covariates`` is true (default 5).
    missing_prob : float
        An entry of ``Omega`` is ``False`` when a uniform [0, 1) draw is
        ``<= missing_prob`` (default 0.1).

    Returns
    -------
    tuple
        ``(O, M0, Z, a, b, tau)``, followed by ``beta, X`` when ``covariates``
        is true, followed by ``Omega`` when ``missing_data`` is true.

    Notes
    -----
    ``O`` is returned in full: ``Omega`` is only returned alongside it and is
    not applied to ``O`` here.
    """
    # Seed the global NumPy RNG: every draw below uses np.random directly
    # (the two causaltensor helpers presumably do as well -- TODO confirm).
    np.random.seed(seed)

    M0 = low_rank_M0_normal(n1=n1, n2=n2, r=r)
    Z = iid_treatment(prob=treat_prob, shape=M0.shape)

    # Column vector of row effects and column vector of column effects; `b` is
    # transposed below so that both broadcast against M0.
    a = np.random.normal(size=(n1, 1))
    b = np.random.normal(size=(n2, 1))

    # Scale the treatment effect (and through it the noise) to the typical
    # magnitude of the baseline entries.
    abs_mean_M = np.mean(np.abs(M0))
    tau = abs_mean_M * treat_scale
    O = M0 + a + b.T + tau * Z + np.random.normal(scale=noise_scale*tau, size=M0.shape)

    outputs = [O, M0, Z, a, b, tau]

    if covariates:
        X = np.random.normal(size=(n1, n2, num_covariates))
        beta = np.random.normal(size=(num_covariates, )) * abs_mean_M
        # beta broadcasts over the last axis of X; summing that axis gives the
        # per-entry covariate contribution.
        O += np.sum(X * beta, axis=2)
        outputs += [beta, X]

    if missing_data:
        Omega = np.random.rand(n1, n2) > missing_prob
        outputs.append(Omega)

    return tuple(outputs)

### Estimating tau with O and Z

`solve_with_regularizer`: solve for tau by specifying the nuclear-norm penalty `l`

In [11]:
# Re-import the solver module so this cell runs against its current source.
from importlib import reload
reload(MC)

# Ten simulated panels (seeds 0-9). For each one, fit the solver with a fixed
# penalty l=2 and print the relative error of the estimated tau.
for seed in range(10):
    O, M0, Z, a, b, tau = generate(seed=seed)
    solver = MC.MCNNMPanelSolver(O, Z)
    res = solver.solve_with_regularizer(l=2)
    tau_gap = np.linalg.norm(res.tau - tau)
    print(tau_gap / np.linalg.norm(tau))

0.008327785171842397
0.003044323661370706
0.007947726226714842
0.002939786211748638
0.0011224985898514324
0.005343554003100552
8.143302736964846e-05
0.015225458418557561
0.005513430575071552
0.010271288978259826


`solve_with_suggested_rank`: solve tau by specifying the suggested rank 

In [12]:
# Ten simulated panels (seeds 0-9). For each one, fit the solver with a
# suggested rank of 2 and print the relative error of the estimated tau.
for seed in range(10):
    O, M0, Z, a, b, tau = generate(seed=seed)
    solver = MC.MCNNMPanelSolver(O, Z)
    res = solver.solve_with_suggested_rank(suggest_r=2)
    tau_gap = np.linalg.norm(res.tau - tau)
    print(tau_gap / np.linalg.norm(tau))

0.10883277996841537
0.02908401564937432
0.39736857143581905
0.03381671413699322
0.03755850745630404
0.3419893715575813
0.04213909134611238
0.4565049604963808
0.1930488063215501
0.2767454013352116


### Estimating tau with O, Z, and covariates X

In [50]:
# Re-import the solver module so this cell runs against its current source.
from importlib import reload
reload(MC)

# Ten simulated panels (seeds 0-9) that include covariates X. Each iteration
# prints two lines: the relative error of tau, then the relative error of beta.
for seed in range(10):
    O, M0, Z, a, b, tau, beta, X = generate(covariates=True, seed=seed)
    solver = MC.MCNNMPanelSolver(O, Z, X=X)
    res = solver.solve_with_regularizer(l=2)

    tau_gap = np.linalg.norm(res.tau - tau)
    print(tau_gap / np.linalg.norm(tau))

    beta_gap = np.linalg.norm(res.beta - beta)
    print(beta_gap / np.linalg.norm(beta))

0.011225944895083358
0.001126914741559473
0.002880225514436671
0.000292173946190375
0.017976003987577927
0.00032472318161826173
0.0026517085273753314
0.00013970377313305375
0.0021113411934980983
0.0004295332598712637
0.01520082772404798
0.0002604210664437872
0.0010598020474344926
0.0008377372512665197
0.028510099644404673
0.0007885054125328276
0.010877781130478952
0.00028657698580058735
0.01768813112197616
0.0008211927728207738


### Estimating tau with O, Z, covariates X, and the observation pattern Omega

In [60]:
# Re-import the solver module so this cell runs against its current source.
from importlib import reload
reload(MC)

# Ten simulated panels (seeds 0-9) with covariates X and a mask Omega, both
# handed to the solver. Each iteration prints two lines: the relative error of
# tau, then the relative error of beta.
for seed in range(10):
    O, M0, Z, a, b, tau, beta, X, Omega = generate(covariates=True, seed=seed, missing_data=True)
    solver = MC.MCNNMPanelSolver(O, Z, X=X, Omega=Omega)
    res = solver.solve_with_regularizer(l=2)

    tau_gap = np.linalg.norm(res.tau - tau)
    print(tau_gap / np.linalg.norm(tau))

    beta_gap = np.linalg.norm(res.beta - beta)
    print(beta_gap / np.linalg.norm(beta))

0.01381943141084278
0.0011944667009199843
0.006476455355013564
0.00041889932058516717
0.022902607170113744
0.0003498403606104332
0.0017119614343101702
0.00014535748809506386
0.0012080643239488177
0.0003431078107535029
0.015255150461104013
0.0002628078359347144
0.001734066738938527
0.0007138069012078162
0.028109279225065308
0.0010752776553787156
0.010685453010939247
0.00024142346344556935
0.025895067928505985
0.0009523520900522919


`solve_with_cross_validation()`: choose the nuclear-norm penalty `l` by cross-validation

In [8]:
# Import `reload` here, as the other demo cells do, so this cell does not
# depend on one of them having been executed first.
from importlib import reload
reload(MC)

# Ten simulated panels (seeds 0-9) with covariates X and a mask Omega. The
# solver is called without an explicit penalty via solve_with_cross_validation().
# Each iteration prints two lines: the relative error of tau, then the relative
# error of beta.
for T in range(10):
    O, M0, Z, a, b, tau, beta, X, Omega = generate(covariates=True, seed=T, missing_data=True)
    solver = MC.MCNNMPanelSolver(O, Z, X=X, Omega=Omega)
    res = solver.solve_with_cross_validation()
    print(np.linalg.norm(res.tau - tau) / np.linalg.norm(tau))
    print(np.linalg.norm(res.beta - beta) / np.linalg.norm(beta))

  res.tau = np.sum((self.O - res.baseline_model)*self.Z) / np.sum(self.Z)


0.23023731395560307
0.03268633064652568
0.35001664919373787
0.013099989239193576
0.7927320895229254
0.011053292882887674
0.04963385932086533
0.004967285705441626
0.05267566376197711
0.011357938281373646
0.5990871653502178
0.009532835938152085
0.018063605297096323
0.020336647160082336
0.7651364288635317
0.03444836593794891
0.4095547537915799
0.008491248590100588
0.7029503044540891
0.0294794634088909
