In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

### Package Install

In [2]:
#!pip install causaltensor
import causaltensor

### Data Generation

In [3]:
from causaltensor.matlib import low_rank_M0_normal
from causaltensor.matlib import iid_treatment

import causaltensor.cauest.MCNNM as MC 

def generate(n1=100, n2=100, r=2, treat_prob=0.1, noise_scale=0.1, treat_scale=0.2, seed=42, covariates=False, missing_data=False, num_covariates=5, missing_prob=0.1):
    """Simulate a synthetic panel with a constant treatment effect.

    The observation matrix is assembled as

        O = M0 + a + b.T + tau * Z + noise   (+ sum_k X[:, :, k] * beta[k])

    where the covariate term is only added when ``covariates`` is true.

    Parameters
    ----------
    n1, n2 : int
        Number of rows and columns of the panel.
    r : int
        Rank argument forwarded to ``low_rank_M0_normal``.
    treat_prob : float
        Probability argument forwarded to ``iid_treatment``.
    noise_scale : float
        The noise standard deviation is ``noise_scale * tau``.
    treat_scale : float
        ``tau`` is ``treat_scale`` times the mean absolute entry of ``M0``.
    seed : int
        Seed passed to ``np.random.seed``. This reseeds NumPy's *global*
        generator as a side effect.
    covariates : bool
        If true, draw covariates ``X`` and coefficients ``beta`` and add their
        contribution to ``O``.
    missing_data : bool
        If true, also draw a boolean mask ``Omega``. ``O`` itself is returned
        unmasked; the mask is only reported to the caller.
    num_covariates : int
        Size of the last axis of ``X`` and length of ``beta``.
    missing_prob : float
        ``Omega`` is ``np.random.rand(n1, n2) > missing_prob``, so a larger
        value yields more ``False`` entries.

    Returns
    -------
    tuple
        ``(O, M0, Z, a, b, tau)``, followed by ``beta, X`` when ``covariates``
        is true, followed by ``Omega`` when ``missing_data`` is true.
    """
    # The order of the random draws below is part of the contract: for a given
    # seed and default arguments it reproduces the same data as before.
    np.random.seed(seed)
    # NOTE(review): M0 is assumed to be an (n1, n2) array and Z an array of
    # the same shape; both come from causaltensor helpers not visible here.
    M0 = low_rank_M0_normal(n1=n1, n2=n2, r=r)
    Z = iid_treatment(prob=treat_prob, shape=M0.shape)

    # Additive effects: a is (n1, 1) and b.T is (1, n2), so both broadcast
    # against M0 in the sum below.
    a = np.random.normal(size=(n1, 1))
    b = np.random.normal(size=(n2, 1))

    # The treatment effect is scaled relative to the typical magnitude of M0.
    abs_mean_M = np.mean(np.abs(M0))
    tau = abs_mean_M * treat_scale

    noise = np.random.normal(scale=noise_scale * tau, size=M0.shape)
    O = M0 + a + b.T + tau * Z + noise

    outputs = [O, M0, Z, a, b, tau]

    if covariates:
        X = np.random.normal(size=(n1, n2, num_covariates))
        beta = np.random.normal(size=(num_covariates,)) * abs_mean_M
        # In-place update: the array already stored in `outputs` is the same
        # object, so the returned O includes the covariate contribution.
        O += np.sum(X * beta, axis=2)
        outputs += [beta, X]

    if missing_data:
        Omega = np.random.rand(n1, n2) > missing_prob
        outputs.append(Omega)

    return tuple(outputs)

### Estimating tau with observation matrix O and treatment pattern Z

`solve_with_regularizer`: solve for tau by specifying the penalty for the nuclear norm

In [4]:
from importlib import reload
reload(MC)  # re-execute the solver module; presumably a convenience while developing the library

# Run the estimator on ten synthetic panels (seeds 0..9) with a fixed
# regularization weight and print the relative error of the estimated tau.
for seed in range(10):
    O, M0, Z, a, b, tau = generate(seed=seed)
    solver = MC.MCNNMPanelSolver(Z=Z)
    res = solver.solve_with_regularizer(O=O, l=2)
    tau_abs_err = np.linalg.norm(res.tau - tau)
    print(tau_abs_err / np.linalg.norm(tau))

0.008327785171842397
0.003044323661370706
0.007947726226714842
0.002939786211748638
0.0011224985898514324
0.005343554003100552
8.143302736964846e-05
0.015225458418557561
0.005513430575071552
0.010271288978259826


`solve_with_suggested_rank`: solve for tau by specifying the suggested rank

In [5]:
# Same experiment as the regularizer-based run, but the solver is given a
# suggested rank instead of an explicit penalty weight.
for seed in range(10):
    O, M0, Z, a, b, tau = generate(seed=seed)
    solver = MC.MCNNMPanelSolver(Z)
    res = solver.solve_with_suggested_rank(O=O, suggest_r=2)
    tau_abs_err = np.linalg.norm(res.tau - tau)
    print(tau_abs_err / np.linalg.norm(tau))

0.10883277996841537
0.02908401564937432
0.39736857143581905
0.03381671413699322
0.03755850745630404
0.3419893715575813
0.04213909134611238
0.4565049604963808
0.1930488063215501
0.2767454013352116


### Estimating tau with O, Z, and covariates X

In [6]:
from importlib import reload
reload(MC)  # re-execute the solver module; presumably a convenience while developing the library

# For each seed, simulate a panel with covariates, fit with a fixed penalty,
# and print two lines: the relative error of tau, then that of beta.
for seed in range(10):
    O, M0, Z, a, b, tau, beta, X = generate(covariates=True, seed=seed)
    solver = MC.MCNNMPanelSolver(Z, X=X)
    res = solver.solve_with_regularizer(O=O, l=2)
    tau_rel_err = np.linalg.norm(res.tau - tau) / np.linalg.norm(tau)
    beta_rel_err = np.linalg.norm(res.beta - beta) / np.linalg.norm(beta)
    print(tau_rel_err)
    print(beta_rel_err)

0.008429305047132312
0.0012889787652232245
0.0031585711561211323
0.0002720594882359827
0.007905375555744353
0.0003580027933435357
0.002957335631807289
0.0001281643392221225
0.0013761484678189782
0.00047186774614144675
0.005316098650709775
0.000292281113778584
0.000360445902342246
0.0009617708448097849
0.015543270157182931
0.0008608894425488633
0.005493215610871625
0.0003768622426017995
0.010123260561137092
0.0009214261490258841


### Estimating tau with O, Z, covariates X, and observing data pattern Omega
- Omega[i,j] is 0 if the data is missing; otherwise 1

In [7]:
from importlib import reload
reload(MC)  # re-execute the solver module; presumably a convenience while developing the library

# Covariates plus an observation mask: the solver receives Omega alongside
# Z and X. Each iteration prints the relative error of tau, then of beta.
for seed in range(10):
    O, M0, Z, a, b, tau, beta, X, Omega = generate(
        covariates=True, seed=seed, missing_data=True
    )
    solver = MC.MCNNMPanelSolver(Z=Z, X=X, Omega=Omega)
    res = solver.solve_with_regularizer(O=O, l=2)
    tau_rel_err = np.linalg.norm(res.tau - tau) / np.linalg.norm(tau)
    beta_rel_err = np.linalg.norm(res.beta - beta) / np.linalg.norm(beta)
    print(tau_rel_err)
    print(beta_rel_err)

0.010454270610855972
0.0013445041763034185
0.0012413307875458082
0.00040040188768612295
0.010248134849797178
0.0003765501537210193
0.0023761130031284047
0.0001322713104957456
0.00036413911982830123
0.0003492938774685641
0.005120723965109225
0.00027396703513245805
0.0025576315148092385
0.0008698513346549436
0.01522374313899277
0.0010570510141343714
0.004751602929095883
0.00027605747065825517
0.01398660901178906
0.001070979093262242


`solve_with_cross_validation()`: find the best penalty coefficient $l$ with cross validation

In [8]:
reload(MC)  # relies on `reload` having been imported by an earlier cell

# Same data setup as the fixed-penalty run with covariates and a mask, but
# the solver is asked to choose the penalty itself via cross validation.
# Each iteration prints the relative error of tau, then of beta.
for seed in range(10):
    O, M0, Z, a, b, tau, beta, X, Omega = generate(
        covariates=True, seed=seed, missing_data=True
    )
    solver = MC.MCNNMPanelSolver(Z=Z, X=X, Omega=Omega)
    res = solver.solve_with_cross_validation(O=O)
    tau_rel_err = np.linalg.norm(res.tau - tau) / np.linalg.norm(tau)
    beta_rel_err = np.linalg.norm(res.beta - beta) / np.linalg.norm(beta)
    print(tau_rel_err)
    print(beta_rel_err)

  tau = np.sum((O - baseline)*self.Z) / np.sum(self.Z)


0.03570160514472806
0.009259224806406617
0.04133776790223322
0.0036528136126152437
0.10672500098841158
0.0030005383786561625
0.0009462885944692022
0.0013596483068011624
0.0008620897232990341
0.0030106657080206617
0.08049774032884975
0.002405057948759241
0.0006486756517671842
0.005506890610706509
0.11064431378419916
0.009442332196700993
0.051966120324297
0.002134813728555734
0.10165873469251138
0.007865736303514075
