In [12]:
import pytest
import numpy as np
from statsmodels.datasets import grunfeld
import time
from datetime import datetime

from ipca import InstrumentedPCA


# Test Construction Errors
def test_construction_errors():
    """Check that invalid constructor arguments are rejected.

    Each case pairs the exception type that must be raised with the keyword
    arguments passed to ``InstrumentedPCA`` to trigger it.
    """
    invalid_cases = (
        (ValueError, {'n_factors': 0}),
        (NotImplementedError, {'intercept': 'jabberwocky'}),
        (ValueError, {'iter_tol': 2}),
    )
    for expected_error, bad_kwargs in invalid_cases:
        with pytest.raises(expected_error):
            InstrumentedPCA(**bad_kwargs)


# Create test data and run package
# Load the Grunfeld panel that serves as the test fixture
data = grunfeld.load_pandas().data
data['year'] = data['year'].astype(np.int64)
# Replace each distinct firm label with a unique integer ID (6, 7, ...) so the
# identifiers conform with what the package is given here
_firm_labels = np.unique(data['firm'])
N = len(_firm_labels)
ID = dict(zip(_firm_labels.tolist(), np.arange(1, N + 1) + 5))
data['firm'] = data['firm'].apply(lambda label: ID[label])
# Fix the column order: identifiers first, then the variables
data = data[['firm', 'year', 'invest', 'value', 'capital']]
# Prepare PSF test variables: random draws laid out as (1, n_years) and
# (2, n_years), one column per distinct year in the panel
_n_years = len(np.unique(data.loc[:, 'year']))
PSF1 = np.random.randn(_n_years, 1).reshape((1, -1))
PSF2 = np.random.randn(_n_years, 2).reshape((2, -1))
# Index by (firm, year), then split into the target and the remaining columns
data = data.set_index(['firm', 'year'])
data_y = data['invest']
data_x = data.drop(columns='invest')

# Start the clock; the elapsed time printed at the end covers the fit and
# every score call below
t0 = datetime.now()

# Test InstrumentedPCA: fit with two factors and no intercept
regr = InstrumentedPCA(n_factors=2, intercept=False)
regr = regr.fit(X=data_x, y=data_y)
temp = regr.get_factors()

# Label -> extra keyword arguments for each score() variant. Each score is
# computed immediately before it is printed so that anything score() itself
# prints stays interleaved with the labelled results in the same order.
_score_variants = (
    ("R2total", {}),
    ("R2pred", {"mean_factor": True}),
    ("R2total_x", {"data_type": "portfolio"}),
    ("R2pred_x", {"mean_factor": True, "data_type": "portfolio"}),
)
for _label, _options in _score_variants:
    print(_label, regr.score(X=data_x, y=data_y, **_options))
print(datetime.now() - t0)




The panel dimensions are:
n_samples: 11 , L: 2 , T: 20
Step 1 - Aggregate Update: 991039.7089779959
Step 2 - Aggregate Update: 2.275957200481571e-15
-- Convergence Reached --
R2total 0.8652761174095233
R2pred 0.7659286476739096
The panel dimensions are:
n_samples: 11 , L: 2 , T: 20
R2total_x 1.0
The panel dimensions are:
n_samples: 11 , L: 2 , T: 20
R2pred_x 0.688718213406356
0:00:00.032888


In [22]:
# Count the rows of data_x under first-level index label 10, i.e. the firm
# whose remapped integer ID is 10 (data_x is indexed by ['firm', 'year']).
len(data_x.loc[10])

20

In [21]:
# Inspect the shape of element [1][0] of the value returned by
# regr.get_factors().
# NOTE(review): presumably this is one factor's series over time -- confirm
# against the ipca get_factors() documentation.
temp[1][0].shape

(20,)

In [None]:
# NOTE(review): unexecuted scratch cell. `data` was already re-indexed by
# ['firm', 'year'] in the setup cell, so those names are no longer columns and
# this call would presumably raise KeyError if run afterwards -- confirm intent
# or remove.
data.set_index(['firm', 'year'])