In [1]:
import os
import sys
import json
import time
import pandas as pd
from functools import partial

# update paths
sys.path.append("/home/rocassius/w266_final/scripts/assembly")
sys.path.append("/home/rocassius/w266_final/scripts/modeling")

from constant import DOC_GEN_PATH, MIN_SESSION, MAX_SESSION
from document import load_generic_documents
from subject import subject_keywords
from helper import pickle_object

from rmn import *
from rmn_analyzer import *

# constants
# Model name and directory handed to RigidRMN.load_rmn() in main().
RMN_NAME = "PoliteFinish"
RMN_PATH = "/home/rocassius/gen-data/models"
# Directory that receives the pickled per-session and combined results.
SAVE_PATH = '/home/rocassius/gen-data/data/topic-data-first'
# Filename template; %s is filled with a zero-padded session number or 'all'.
TOPIC_TAG = 'topic_data_%s.pkl'


# Full-range alternative (MIN_SESSION..MAX_SESSION), currently disabled:
#sessions = list(range(MIN_SESSION, MAX_SESSION+1))
# Forwarded unchanged to RMN_Analyzer.analyze() for every session.
sample_n = 50
# Sessions to process, inclusive of MAX_SESSION.
# NOTE(review): starts at 93 instead of MIN_SESSION; the reason is not visible
# here (possibly resuming an earlier partial run) -- confirm before reuse.
sessions = list(range(93, MAX_SESSION+1))


def analyze_session(session, sample_n, doc_path, rmn):
    """Run the RMN topic analysis for a single session and pickle the result.

    Args:
        session: Session number. It is rendered as a zero-padded three-digit
            integer in the progress messages and in the output filename.
        sample_n: Forwarded to ``RMN_Analyzer.analyze``.
        doc_path: Passed to ``load_generic_documents`` as ``read_path``.
        rmn: Model object handed to ``RMN_Analyzer``.

    Returns:
        The object returned by ``RMN_Analyzer.analyze`` after the ``SESS``
        entry has been added to it via ``update``.

    Side effects:
        Prints two progress lines and writes the result to
        ``SAVE_PATH/TOPIC_TAG % <zero-padded session>`` through
        ``pickle_object``.
    """
    # Load only this session's documents and wrap them with the model.
    session_docs = load_generic_documents(sessions=[session], read_path=doc_path)
    session_analyzer = RMN_Analyzer(rmn, session_docs)

    # One zero-padded label shared by the log lines and the filename.
    session_tag = format(session, '03d')

    print("Analyzing Session %s ..." % session_tag)
    session_analyzer.predict_topics()
    topic_data = session_analyzer.analyze(sample_n)
    print("Data Gathered for Session %s. " % session_tag)

    # Record which session the results belong to.
    topic_data.update({SESS: session})

    # Persist the per-session results next to the other sessions' files.
    out_file = os.path.join(SAVE_PATH, TOPIC_TAG % session_tag)
    pickle_object(topic_data, out_file)

    return topic_data



def main():
    """Analyze every session in ``sessions`` and pickle the combined results.

    Loads the RMN named ``RMN_NAME`` from ``RMN_PATH``, runs
    ``analyze_session`` on each entry of the module-level ``sessions`` list
    (in order, with the module-level ``sample_n`` and ``DOC_GEN_PATH``), and
    saves the list of per-session results to ``SAVE_PATH/TOPIC_TAG % 'all'``.

    Returns:
        None. Progress and the total run time in minutes are printed.

    Raises:
        OSError: if ``SAVE_PATH`` cannot be created.
    """
    # Monotonic clock: the elapsed time cannot be skewed by system clock
    # adjustments during a long run.
    start = time.perf_counter()

    # Make sure the output directory exists before any expensive work, so a
    # bad path fails immediately rather than after a session was analyzed.
    os.makedirs(SAVE_PATH, exist_ok=True)

    # make rmn
    rmn = RigidRMN()
    rmn.load_rmn(name=RMN_NAME, save_path=RMN_PATH)

    # Bind everything except the session number.
    analyze_func = partial(
        analyze_session,
        sample_n=sample_n,
        doc_path=DOC_GEN_PATH,
        rmn=rmn)

    # gather data (each call also writes its own per-session pickle)
    data = [analyze_func(s) for s in sessions]

    # Save the combined list
    pickle_object(data, os.path.join(SAVE_PATH, TOPIC_TAG % 'all'))

    elapsed = time.perf_counter() - start

    # report
    print("SUCCESS, took", round(elapsed / 60, 2), "minutes")
    
    
# Entry-point guard: run the full analysis only when this code executes as the
# main module (the captured output below shows it runs when the cell is run).
if __name__ == "__main__":
    main()


Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Analyzing Session 093 ...


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.df[PARTY][cond_index] = np.random.permutation(party)


Data Gathered for Session 093. 
Analyzing Session 094 ...
Data Gathered for Session 094. 
Analyzing Session 095 ...
Data Gathered for Session 095. 
Analyzing Session 096 ...
Data Gathered for Session 096. 
Analyzing Session 097 ...
Data Gathered for Session 097. 
Analyzing Session 098 ...
Data Gathered for Session 098. 
Analyzing Session 099 ...
Data Gathered for Session 099. 
Analyzing Session 100 ...
Data Gathered for Session 100. 
Analyzing Session 101 ...
Data Gathered for Session 101. 
Analyzing Session 102 ...
Data Gathered for Session 102. 
Analyzing Session 103 ...
Data Gathered for Session 103. 
Analyzing Session 104 ...
Data Gathered for Session 104. 
Analyzing Session 105 ...
Data Gathered for Session 105. 
Analyzing Session 106 ...
Data Gathered for Session 106. 
Analyzing Session 107 ...
Data Gathered for Session 107. 
Analyzing Session 108 ...
Data Gathered for Session 108. 
Analyzing Session 109 ...
Data Gathered for Session 109. 
Analyzing Session 110 ...
Data Gathered 

In [2]:
# Show the path where main() stores the combined results for all sessions.
os.path.join(SAVE_PATH, TOPIC_TAG % 'all')

'/home/rocassius/gen-data/data/topic-data-first/topic_data_all.pkl'

In [3]:
from helper import load_pickled_object

In [4]:
# Reload the combined results from the same 'all' path that main() writes to.
# NOTE(review): presumably this unpickles the file -- only load files you trust.
data = load_pickled_object(os.path.join(SAVE_PATH, TOPIC_TAG % 'all'))

In [5]:
# Display the reloaded results: a list with one dict per analyzed session.
# NOTE(review): this prints the whole structure; data[0] would be a lighter check.
data

[{'dataset': {'n_records': 25460,
   'n_records_R': 12122,
   'n_records_D': 12787,
   'n_nan_preds_R': 0,
   'n_nan_preds_D': 0,
   'hh': 0.021083185,
   'hh_R': 0.020838577,
   'hh_D': 0.022116387,
   'js_RD': 0.04319629512161834,
   'js_placebo': {'mean': 0.014518108851773135,
    'lower': 0.009733122563830365,
    'upper': 0.01930309513971591},
   'topic_use': [(91, 0.05333000421524048),
    (2, 0.05289367213845253),
    (25, 0.037363775074481964),
    (7, 0.03361533209681511),
    (86, 0.02931549958884716),
    (23, 0.028548195958137512),
    (60, 0.027214156463742256),
    (96, 0.025739237666130066),
    (79, 0.024384891614317894),
    (26, 0.02380423992872238),
    (40, 0.02303234115242958),
    (38, 0.022691691294312477),
    (64, 0.021473709493875504),
    (81, 0.021253474056720734),
    (97, 0.02088293433189392),
    (84, 0.02061627432703972),
    (12, 0.020223557949066162),
    (95, 0.019444109871983528),
    (93, 0.01858927495777607),
    (77, 0.018099255859851837),
    (8,