In [1]:
import pickle
import dill
import numpy as np
import pandas as pd
import pymc3 as pm 
from math import ceil

from collections import defaultdict, Counter

import utils
from talkpages import WikiCorpusReader, WikiCorpus
from alignment import Alignment

%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Topics to analyse; each name is interpolated into the per-topic
# counts file path and the per-topic/per-category trace directory below.
TOPICS = [
    'religion',
    'science',
    'politics',
    'history',
    'people',
    'philosophy',
    'sports',
    'linguistics',
    'psychiatry',
    'environment',
]

# Marker categories grouped under four meta-categories.
# The insertion order of the groups (and of the names within each group)
# determines the order of CATEGORY_LIST below.
META_CATEGORIES = {
    'stylistic': [
        'articles',
        'negations',
        'prepositions',
        'numbers',
        'pronouns',
    ],
    'rhetoric': [
        'tentative',
        'certainty',
        'discrepancy',
        'inclusive',
        'exclusive',
    ],
    'discursive': [
        'causation',
        'insight',
        'inhibition',
        'communication',
        'cognitive process',
        'sensory process',
        'motion',
    ],
    'stance': [
        'optimism',
        'anger',
        'anxiety',
        'sadness',
    ],
}


# Flat list of every category name, group after group, for convenience.
CATEGORY_LIST = [
    category
    for group in META_CATEGORIES.values()
    for category in group
]

    
# Load the filtered lists of markers.
# MARKER_DICT is used as a mapping whose values are iterables of markers.
# CAUTION: pickle.load can execute arbitrary code embedded in the file;
# only load marker dictionaries that come from a trusted source.
with open('../../data/liwc/final.dict', 'rb') as f:
    MARKER_DICT = pickle.load(f)

# Flatten all marker lists and drop duplicates. The file is already closed
# at this point; nothing below needs it.
# dict.fromkeys keeps the first occurrence of each marker in encounter order,
# so MARKER_LIST is the same on every run. list(set(...)) gives no ordering
# guarantee, and for string markers (presumably what the file holds — confirm)
# the order can change between kernel restarts.
MARKER_LIST = list(dict.fromkeys(
    marker
    for markers in MARKER_DICT.values()
    for marker in markers
))

In [4]:
def _filter_valid_counts(n_base_all, n_align_all, c_base_all, c_align_all, cat_idx):
    """Collect the counts of one category across dyads, skipping invalid dyads.

    Each argument except ``cat_idx`` is indexed as ``mapping[dyad][cat_idx]``;
    the dyads iterated over are the keys of ``n_base_all``.

    A dyad is skipped when its count exceeds the matching total, in either the
    baseline pair (``c_base_all`` vs ``n_base_all``) or the alignment pair
    (``c_align_all`` vs ``n_align_all``). Such a pair cannot serve as
    (successes, trials) for the Binomial likelihood.

    Args:
        n_base_all: per-dyad totals for the baseline condition.
        n_align_all: per-dyad totals for the alignment condition.
        c_base_all: per-dyad counts for the baseline condition.
        c_align_all: per-dyad counts for the alignment condition.
        cat_idx: position of the category within each per-dyad sequence.

    Returns:
        Tuple ``(N_base, N_align, C_base, C_align)`` of four parallel lists,
        one entry per retained dyad.
    """
    n_base, n_align, c_base, c_align = [], [], [], []
    for dyad in n_base_all:
        # Baseline pair is checked first, so a dyad rejected here never
        # touches the alignment mappings (same order as the original code).
        base_count = c_base_all[dyad][cat_idx]
        base_total = n_base_all[dyad][cat_idx]
        if base_count > base_total:
            continue

        align_count = c_align_all[dyad][cat_idx]
        align_total = n_align_all[dyad][cat_idx]
        if align_count > align_total:
            continue

        n_base.append(base_total)
        c_base.append(base_count)
        n_align.append(align_total)
        c_align.append(align_count)
    return n_base, n_align, c_base, c_align


def _build_alignment_model(N_base, N_align, C_base, C_align):
    """Build the simple logistic alignment model for one topic/category.

    The variable names given to PyMC3 here are left exactly as they were,
    since the saved traces loaded later are matched against this model.

    Args:
        N_base: Binomial ``n`` for the baseline observations.
        N_align: Binomial ``n`` for the alignment observations.
        C_base: observed baseline counts.
        C_align: observed alignment counts.

    Returns:
        The constructed ``pm.Model``.
    """
    with pm.Model() as model:
        # Parameters
        beta0 = pm.Cauchy('baseline intercept', alpha=-2, beta=3)
        alpha0 = pm.Normal('alignment intercept', mu=0, sd=0.25)

        # Include a guessing coefficient for robust logistic regression
        # (cfr. J. Kruschke, 2014, 'Doing Bayesian data analysis', pp. 635-636)
        guess = pm.Beta('guessing coefficient', alpha=1, beta=9)

        # Transformed parameters: mixture of a fixed 1/2 "guess" probability
        # and the logistic probability, weighted by the guessing coefficient.
        mu_base = guess * (1/2) + (1-guess) * pm.math.invlogit(beta0)
        mu_align = guess * (1/2) + (1-guess) * pm.math.invlogit(beta0 + alpha0)

        # Likelihood
        pm.Binomial('C_base', p=mu_base, observed=C_base, n=N_base)
        pm.Binomial('C_align', p=mu_align, observed=C_align, n=N_align)
    return model


# NOTE(review): the recorded run of this cell stopped with
# "TypeError: 'NoneType' object is not iterable" right after printing the
# first topic/category header. The traceback is truncated, so the failing
# call is not known — re-run with a full traceback to locate it.
for TOPIC in TOPICS:

    print('{}\n{}'.format(TOPIC, '*'*15))

    # Load category-not-word alignment counts (Doyle & Frank, 2016).
    # The file holds eight objects; only the first four are used here.
    with open('./counts-category/{}.dill'.format(TOPIC), 'rb') as f:
        N_base_all, N_align_all, C_base_all, C_align_all, _, _, _, _ = dill.load(f)

    # Statistical modelling, one model per category.
    for c, category in enumerate(CATEGORY_LIST):

        print('{}, {}\n{}'.format(TOPIC, category, '*'*30))

        N_base, N_align, C_base, C_align = _filter_valid_counts(
            N_base_all, N_align_all, C_base_all, C_align_all, c
        )

        model = _build_alignment_model(N_base, N_align, C_base, C_align)

        # Inference: load the previously saved trace for this topic/category
        # inside the model context and plot it.
        with model:
            trace = pm.load_trace('./traces-swam-category/{}-{}'.format(TOPIC, category))

            pm.traceplot(trace)

            pm.plot_posterior(trace, color='LightSeaGreen')
            plt.show()

            

religion
***************
religion, articles
******************************


TypeError: 'NoneType' object is not iterable