In [1]:
from __future__ import print_function
import psycopg2 as pg2
import numpy as np
import re
from matplotlib import pyplot as plt
%matplotlib inline
import seaborn as sb
import pandas as pd
import gensim
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from scipy.spatial.distance import jaccard as jaccard_distance
from scipy.spatial.distance import cosine as cosine_distance
from scipy.spatial.distance import hamming as hamming_distance
from scipy import stats as ss
from sklearn.cross_decomposition import CCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as FLD
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.datasets import make_classification
from sklearn.metrics import confusion_matrix
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.utils import resample
from sklearn.model_selection import StratifiedKFold
from sklearn.svm import LinearSVC
from matplotlib import rc
rc('text', usetex=False)
import csv
import sys
np.set_printoptions(threshold=sys.maxsize)
import os.path
import itertools
import random
import json
from copy import copy
import plotly
import cufflinks as cf
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from IPython.display import clear_output
import textstat as ts

In [2]:
# Column name -> dtype for every questionnaire field stored in the response
# table. The insertion order of the keys is significant: it is the order in
# which the columns are created when the table is built from this mapping.
# Numeric answers use np.float64 so that missing values can be held as NaN
# (np.float64 is the same type the removed NumPy 1.x alias `np.float_` named);
# free-text answers use str; structured or mixed answers use np.object_.
data_type = {
    'academicCaptionA': np.float64,
    'academicCaptionB': np.float64,
    'actA': np.float64,
    'actB': np.float64,
    'activityCategoriesA': np.object_,
    'activityCategoriesB': np.object_,
    'activitySimilarityAB': np.float64,
    'activityTextA': str,
    'activityTextB': str,
    'age': str,
    'analyticalCaptionA': np.float64,
    'analyticalCaptionB': np.float64,
    'attentionA': np.float64,
    'attentionB': np.float64,
    'awareA': np.float64,
    'awareB': np.float64,
    'catch1': np.float64,
    'catch10': np.float64,
    'catch11': np.float64,
    'catch12': np.float64,
    'catch13': str,
    'catch14': np.float64,
    'catch15': np.float64,
    'catch16': np.float64,
    'catch17': np.float64,
    'catch2': np.float64,
    'catch3': np.float64,
    'catch4': np.float64,
    'catch5': np.float64,
    'catch6': np.float64,
    'catch7': np.float64,
    'catch8': np.float64,
    'catch9': np.float64,
    'causalAB': np.float64,
    'centerA': np.float64,
    'centerB': np.float64,
    'certainA': np.float64,
    'certainB': np.float64,
    'cherishedCaptionA': np.float64,
    'cherishedCaptionB': np.float64,
    'childishCaptionA': np.float64,
    'childishCaptionB': np.float64,
    'closeA': np.float64,
    'closeB': np.float64,
    'consequenceA': np.float64,
    'consequenceB': np.float64,
    'consistentA': np.float64,
    'consistentB': np.float64,
    'controlA': np.float64,
    'controlB': np.float64,
    'copeA': np.float64,
    'copeB': np.float64,
    'currentEmotionA': np.float64,
    'currentEmotionB': np.float64,
    'despicableCaptionA': np.float64,
    'despicableCaptionB': np.float64,
    'effectiveCaptionA': np.float64,
    'effectiveCaptionB': np.float64,
    'emotionA': np.object_,
    'emotionB': np.object_,
    'emotionalSimilarityAB': np.float64,
    'english': np.float64,
    'episodic1': np.float64,
    'episodic2': np.float64,
    'episodic3': np.float64,
    'ethnicity': str,
    'expectedA': np.float64,
    'expectedB': np.float64,
    'familiarA': np.float64,
    'familiarB': np.float64,
    'familyA': np.float64,
    'familyB': np.float64,
    'fatiguingCaptionA': np.float64,
    'fatiguingCaptionB': np.float64,
    'feedback': str,
    'frustratingCaptionA': np.float64,
    'frustratingCaptionB': np.float64,
    'future1': np.float64,
    'future2': np.float64,
    'gender': str,
    'goofyCaptionA': np.float64,
    'goofyCaptionB': np.float64,
    'grotesqueCaptionA': np.float64,
    'grotesqueCaptionB': np.float64,
    'heartwarmingCaptionA': np.float64,
    'heartwarmingCaptionB': np.float64,
    'helpfulCaptionA': np.float64,
    'helpfulCaptionB': np.float64,
    'indoorA': np.float64,
    'indoorB': np.float64,
    'initiatedA': np.float64,
    'initiatedB': np.float64,
    'instructionalCaptionA': np.float64,
    'instructionalCaptionB': np.float64,
    'interactA': np.float64,
    'interactB': np.float64,
    'language': np.object_,
    'locationA': np.float64,
    'locationB': np.float64,
    'maliciousCaptionA': np.float64,
    'maliciousCaptionB': np.float64,
    'memoryA': str,
    'memoryB': str,
    'mischievousCaptionA': np.float64,
    'mischievousCaptionB': np.float64,
    'moreRecentAB': np.float64,
    'overallSimilarityAB': np.float64,
    'peopleSimilarityAB': np.float64,
    'preciousCaptionA': np.float64,
    'preciousCaptionB': np.float64,
    'pressureA': np.float64,
    'pressureB': np.float64,
    'productiveCaptionA': np.float64,
    'productiveCaptionB': np.float64,
    'promptA2': str,
    'promptB2': str,
    'properA': np.float64,
    'properB': np.float64,
    'race': np.object_,
    'recallFreqA': np.float64,
    'recallFreqB': np.float64,
    'regularCaptionA': np.float64,
    'regularCaptionB': np.float64,
    'relateAB': str,
    'relationshipA': np.float64,
    'relationshipB': np.float64,
    'reoccurA': np.float64,
    'reoccurB': np.float64,
    'repulsiveCaptionA': np.float64,
    'repulsiveCaptionB': np.float64,
    'riskA': np.float64,
    'riskB': np.float64,
    'scholarlyCaptionA': np.float64,
    'scholarlyCaptionB': np.float64,
    'self-esteemA': np.float64,
    'self-esteemB': np.float64,
    'semantic1': np.float64,
    'semantic2': np.float64,
    'sentimentalCaptionA': np.float64,
    'sentimentalCaptionB': np.float64,
    'settingA': np.float64,
    'settingB': np.float64,
    'sharedPeopleAB': np.float64,
    'similarEnvironmentA': np.float64,
    'similarEnvironmentB': np.float64,
    'spatial1': np.float64,
    'spatial2': np.float64,
    'spatial3': np.float64,
    'spatialDistanceAB': np.object_,
    'spatialScaleA': np.float64,
    'spatialScaleB': np.float64,
    'spatialSimilarityAB': np.float64,
    'stakeA': np.float64,
    'stakeB': np.float64,
    'standardCaptionA': np.float64,
    'standardCaptionB': np.float64,
    'stressfulCaptionA': np.float64,
    'stressfulCaptionB': np.float64,
    'temporalDistanceAB': np.object_,
    'temporalScaleA': np.float64,
    'temporalScaleB': np.float64,
    'temporalSeparationAB': np.float64,
    'tiresomeCaptionA': np.float64,
    'tiresomeCaptionB': np.float64,
    'transportability1': np.float64,
    'transportability2': np.float64,
    'transportability3': np.float64,
    'transportability4': np.float64,
    'transportability5': np.float64,
    'transportability6': np.float64,
    'transportability7': np.float64,
    'transportability8': np.float64,
    'transportability9': np.float64,
    'transportability10': np.float64,
    'transportability11': np.float64,
    'transportability12': np.float64,
    'transportability13': np.float64,
    'transportability14': np.float64,
    'transportability15': np.float64,
    'transportability16': np.float64,
    'transportability17': np.float64,
    'transportability18': np.float64,
    'transportability19': np.float64,
    'turkExperience': np.float64,
    'typicalCaptionA': np.float64,
    'typicalCaptionB': np.float64,
    'understandingA': np.float64,
    'understandingB': np.float64,
    'usefulCaptionA': np.float64,
    'usefulCaptionB': np.float64,
    'usualCaptionA': np.float64,
    'usualCaptionB': np.float64,
    'vividnessA': np.float64,
    'vividnessB': np.float64,
    'wackyCaptionA': np.float64,
    'wackyCaptionB': np.float64,
    'wordCloudA': np.object_,
    'wordCloudB': np.object_,
    'workerID': str,
}


# Display labels for the six temporal-separation codes; the code (0-5) is the
# position of its label in this list.
_temp_sep_labels = [
    '< 24 hrs',
    '< 7 days',
    '< 30 days',
    '< 12 months',
    '< 5 years',
    '5+ years',
]
tempSepDict = dict(enumerate(_temp_sep_labels))

In [3]:
# Pieces of the SELECT statement used to pull records from the database.
table_name = 'main'
select_columns = ', '.join(['workerid', 'hitid', 'datastring'])
# alternative column set: 'workerid, assignmentid, hitid, status, datastring'

# grab all entries that successfully completed the HIT, restricted to these HITs
_hit_ids = (
    '37ZQELHEQ0AS4P5GYVIPGPGK95INMK',
    '3MVY4USGB6Z3OJ7ULC1JNQJ9FWSSI9',
    '38F60IALAGTTWO3T6MH3RRFZVNF0T5',
)
pattern_filter = "status IN (4) AND hitid IN ({})".format(
    ', '.join("'{}'".format(hit_id) for hit_id in _hit_ids)
)

In [4]:
# Fetch the selected columns of every matching record into `all_rows`.
#
# Credentials are read from the DATABASE_URL environment variable rather than
# being stored in the notebook. Run `heroku pg:credentials:url` within the app
# directory to get the connection URL, then export it as DATABASE_URL before
# starting the kernel (a missing variable raises KeyError here).
# SECURITY: credentials that were previously committed in this cell should be
# treated as leaked and rotated.
conn = pg2.connect(os.environ['DATABASE_URL'])
try:
    # The cursor context manager closes the cursor; the connection itself is
    # closed in `finally` so it is released even if the query fails.
    with conn.cursor() as cur:
        # Column list, table name and filter are notebook-defined constants
        # (not user input), so they are formatted into the statement directly.
        cur.execute('SELECT {coi} FROM {tn} WHERE {pf}'.format(coi=select_columns, tn=table_name, pf=pattern_filter))
        all_rows = cur.fetchall()
finally:
    conn.close()

In [5]:
# Build an all-missing table with one row per fetched record and one column
# per entry of `data_type`, then cast each column to its declared dtype.
columns = list(data_type)
num_entries = len(all_rows)
index = np.arange(num_entries)
df = pd.DataFrame(index=index, columns=columns).astype(data_type)

In [6]:
# Fill the table row by row from the JSON payload stored at position 2 of
# each fetched record.
_metadata_fields = (
    ('workerID', 'workerId'),
    ('hitID', 'hitId'),
    ('assignmentID', 'assignmentId'),
    ('condition', 'condition'),
)
for row_number, record in enumerate(all_rows):
    payload = json.loads(record[2])
    # Copy the per-record identifiers (table column <- payload key).
    for column_name, payload_key in _metadata_fields:
        df.at[row_number, column_name] = payload[payload_key]
    for entry in payload['data']:
        if 'trialdata' not in entry.keys():
            continue
        trial = entry['trialdata']
        if 'response' in trial:
            # Question/response pair: stored only when the question name is
            # already a column of the table.
            question = trial['question']
            response = trial['response']
            if question in df.columns:
                df.at[row_number, question] = response
        elif 'memoryA' in trial:
            # Entry carrying the two memory texts together.
            df.at[row_number, 'memoryA'] = trial['memoryA']
            df.at[row_number, 'memoryB'] = trial['memoryB']

In [7]:
# Lower-case the free-text answers to 'catch13' so they can be compared
# case-insensitively. The 'nan' placeholder string is already lower-case and
# is left as is; non-string cells are passed through unchanged instead of
# raising on `.lower()`.
if 'catch13' in df.columns:
    df['catch13'] = [response.lower() if isinstance(response, str) else response
                     for response in df['catch13']]

# Index the table by worker. Guarded so that re-running this cell after the
# index has already been set does not raise a KeyError.
if df.index.name != 'workerID':
    df = df.set_index('workerID')

In [8]:
# catch trial summary

# Correct answer for each catch trial.
catch_answer_key = {'catch1': 0,
                    'catch2': 0,
                    'catch3': 3,
                    'catch4': 10,
                    'catch5': 0,
                    'catch6': 3,
                    'catch7': 1,
                    'catch8': 2,
                    'catch9': 3,
                    'catch10': 2,
                    'catch11': 0,
                    'catch12': 1,
                    'catch13': 'in',
                    'catch14': 2,
                    'catch15': 3,
                    'catch16': 4,
                    'catch17': 0
                   }


def _is_missing_response(value):
    """Return True if `value` marks an unanswered question.

    Unanswered cells are None, a float NaN, or the string 'nan'. The check is
    type-safe, so string answers never reach `np.isnan` (which raises
    TypeError on non-numeric input).
    """
    if value is None:
        return True
    if isinstance(value, str):
        return value == 'nan'
    return isinstance(value, (float, np.floating)) and bool(np.isnan(value))


def _catch_accuracy(responses, answer_key):
    """Return the fraction of answered catch trials that match `answer_key`.

    Parameters
    ----------
    responses : pandas.Series
        One table row, indexed by column name.
    answer_key : dict
        Catch-trial column name -> correct answer.

    Returns
    -------
    float
        Mean correctness over the catch trials that are present in
        `responses` and were answered; NaN if there are none.
    """
    graded = [responses[trial] == answer
              for trial, answer in answer_key.items()
              if trial in responses.index
              and not _is_missing_response(responses[trial])]
    # Explicit NaN avoids the RuntimeWarning np.mean emits on an empty list.
    return np.mean(graded) if graded else np.nan


# compute catch trial accuracy for each row; scoring per row (rather than
# looking rows up by worker label) keeps this working when the same worker
# appears in more than one row of the table
_row_catch_accuracy = [_catch_accuracy(row, catch_answer_key)
                       for _, row in df.iterrows()]

# worker -> accuracy lookup (for a repeated worker label, the last row wins)
worker_catch_accuracy = dict(zip(df.index, _row_catch_accuracy))

df['catch_accuracy'] = _row_catch_accuracy

In [9]:
# Print a plain-text summary per row of the table: the index label, two
# one-line fields, a blank separator, then each long field followed by a
# blank separator.
_summary_header_fields = ('assignmentID', 'catch_accuracy')
_summary_body_fields = ('memoryA', 'memoryB',
                        'wordCloudA', 'wordCloudB',
                        'activityCategoriesA', 'activityCategoriesB')
for worker_label in df.index:
    print(worker_label)
    for field_name in _summary_header_fields:
        print(df.at[worker_label, field_name])
    print('\n')
    for field_name in _summary_body_fields:
        print(df.at[worker_label, field_name])
        print('\n')

A3CX0LXFHZ6I3I
324G5B4FB4K9KA4GJLJ4AFI4115077
1.0


The most vivid part of the law and order SVU series is the latest episodes in which Elliot Stabler returns.  It is the episode where the initial part of the episode shows a car bombing with a women being emergency evacuated by ambulance.  Olivia Benson arrives on scene and recognizes that it is Elliot Stabler's wife.  She is quite distraught and shocked at the same time.


Once Elliot and Captain Benson had locked eye's, you could sense the pure emotion from them.  A sense of sadness, shock, frustration, and a longing of a long lost friend/partner.  It was truly a surreal moment in SVU history.  The episode continued with Stabler working on the team of SVU (partially) until he let his emotions get the best of him in the interrogation room of a suspect. 


[['Scary', 'suspensful', 'pain', 'suffering', 'Dangerous'], ['1', '2', '1', '1', '2']]


[['uncertain', 'frustrating', 'unsure', 'desperate', 'concering'], ['0', '1', '2', '2', '1']]