# IBL Modeling - Experiment 1

The objective of this notebook is to fit a computational cognitive model to the experimental data of the 183 participants in Experiment 1 and to examine the results. The candidate model for this application is built on Instance-Based Learning (IBL) theory.

The first step is to install the `speedyibl` library.

In [1]:
#!pip install speedyibl
from speedyibl import Agent
import random
import numpy as np
import matplotlib.pyplot as plt
from collections import deque
#import evaluate
import time # to calculate time
import pandas as pd

In [2]:
from openai import OpenAI
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi
import os

# The connection string embeds a username and password, so it is read from the
# MONGODB_URI environment variable instead of being stored in the notebook.
# NOTE(review): the password that was previously committed here should be rotated.
uri = os.environ.get("MONGODB_URI")
if not uri:
    raise RuntimeError(
        "Set the MONGODB_URI environment variable to the MongoDB connection string."
    )
# Create a new client and connect to the server
client = MongoClient(uri, server_api=ServerApi('1'))
# Send a ping to confirm a successful connection.
# A failed ping is only reported, not re-raised, so the rest of the cell still completes.
try:
    client.admin.command('ping')
    print("Pinged your deployment. You successfully connected to MongoDB!")
except Exception as e:
    print(e)

Pinged your deployment. You successfully connected to MongoDB!


In [3]:
# Handles to the experiment database and to the Experiment 1 claims collection inside it.
database = client.get_database("expt_claims_database")
collection = database.get_collection("expt1_claims_collection")
# List the collections that exist in the database as a quick sanity check.
print(database.list_collection_names())

['expt1_claims_collection']


In [4]:
def llm_similarity(img_name1, img_name2, db_collection):
    """Look up the stored similarity between two stimuli.

    Args:
        img_name1: ``img_name`` of the document whose ``scaled_similarity_dict`` is read.
        img_name2: Key looked up inside that dictionary.
        db_collection: Object exposing ``find_one(filter, projection)``
            (in this notebook, a pymongo collection).

    Returns:
        The value stored under ``img_name2`` in the ``scaled_similarity_dict``
        of the document matching ``img_name1``.

    Raises:
        KeyError: If no document matches ``img_name1``, if the matching document
            has no ``scaled_similarity_dict`` field, or if that dictionary has
            no entry for ``img_name2``.
    """
    # Inclusion projection: request only the one field that is read, rather than
    # listing every other field to exclude.
    document = db_collection.find_one(
        {"img_name": img_name1},
        {"scaled_similarity_dict": 1, "_id": 0},
    )
    if document is None:
        # find_one returns None when nothing matches; fail with a clear message
        # instead of "'NoneType' object is not subscriptable".
        raise KeyError(f"No document found with img_name={img_name1!r}")
    return document["scaled_similarity_dict"][img_name2]
# Smoke test: fetch the similarity stored for one pair of pretest stimuli.
llm_similarity(
    img_name1='pretest_tweet_2.jpg',
    img_name2='pretest_tweet_1.jpg',
    db_collection=collection,
)

0.37453271219041634

In [5]:
# Load the trial-level records of every Experiment 1 participant from the helper workbook.
users_info_df = pd.read_excel(
    "CognitiveModelHelper_Exp1.xlsx",
    sheet_name="Cognitive_Model_Helper_Dataset",
)
# Preview the first rows.
users_info_df.head()

Unnamed: 0,Index,Participant_ID,Feedback_Frequency,stimulus,is_fact,is_misinfo,gets_feedback,Assessment,Action,Confidence,...,response_time,test_stage,text,Category,response_mins_adj,trust_scaled,conservativeness_scaled,acc_status,length_per,correct_assessment
0,2,Experiment1_Partcipant1,75,pretest_tweet_1.jpg,1,0,0,1,1,67,...,25898,pretest,Majority of lawmakers in 116th Congress are mi...,politics_,0.431633,1.0,0.428571,unverified,0.670455,1
1,3,Experiment1_Partcipant1,75,pretest_tweet_2.jpg,1,0,0,0,1,18,...,26730,pretest,Launch Directional Robot Intelligent Circuitry...,sports_,0.4455,1.0,0.428571,unverified,0.616477,0
2,4,Experiment1_Partcipant1,75,pretest_tweet_3.jpg,0,1,0,0,4,100,...,21060,pretest,"Once you’ve had the novel Coronavirus, you are...",covid-19_Spread,0.351,1.0,0.428571,unverified,0.698864,1
3,5,Experiment1_Partcipant1,75,pretest_tweet_4.jpg,1,0,0,1,0,85,...,20802,pretest,Latest on spread.\nThere is growing evidence t...,covid-19_Spread,0.3467,1.0,0.428571,unverified,0.696023,1
4,6,Experiment1_Partcipant1,75,pretest_tweet_5.jpg,1,0,0,1,0,100,...,10249,pretest,GM has pledged to stop making gasoline-powered...,business_,0.170817,1.0,0.428571,unverified,1.036932,1


In [6]:
# Distinct participant IDs found in the dataset.
participant_list = users_info_df['Participant_ID'].unique()
# Remove participant 46.
# NOTE(review): the removal is positional (index 45, i.e. the 46th unique ID),
# so it presumably relies on the IDs appearing in participant order -- confirm.
participant_list_final = list(participant_list)
del participant_list_final[45]

In [17]:
# Outcome stored for a training assessment that received feedback and was correct.
_CORRECT_FEEDBACK_REWARD = 40


def _register_similarities(agent, db_collection):
    """Attach one similarity function per instance attribute to `agent`."""
    agent.similarity([0], lambda x, y: llm_similarity(x, y, db_collection))  # stimulus (LLM similarity)
    agent.similarity([1], lambda x, y: 1 - abs(x - y))  # length_per
    agent.similarity([2], lambda x, y: int(x == y))  # category
    agent.similarity([3], lambda x, y: int(x == y))  # acc_status
    agent.similarity([4], lambda x, y: int(x == y))  # assessment


def _stage_trials(agent_df, stage):
    """Return the model inputs for one `test_stage` of a participant as parallel lists."""
    rows = agent_df.loc[agent_df['test_stage'] == stage]
    return {
        # Attributes describing a trial, in the order the similarity functions expect;
        # the assessment is appended later as the fifth attribute.
        'situations': list(zip(
            rows['stimulus'].to_list(),
            rows['length_per'].to_list(),
            rows['Category'].to_list(),
            rows['acc_status'].to_list(),
        )),
        'assessments': rows['Assessment'].to_list(),
        'labels': rows['is_fact'].to_list(),
        'response_times': rows['response_time'].to_list(),
        'gets_feedback': rows['gets_feedback'].to_list(),
    }


def run_model(agent_name, runs=1000, db_collection=None):
    """Simulate one participant's posttest assessments with an IBL agent.

    For every run the agent's memory is reset, populated with the participant's
    pretest and training trials, and then asked to choose an assessment (0 or 1)
    for each posttest trial.

    Args:
        agent_name: Value of ``Participant_ID`` in ``users_info_df`` to model.
        runs: Number of independent simulation runs (default 1000).
        db_collection: Collection passed to ``llm_similarity``; defaults to the
            notebook-level ``collection``.

    Returns:
        A list with one entry per run; each entry is the list of assessments the
        agent chose, one per posttest trial of the participant.
    """
    if db_collection is None:
        db_collection = collection

    agent_df = users_info_df.loc[users_info_df['Participant_ID'] == agent_name]

    # The participant's trials do not change between runs, so extract them once.
    # Every row of each stage is used, rather than a hard-coded 15/22/15 trials.
    pretest = _stage_trials(agent_df, 'pretest')
    training = _stage_trials(agent_df, 'training')
    posttest = _stage_trials(agent_df, 'posttest')

    agent = Agent(default_utility=2.0)
    predictions = []
    for _ in range(runs):
        agent.reset()  # clear the memory for a new run
        # Re-registered after every reset, as before.
        # NOTE(review): confirm whether reset() actually discards similarity functions.
        _register_similarities(agent, db_collection)

        # Simulated clock: running sum of the participant's response times.
        # NOTE(review): raw response_time values are used as timestamps -- confirm
        # this is the time scale the agent expects.
        sim_time = 0

        # Prepopulate with pretest actions (no feedback there, so the outcome is 0).
        for situation, assessment, response_time in zip(
            pretest['situations'], pretest['assessments'], pretest['response_times']
        ):
            sim_time += response_time
            agent.populate_at(situation + (assessment,), 0, sim_time)

        # Populate with training choices.
        for situation, assessment, label, feedback, response_time in zip(
            training['situations'],
            training['assessments'],
            training['labels'],
            training['gets_feedback'],
            training['response_times'],
        ):
            sim_time += response_time
            if feedback == 1:
                # Feedback is checked per trial: reward only a correct assessment.
                outcome = _CORRECT_FEEDBACK_REWARD * int(label == assessment)
            else:
                # Possibility of incorporating belief-based rewards in a future iteration.
                outcome = 0
            agent.populate_at(situation + (assessment,), outcome, sim_time)

        # Evaluate posttest decisions: the agent picks between assessing 0 or 1.
        choices = []
        for situation in posttest['situations']:
            options = [situation + (0,), situation + (1,)]
            choice = agent.choose(options)
            agent.respond(0)  # posttest choices are recorded with outcome 0
            choices.append(choice[4])  # fifth attribute is the chosen assessment
        predictions.append(choices)
    return predictions

In [18]:
# Simulate a single participant to inspect the model's posttest predictions.
pred_test = run_model(agent_name='Experiment1_Partcipant141')

In [15]:
# Per-participant summaries of how well the simulated assessments match the
# participant's own posttest assessments.
participant_avg_accuracy = []    # mean agreement across runs
participant_max_accuracy = []    # best agreement achieved by any single run
participant_avg_prediction = []  # per-trial mean of the predicted assessment across runs
for participant in participant_list_final[:5]:
    predictions = run_model(participant)
    predictions_mat = np.asarray(predictions)  # indexed [run, posttest trial]
    mean_predictions = np.mean(predictions_mat, axis = 0).tolist()
    participant_avg_prediction.append(mean_predictions)
    assessments = users_info_df.loc[(users_info_df['test_stage'] == 'posttest')&(users_info_df['Participant_ID'] == participant)]['Assessment'].to_list()
    # Agreement per run: 1 - |prediction - assessment| averaged over the trials.
    # Working on the whole matrix takes the number of runs from run_model's output
    # instead of repeating a hard-coded 1000 here.
    accuracy = np.mean(1 - np.abs(predictions_mat - np.asarray(assessments)), axis=1)
    participant_avg_accuracy.append(np.mean(accuracy))
    participant_max_accuracy.append(np.max(accuracy))

In [16]:
# Per-trial mean predicted assessment (across runs) for the first participant evaluated above.
participant_avg_prediction[0]

[0.484,
 0.496,
 0.488,
 0.5,
 0.496,
 0.535,
 0.486,
 0.529,
 0.499,
 0.479,
 0.514,
 0.515,
 0.507,
 0.528,
 0.486]

In [19]:
# Per-trial mean of the predicted assessment across all runs stored in pred_test.
pred_test_mat = np.asarray(pred_test)
np.mean(pred_test_mat, axis=0)

array([0.493, 0.508, 0.477, 0.486, 0.492, 0.459, 0.479, 0.515, 0.509,
       0.47 , 0.475, 0.504, 0.5  , 0.469, 0.493])