In [1]:
# Imports
import pandas as pd
import numpy as np
import random
from hugchat import hugchat
from hugchat.login import Login
import re
import time
import scipy.stats as stats


In [2]:
# Enter huggingface credentials
login = 'Your username'
password = 'Your password'
# Sign in
sign = Login(login, password)
cookies = sign.login()

cookie_path_dir = "./cookies_snapshot"
sign.saveCookiesToDir(cookie_path_dir)

chatbot = hugchat.ChatBot(cookies=cookies.get_dict()) 

In [3]:
# Load summaries
csv_file_path = "./data/youtube_shorts_with_chatbot_summary.csv"  
shorts = pd.read_csv(csv_file_path)

In [4]:
# Agent class definition
class Agent: 
    def __init__(self, age, gender, education, self_control_capacity, automatic_affective_reaction, automatic_approach_tendency, media_use_frequency):
        self.age = age
        self.gender = gender
        self.education = education 
        self.self_control_capacity = self_control_capacity
        self.automatic_affective_reaction = automatic_affective_reaction
        self.automatic_approach_tendency = automatic_approach_tendency
        self.media_use_frequency = media_use_frequency

    def toString(self):
        return f"age: {self.age}, gender: {self.gender}, education: {self.education}, self-control-capacity: {self.self_control_capacity}, automatic-approach-tendency: {self.automatic_approach_tendency}, media-use-frequency: {self.media_use_frequency}"

In [5]:
# Define attribute ranges
high_to_low_range = ["very high", "high",  "normal", "low", "very low"]
positive_to_negative_range = ["very positive", "positive", "normal", "low", "very low"]
age_range = list(range(10, 61))
gender_range = ["male", "female"]
education_range = high_to_low_range
self_control_capacity_range = high_to_low_range
automatic_approach_tendency_range = high_to_low_range
media_use_frequency_range = high_to_low_range
automatic_affective_reaction_range = positive_to_negative_range

# Create agent with randomized attribute values
def generate_random_agent():
    age = random.choice(age_range)
    gender = random.choice(gender_range)
    education =  random.choice(education_range)
    self_control_capacity = random.choice(self_control_capacity_range)
    automatic_approach_tendency =  random.choice(automatic_approach_tendency_range)
    media_use_frequency = random.choice(media_use_frequency_range)
    automatic_affective_reaction = random.choice(positive_to_negative_range)

    agent = Agent(age=age, gender=gender, education=education, self_control_capacity=self_control_capacity, automatic_affective_reaction=automatic_affective_reaction, automatic_approach_tendency=automatic_approach_tendency, media_use_frequency=media_use_frequency)
    return agent
# Generate random agent
def agents_factory(num_personas):
    agents = [generate_random_agent() for _ in range(num_personas)]
    return agents

In [21]:
# Generate agents
num_agents_to_generate = 100
agents = agents_factory(num_agents_to_generate)

# Show generated agents
for agent in agents:
    print(agent.toString())

age: 49, gender: female, education: very low, self-control-capacity: very high, automatic-approach-tendency: low, media-use-frequency: normal
age: 17, gender: female, education: high, self-control-capacity: very high, automatic-approach-tendency: very high, media-use-frequency: high
age: 57, gender: female, education: low, self-control-capacity: low, automatic-approach-tendency: very low, media-use-frequency: low
age: 12, gender: female, education: very low, self-control-capacity: low, automatic-approach-tendency: very low, media-use-frequency: normal
age: 40, gender: male, education: high, self-control-capacity: very low, automatic-approach-tendency: very high, media-use-frequency: very low
age: 31, gender: male, education: very high, self-control-capacity: high, automatic-approach-tendency: high, media-use-frequency: normal
age: 58, gender: male, education: normal, self-control-capacity: very high, automatic-approach-tendency: low, media-use-frequency: normal
age: 42, gender: male, e

In [22]:
# Extracting the score as the chatbot tends to add unnecessary text
def extract_score(text):
    numbers = re.findall('[01]', text)
    return numbers

# Create prompts
def process(agent, short):

    # Agent description and additional information about terminology
    agent_prompt = f'''You are a {agent.age} year old {agent.gender} with a {agent.education} degree of education. 
    Your automatic approach tendency is {agent.automatic_approach_tendency}, your automatic affective reaction towards social media use, is: {agent.automatic_affective_reaction} and your media use frequency is {agent.media_use_frequency}. 
    According to the Communication Science, automatic approach tendencies are defined as the subconscious of reflexive tendency to instinctively approaching something without deliberate thought. 
    For instance, a person with a high automatic approach tendency may tend to instinctively consume social media content without consciously making the decision of doing so.
    An example for automatic affective reactions towards social media use can be the brief joy after receiving a social media notification.'''

    # Short description and clarification of objective 
    short_prompt = f''' You are using Youtube and are given the option to watch the following YouTube Short: 
                Title: {short['Video Title']},
                Description: {short['LLM Summary']}
                Simultaneously, you have an important project tomorrow which needs extensive preparation.   
                Based on the information I gave you about your persona and the content of the video, you should decide whether you will watch this Short or presume working on your project. 
                Return a 1 for watching the Short and a 0 for not watching the Short. Only return this number.'''
    
    # Fuse both prompts together
    final_prompt = agent_prompt + short_prompt
    # send prompt to chatbot
    response = chatbot.query(final_prompt)
    
    # Extract relevant information from chatbot response and persona
    output = {
        "score": extract_score(str(response)),
        "age": agent.age,
        "gender": agent.gender,
        "education": agent.education, 
        "automatic_affective_reaction": agent.automatic_affective_reaction,
        "self_control_capacity": agent.self_control_capacity, 
        "automatic_approach_tendency": agent.automatic_approach_tendency,
        "media_use_frequency": agent.media_use_frequency,
    }
    
    return output

In [23]:
# Simulate behaviour towards short from all agents 
agent_outputs = []
for agent in agents:
    agent_output = process(agent, shorts.iloc[1])
    agent_outputs.append(agent_output)
    time.sleep(10)


In [24]:
# save agent_outputs
import pickle
with open('agent_outputs.pkl', 'wb') as file:
    pickle.dump(agent_outputs, file)

In [25]:
with open('agent_outputs.pkl', 'rb') as file:
    loaded_agents = pickle.load(file)
    

In [26]:
loaded_agents

[{'score': ['0'],
  'age': 49,
  'gender': 'female',
  'education': 'very low',
  'automatic_affective_reaction': 'very positive',
  'self_control_capacity': 'very high',
  'automatic_approach_tendency': 'low',
  'media_use_frequency': 'normal'},
 {'score': ['1', '1'],
  'age': 17,
  'gender': 'female',
  'education': 'high',
  'automatic_affective_reaction': 'positive',
  'self_control_capacity': 'very high',
  'automatic_approach_tendency': 'very high',
  'media_use_frequency': 'high'},
 {'score': ['0'],
  'age': 57,
  'gender': 'female',
  'education': 'low',
  'automatic_affective_reaction': 'low',
  'self_control_capacity': 'low',
  'automatic_approach_tendency': 'very low',
  'media_use_frequency': 'low'},
 {'score': ['0', '1'],
  'age': 12,
  'gender': 'female',
  'education': 'very low',
  'automatic_affective_reaction': 'positive',
  'self_control_capacity': 'low',
  'automatic_approach_tendency': 'very low',
  'media_use_frequency': 'normal'},
 {'score': ['0', '0'],
  'age': 

In [28]:
# Create dataframe for easier computation
df = pd.DataFrame(loaded_agents)
# Turn string data into numerical for calculation
map_dict = {'very low': -2, 'low': -1, 'normal': 0, 'high': 1, 'very high': 2, 'male': 0, 'female': 1, }
df[['age', 'gender', 'education', 'automatic_affective_reaction', 'self_control_capacity', 'automatic_approach_tendency', 'media_use_frequency']] = df[['age', 'gender', 'education', 'automatic_affective_reaction', 'self_control_capacity', 'automatic_approach_tendency', 'media_use_frequency']].replace(map_dict)
df['score'] = df['score'].apply(lambda x: str(x[0]))
grouped_df = df.groupby('score')['automatic_approach_tendency'].agg(list)
grouped_df

group_0 = grouped_df.get('0', [])
group_1 = grouped_df.get('1', [])
# Calculate mean of attributes for agents choosing to watch short and agents choosing to not watch short

# Group means together
# result = df.groupby('score').mean()

In [29]:
group_0

[-1,
 -2,
 -2,
 2,
 -1,
 -2,
 0,
 0,
 0,
 0,
 -2,
 -2,
 0,
 1,
 1,
 -1,
 0,
 1,
 0,
 -1,
 -2,
 0,
 0,
 -1,
 -1,
 2,
 -1,
 2,
 0,
 -2,
 -1,
 1,
 0,
 -2,
 -2,
 -2,
 -2,
 -1,
 2,
 -1,
 -2,
 2,
 1,
 1,
 -2,
 -2,
 0,
 0,
 0,
 0,
 1,
 -1,
 0,
 -1,
 -2,
 0,
 1,
 -2,
 0,
 2,
 1,
 -1,
 1,
 1,
 1,
 0,
 -2,
 -1,
 -2,
 1,
 1,
 -2,
 -2,
 0]

In [30]:
t_statistic, p_value = stats.ttest_ind(group_0, group_1)


In [31]:
print(f'T-statistic: {t_statistic}')
print(f'P-value: {p_value}')

T-statistic: -2.686962435380293
P-value: 0.008471607052198194


In [32]:
grouped_df

score
0    [-1, -2, -2, 2, -1, -2, 0, 0, 0, 0, -2, -2, 0,...
1    [2, 1, -2, 1, 0, 0, 1, 0, 0, 2, 1, 1, -1, -1, ...
Name: automatic_approach_tendency, dtype: object