# Import libraries

In [None]:
!pip install openai cohere tiktoken typing-extensions==4.5.0

from google.colab import userdata

import inspect
import os
import base64
import numpy as np
import pandas as pd

from openai import OpenAI

from tenacity import (
    retry,
    stop_after_attempt,
    wait_random_exponential,
)

from sklearn.metrics import classification_report
from sklearn.metrics import f1_score

# Mount drive

In [None]:
# Mount Google Drive at /content/drive so the dataset CSVs and the inference
# output under /content/drive/MyDrive/... are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

# Authentication

In [None]:
# Fetch the API key through google.colab's `userdata` store instead of
# hard-coding it in the notebook, then build the OpenAI client that the
# inference function uses for every request.
OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')
client = OpenAI(
    api_key=OPENAI_API_KEY,
)

# Import dataset

In [None]:
# Load the two CSVs from the mounted Drive folder:
#   * total_defense_memes_test     - the rows that inference is run on
#   * total_defense_memes_examples - the pool from which in-context examples
#                                    are sampled for each request
total_defense_memes_test = pd.read_csv('/content/drive/MyDrive/stance_detection_datasets_GPT/total_defense_memes/total_defense_memes_test.csv')
total_defense_memes_examples = pd.read_csv('/content/drive/MyDrive/stance_detection_datasets_GPT/total_defense_memes/total_defense_memes_examples.csv')

# Define function to add definitions

In [None]:
def definition(x):
    """Return the definition sentence for a Total Defence pillar.

    Args:
        x: Pillar name, e.g. ``'Military Defence'``.

    Returns:
        The definition string of the matching pillar, or ``None`` when ``x``
        equals none of the six known pillar names (matching is exact and
        case-sensitive).
    """
    pillar_definitions = (
        ('Military Defence', "Singapore's Military Defence: Strong and formidable defence force made up of Regulars and National Servicemen, and supported by the entire Singapore."),
        ('Civil Defence', "Singapore's Civil Defence: Collective effort of the Singaporean society to spot signs of threats, respond effectively and recover quickly from crisis."),
        ('Economic Defence', "Singapore's Economic Defence: Strong and resilient Singaporean economy that is globally competitive and able to bounce back from any crisis."),
        ('Social Defence', "Singapore's Social Defence: Bonds that unite Singaporeans, built on trust and understanding among people of different races and religions, living in harmony and looking out for one another."),
        ('Psychological Defence', "Singapore's Psychological Defence: The will and resolve to defend the Singaporean way of life and interests, the fighting spirit to overcome challenges together."),
        ('Digital Defence', "Singapore's Digital Defence: Being secure, alert and responsible online."),
    )
    # Linear equality scan (rather than a dict lookup) so that unhashable or
    # non-string inputs simply match nothing and fall through to None.
    for pillar_name, pillar_text in pillar_definitions:
        if x == pillar_name:
            return pillar_text
    return None

In [None]:
# Attach the definition sentence of each row's pillar to both the test set and
# the in-context example pool.
for frame in (total_defense_memes_test, total_defense_memes_examples):
    frame['pillar_w_definition'] = frame['pillar'].apply(definition)

# Define function to remap labels

In [None]:
def remap_labels(x):
    """Translate a dataset stance label into the word used in the prompts.

    Args:
        x: Dataset label: ``'Against'``, ``'Neutral'`` or ``'Supportive'``.

    Returns:
        ``'bad'``, ``'neutral'`` or ``'good'`` respectively; ``None`` for any
        other value (matching is exact and case-sensitive).
    """
    label_to_word = (
        ('Against', 'bad'),
        ('Neutral', 'neutral'),
        ('Supportive', 'good'),
    )
    for dataset_label, prompt_word in label_to_word:
        if x == dataset_label:
            return prompt_word
    return None

In [None]:
# Rewrite the example stances into the bad/neutral/good vocabulary used in the
# prompts. Guarded so that re-running this cell is a no-op: feeding the already
# remapped words back through remap_labels would turn every stance into None.
if total_defense_memes_examples['stance'].isin(['Against', 'Neutral', 'Supportive']).any():
    total_defense_memes_examples['stance'] = total_defense_memes_examples['stance'].apply(remap_labels)

# Define function to clean responses

In [None]:
def remap(x):
    """Convert a raw model reply into a dataset stance label.

    The reply is normalised before matching: surrounding whitespace, common
    wrapping punctuation/quotes and letter case are ignored, so ``"Good."``
    or ``" good\\n"`` are accepted as well as ``"good"``.

    Args:
        x: The text returned by the model. Non-string values (for example
           ``None`` or NaN when no text came back) are treated as
           unparseable.

    Returns:
        ``'Against'``, ``'Neutral'`` or ``'Supportive'`` for the answers
        ``bad``, ``neutral`` and ``good`` respectively; ``None`` when the
        reply is anything else.
    """
    if not isinstance(x, str):
        # Nothing to parse; report it as unparseable instead of raising
        # AttributeError on .lower().
        return None
    # Strip whitespace, then wrapping punctuation/quotes, then any whitespace
    # that was inside the quotes.
    answer = x.strip().strip('.,;:!?"\'`*').strip().lower()
    word_to_label = {
        'bad': 'Against',
        'neutral': 'Neutral',
        'good': 'Supportive',
    }
    return word_to_label.get(answer)

# Set prompts

In [None]:
def prompt_one_shot_examples(pillar, pillar_w_definition):
    """Build the user prompt that accompanies an in-context example image.

    Args:
        pillar: Pillar name inserted into the question sentence.
        pillar_w_definition: Definition text placed on the first line.

    Returns:
        The prompt text after ``inspect.cleandoc`` has removed the leading
        blank line and the common indentation.
    """
    raw_prompt = f"""
    {pillar_w_definition}
    Indicate whether this meme is bad, neutral or good towards Singapore's {pillar}."""
    return inspect.cleandoc(raw_prompt)

In [None]:
def prompt_question(pillar, pillar_w_definition):
    """Build the final user prompt for the meme that is to be classified.

    Same wording as the in-context example prompt, plus a constraint line
    asking for a one-word answer.

    Args:
        pillar: Pillar name inserted into the question sentence.
        pillar_w_definition: Definition text placed on the first line.

    Returns:
        The prompt text after ``inspect.cleandoc`` has removed the leading
        blank line and the common indentation.
    """
    raw_prompt = f"""
    {pillar_w_definition}
    Indicate whether this meme is bad, neutral or good towards Singapore's {pillar}.
    Constraint: Without using any other words, answer either bad, neutral, good."""
    return inspect.cleandoc(raw_prompt)

# Define a function to generate text using the `gpt-4-vision-preview` model

In [None]:
# Function to encode the image. via
# https://platform.openai.com/docs/guides/vision
def encode_image(image_path):
    """Read a file and return its contents as base64 text.

    Args:
        image_path: Path of the file to read (opened in binary mode).

    Returns:
        The base64 encoding of the file's bytes, decoded to a ``str``.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')

In [None]:
# Call the OPENAI API. via
# https://platform.openai.com/docs/guides/vision
def _image_part(image_path):
    """Build the message content part that embeds an image as a base64 data URL."""
    return {
        "type": "image_url",
        "image_url": {
            # The media type is always declared as JPEG, whatever the file is.
            "url": f"data:image/jpeg;base64,{encode_image(image_path)}",
        },
    }


def _select_examples(examples, pillar):
    """Draw one in-context example per stance for `pillar`, ordered bad, neutral, good."""
    pool = examples.loc[examples['pillar'] == pillar]
    if pillar == 'Economic Defence' or pillar == 'Social Defence':
        # For these two pillars one randomly drawn 'good' example from the
        # whole table (any pillar) is added to the candidates before sampling.
        # NOTE(review): presumably because these pillars have too few
        # supportive examples of their own - confirm against the data.
        supportive = examples.loc[examples['stance'] == 'good']
        pool = pd.concat([pool, supportive.sample(n=1)], axis=0)
    shots = pool.groupby('stance').sample(n=1).reset_index(drop=True)
    shots = shots.set_index('stance').reindex(['bad', 'neutral', 'good']).reset_index()
    # reindex() fills a stance that has no candidate with NaN; fail with a
    # clear message here instead of deep inside the image encoding.
    missing = shots.loc[shots['image'].isna(), 'stance'].tolist()
    if missing:
        raise ValueError(f"No example available for pillar {pillar!r} with stance(s): {missing}")
    return shots


# Prevent rate limit errors. via
# https://github.com/openai/openai-cookbook/blob/main/examples/
# How_to_handle_rate_limits.ipynb
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def _request_completion(messages):
    """Send the chat request; only this network call is retried with backoff."""
    return client.chat.completions.create(
        model="gpt-4-vision-preview",
        messages=messages,
        max_tokens=4096,
        temperature=0,
        seed=1,
    )


def prediction(image, pillar, pillar_w_definition, examples):
    """Classify one meme's stance towards a pillar using three in-context examples.

    One example per stance (bad, neutral, good) is sampled from `examples`,
    each is sent as a user turn (prompt + image) followed by an assistant turn
    holding its stance, and the meme to classify is sent as the last user turn.

    Args:
        image: Path of the image file to classify.
        pillar: Pillar name used for selecting examples and in the question.
        pillar_w_definition: Definition text of `pillar` used in the question.
        examples: DataFrame of candidate examples with the columns `pillar`,
            `stance` (values 'bad' / 'neutral' / 'good'), `image` and
            `pillar_w_definition`.

    Returns:
        A 4-tuple ``(reply_text, prompt_tokens, completion_tokens,
        example_rows)`` where ``example_rows`` is the list of rows of the three
        examples that were sent.

    Raises:
        ValueError: If no example can be found for one of the three stances.
        tenacity.RetryError: If the API call still fails after six attempts.
    """
    # Examples are chosen and encoded once, outside the retried call, so a
    # retry re-sends the same request and local errors are not retried.
    shots = _select_examples(examples, pillar)

    messages = []
    for shot_pillar, shot_definition, shot_image, shot_stance in zip(
        shots['pillar'], shots['pillar_w_definition'], shots['image'], shots['stance']
    ):
        messages.append({
            "role": "user",
            "content": [
                {"type": "text", "text": prompt_one_shot_examples(shot_pillar, shot_definition)},
                _image_part(shot_image),
            ],
        })
        messages.append({
            "role": "assistant",
            "content": [
                {"type": "text", "text": shot_stance},
            ],
        })
    messages.append({
        "role": "user",
        "content": [
            {"type": "text", "text": prompt_question(pillar, pillar_w_definition)},
            _image_part(image),
        ],
    })

    completion = _request_completion(messages)

    return completion.choices[0].message.content, completion.usage.prompt_tokens, completion.usage.completion_tokens, shots.values.tolist()

# Call the `prediction` function and save inferences

In [None]:
%%time
df_temp = total_defense_memes_test.copy(deep=True)
# Create new columns by applying function. via
# https://stackoverflow.com/a/52363890
df_temp[['prediction', 'prompt_tokens_prediction', 'completion_tokens_prediction', 'examples']] = df_temp.apply(lambda row: prediction(row.image, row.pillar, row.pillar_w_definition, total_defense_memes_examples), axis='columns', result_type='expand')
df_temp.to_csv('/content/drive/MyDrive/stance_detection_datasets_GPT/total_defense_memes/total_defense_memes_test_one_shot_inference.csv', index=False)

CPU times: user 43.8 s, sys: 4.3 s, total: 48.1 s
Wall time: 1h 15min 33s


In [None]:
STANCE_LABELS = ['Against', 'Neutral', 'Supportive']

# Keep the raw model replies in their own column and always parse from there.
# Re-running this cell then reproduces the same result; parsing the already
# mapped labels again would turn all of them into None (and hence into random
# guesses below).
if 'prediction_raw' not in df_temp.columns:
    df_temp['prediction_raw'] = df_temp['prediction']
parsed_predictions = df_temp['prediction_raw'].apply(remap).rename('prediction')

# Label distribution and number of replies that could not be parsed.
print(parsed_predictions.value_counts())
print(parsed_predictions.isna().sum())

# Unparseable replies are replaced by a random label. The generator is seeded
# so the reported metrics are reproducible across runs.
fallback_rng = np.random.default_rng(0)
df_temp['prediction'] = parsed_predictions.apply(lambda x: x if x is not None else fallback_rng.choice(STANCE_LABELS))

print(f1_score(df_temp['stance'].values, df_temp['prediction'].values, labels=STANCE_LABELS, average='macro'))
print(classification_report(df_temp['stance'].values, df_temp['prediction'].values, labels=STANCE_LABELS))

Neutral       390
Against       286
Supportive     91
Name: prediction, dtype: int64
4
0.6795800454478623
              precision    recall  f1-score   support

     Against       0.79      0.65      0.71       352
     Neutral       0.64      0.76      0.70       332
  Supportive       0.61      0.66      0.63        87

    accuracy                           0.70       771
   macro avg       0.68      0.69      0.68       771
weighted avg       0.71      0.70      0.70       771

