In [None]:
# Install the `openai` package into the runtime.
# NOTE(review): the visible cells call the API through `requests` and never import `openai` -- confirm this install is still needed.
!pip install openai

In [None]:
# Make sure the `unzip` tool is installed, then extract the dataset archive.
# The archive is expected at /content/cyberbullying_data_clean.zip; later cells read the
# relative folder 'cyberbullying_data_clean' -- presumably the extracted root, verify.
!sudo apt-get install unzip
!unzip /content/cyberbullying_data_clean.zip

In [None]:
from google.colab import drive

# Mount Google Drive at /content/drive; the result CSVs are written under
# /content/drive/MyDrive further down in this notebook.
drive.mount('/content/drive')

In [None]:
import pandas as pd
import os
from tqdm import tqdm
import random
import pickle

**CAREFUL WITH THE API KEY!** Do not save, share, or commit this notebook with a real key in the cell below.

In [None]:
import base64
import requests

# OpenAI API Key
# Prefer the OPENAI_API_KEY environment variable so the secret never has to be
# typed into (and saved with) the notebook. The placeholder string stays as a
# fallback for the original paste-your-key-here workflow.
api_key = os.environ.get("OPENAI_API_KEY", "put your api key here")

# Headers sent with every chat-completions request made by this notebook.
headers = {
  "Content-Type": "application/json",
  "Authorization": f"Bearer {api_key}"
}


In [None]:
def encode_image(image_path):
  """Read the file at `image_path` and return its bytes as a base64 text string."""
  with open(image_path, mode="rb") as handle:
    raw_bytes = handle.read()

  encoded = base64.b64encode(raw_bytes)
  return encoded.decode('utf-8')


def response_text_to_label(response_text, max_length=175):
  """Turn the model's free-text reply into a numeric label.

  Args:
    response_text: text of the model's reply.
    max_length: replies longer than this many characters are treated as
      uncertain. Defaults to 175, the cut-off this notebook has always used.

  Returns:
    1 if the reply contains the option "a." (yes, cyberbullying),
    0 if it contains the option "b." (no),
    -1 if the reply is too long or names neither option.
    "a." is checked first, so a reply naming both options maps to 1.
  """
  import re  # local import keeps this cell self-contained

  if len(response_text) > max_length:
    return -1

  # An option letter only counts when it stands on its own. A plain substring
  # test also fires on any word that ends in "a"/"b" right before a full stop
  # ("... social media." contains 'a.'), which would turn a "b. No ..." reply
  # into label 1.
  if re.search(r'(?<![A-Za-z0-9])a\.', response_text):
    return 1

  if re.search(r'(?<![A-Za-z0-9])b\.', response_text):
    return 0

  return -1 #uncertain

In [None]:
# Dataset layout: one sub-folder per class under the dataset root.
dataset_path = 'cyberbullying_data_clean'
cyberbullying_path = f'{dataset_path}/cyberbullying'
non_cyberbullying_path = f'{dataset_path}/non_cyberbullying'

# File names (not full paths) found in each class folder.
cyberbullying_images, non_cyberbullying_images = (
    os.listdir(folder) for folder in (cyberbullying_path, non_cyberbullying_path)
)

# Multiple-choice question that is sent before every image.
prompt = (
    'Cyberbullying content is used to harass, threaten, embarrass, or target another person.  '
    'Does the following image contain cyberbullying content?\n'
    'a. Yes\n'
    'b. No'
)



In [None]:
# Re-drawing the subsets overwrites the pickles on disk, hence the explicit confirmation.
# NOTE(review): the sampling is unseeded, so every confirmed run picks different images.
if input('Sure? Y/N ') == 'Y':

  cyberbullying_subset = random.sample(cyberbullying_images, 100)
  non_cyberbullying_subset = random.sample(non_cyberbullying_images, 100)

  # Persist both subsets so later sessions can evaluate exactly the same images.
  subsets_by_file = (
      ('cyberbullying_subset.pkl', cyberbullying_subset),
      ('non_cyberbullying_subset.pkl', non_cyberbullying_subset),
  )
  for pickle_name, subset in subsets_by_file:
    with open(pickle_name, 'wb') as pickle_file:
      pickle.dump(subset, pickle_file)

In [None]:
# Replace the full directory listings with the persisted 100-image subsets so
# the evaluation loops run on the same images every session.
# Context managers close the file handles; the bare open() calls leaked them.
# NOTE: pickle.load can execute arbitrary code -- only load pickles written by this notebook.
with open('cyberbullying_subset.pkl', 'rb') as f:
  cyberbullying_images = pickle.load(f)

with open('non_cyberbullying_subset.pkl', 'rb') as f:
  non_cyberbullying_images = pickle.load(f)

In [None]:
# Exemplar file names used as few-shot demonstrations, five per class.
# The first list is resolved against the cyberbullying folder, the second
# against the non-cyberbullying folder.
few_shot_cyberbullying_images = [
    '1.anti-rainbow-flag-750x_0.jpg',
    '7.s-s-unshaven-sad-ashamed-man-doing-loser-sign-hand-fingers-his-front-funny-depressed-face-expression-s-139158713.jpg',
    '11.500_F_210144311_9SCWDyIi9SWOStydvAdmF2x89bwzrgQr.jpg',
    '12.Pg-42-sharp1-sandinson.jpg',
    '19.noose-around-neck-drawing-19.jpg',
]
few_shot_non_cyberbullying_images = [
    '27770950@N02_14364532144.jpg',
    '24021901@N00_1040628046.jpg',
    '40683746@N06_3744043658.jpg',
    '42002724@N07_15783744428.jpg',
    '256616725152145519_21816927.jpg',
]

def make_payload():
  """Build the few-shot chat-completions request body, minus the query image.

  The single user message holds, for every exemplar, the question text, the
  exemplar image (read from disk and embedded as a base64 data URL, low
  detail) and its answer: 'Answer: a.' for the cyberbullying exemplars first,
  then 'Answer: b.' for the non-cyberbullying ones. It ends with the question
  once more, so the caller can append the image to classify right after it.

  Returns:
    dict: a newly built payload on every call.
  """
  def text_part(text):
    # One plain-text content item.
    return {'type': 'text', 'text': text}

  def image_part(image_path):
    # One image content item, embedded inline as a data URL.
    return {
        'type': 'image_url',
        "image_url": {
          "url": f"data:image/jpeg;base64,{encode_image(image_path)}",
          'detail': 'low'
        }
    }

  # (folder, exemplar file names, answer shown for those exemplars)
  exemplar_groups = (
      (cyberbullying_path, few_shot_cyberbullying_images, 'Answer: a.'),
      (non_cyberbullying_path, few_shot_non_cyberbullying_images, 'Answer: b.'),
  )

  content = []
  for folder, image_names, answer in exemplar_groups:
    for image_name in image_names:
      content += [
          text_part(prompt),
          image_part(f'{folder}/{image_name}'),
          text_part(answer),
      ]

  # Trailing question; the image to classify is appended by the caller.
  content.append(text_part(prompt))

  return {
      "model": "gpt-4-turbo",
      "messages": [
        {
          "role": "user",
          "content": content
        }
      ],
      "max_tokens": 300
  }

In [None]:
# Build the few-shot payload once up front.
# NOTE(review): both evaluation loops further down call make_payload() again on
# every iteration, so this particular value does not appear to be reused.
payload = make_payload()

In [None]:
# Empty result table for the cyberbullying subset: one column per recorded field.
d = {column: [] for column in ('path', 'true_label', 'prompt', 'response', 'label')}
df1 = pd.DataFrame(d)

In [None]:
# Classify every image of the cyberbullying subset (ground truth 1) with the
# few-shot prompt and collect one result row per successful request.
cyberbullying_rows = []
try:
  for image in tqdm(cyberbullying_images):
    # The payload is rebuilt for every request, so nothing has to be popped
    # off again after the query image has been appended.
    payload = make_payload()
    try:
      image_path = f'{cyberbullying_path}/{image}'
      base64_image = encode_image(image_path)

      # The image to classify goes last, right after the trailing question.
      payload['messages'][0]['content'].append(
        {
            'type': 'image_url',
            "image_url": {
              "url": f"data:image/jpeg;base64,{base64_image}",
              'detail': 'low'
              }
        }
      )

      # The timeout keeps a single stalled connection from hanging the run.
      response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload, timeout=120)
      # Report HTTP failures (bad key, rate limit, ...) as what they are
      # instead of as a KeyError on the missing 'choices' field.
      response.raise_for_status()

      response_text = response.json()['choices'][0]['message']['content']
      label = response_text_to_label(response_text)

      cyberbullying_rows.append(
          {'path': image_path, 'true_label': 1, 'prompt': prompt, 'response': response_text, 'label': label}
      )

    # Catch Exception rather than everything, so Ctrl-C still stops the run,
    # and show what went wrong for the skipped image.
    except Exception as error:
      print('Error: ', repr(error))
      print(image)
finally:
  # Append whatever was collected, even when the run is interrupted midway.
  if cyberbullying_rows:
    df1 = pd.concat([df1, pd.DataFrame(cyberbullying_rows)], ignore_index=True)

In [None]:
# Persist the cyberbullying-subset results to Google Drive (requires the Drive mount).
# The row index is written too, since to_csv is called with its defaults.
df1.to_csv('/content/drive/MyDrive/few_shot_cyberbullying.csv')

In [None]:
# Empty result table for the non-cyberbullying subset: one column per recorded field.
d = {column: [] for column in ('path', 'true_label', 'prompt', 'response', 'label')}
df = pd.DataFrame(d)

In [None]:
# Classify every image of the non-cyberbullying subset (ground truth 0) with
# the few-shot prompt and collect one result row per successful request.
non_cyberbullying_rows = []
try:
  for image in tqdm(non_cyberbullying_images):
    # The payload is rebuilt for every request, so nothing has to be popped
    # off again after the query image has been appended.
    payload = make_payload()
    try:
      image_path = f'{non_cyberbullying_path}/{image}'
      base64_image = encode_image(image_path)

      # The image to classify goes last, right after the trailing question.
      payload['messages'][0]['content'].append(
        {
            'type': 'image_url',
            "image_url": {
              "url": f"data:image/jpeg;base64,{base64_image}",
              'detail': 'low'
              }
        }
      )

      # The timeout keeps a single stalled connection from hanging the run.
      response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload, timeout=120)
      # Report HTTP failures (bad key, rate limit, ...) as what they are
      # instead of as a KeyError on the missing 'choices' field.
      response.raise_for_status()

      response_text = response.json()['choices'][0]['message']['content']
      label = response_text_to_label(response_text)

      non_cyberbullying_rows.append(
          {'path': image_path, 'true_label': 0, 'prompt': prompt, 'response': response_text, 'label': label}
      )

    # Catch Exception rather than everything, so Ctrl-C still stops the run,
    # and show what went wrong for the skipped image.
    except Exception as error:
      print('Error: ', repr(error))
      print(image)
finally:
  # Append whatever was collected, even when the run is interrupted midway.
  if non_cyberbullying_rows:
    df = pd.concat([df, pd.DataFrame(non_cyberbullying_rows)], ignore_index=True)

In [None]:
# Persist the non-cyberbullying-subset results to Google Drive (requires the Drive mount).
# The row index is written too, since to_csv is called with its defaults.
df.to_csv('/content/drive/MyDrive/few_shot_non_cyberbullying.csv')

In [None]:
# Reload both result tables from Drive. Restoring only df1 left `df` dependent
# on the in-memory result of the API loop, so the metric cells failed with a
# NameError on a fresh kernel unless every request was sent again.
df1 = pd.read_csv('/content/drive/MyDrive/few_shot_cyberbullying.csv')
df = pd.read_csv('/content/drive/MyDrive/few_shot_non_cyberbullying.csv')

In [None]:
# Keep only decided predictions: -1 marks replies that could not be mapped to an option.
cyberbullying_results = df1["label"][df1["label"] != -1].tolist()
non_cyberbullying_results = df["label"][df["label"] != -1].tolist()

In [None]:
def get_stats(results, ground_truth):
  """Count confusion-matrix cells for predictions that share one true label.

  Args:
    results: sized collection of predictions; a truthy value counts as a
      positive prediction, a falsy one as a negative prediction.
    ground_truth: the single true label of every item (truthy = positive).

  Returns:
    Tuple (number of results, tp, tn, fp, fn).
  """
  total = len(results)
  predicted_positive = sum(1 for result in results if result)
  predicted_negative = total - predicted_positive

  if ground_truth:
    # Every item is truly positive: hits are tp, misses are fn.
    return total, predicted_positive, 0, 0, predicted_negative

  # Every item is truly negative: positive predictions are fp, the rest tn.
  return total, 0, predicted_negative, predicted_positive, 0

In [None]:
# (n, tp, tn, fp, fn) for the cyberbullying subset, whose true label is 1.
a = get_stats(cyberbullying_results, 1)

In [None]:
# (n, tp, tn, fp, fn) for the non-cyberbullying subset, whose true label is 0.
b = get_stats(non_cyberbullying_results, 0)

In [None]:
# Pool the two per-class tuples position by position: (n, tp, tn, fp, fn).
c = [a[position] + b[position] for position in range(5)]

In [None]:
# Display the (n, tp, tn, fp, fn) counts of the cyberbullying subset.
a

In [None]:
# Display the (n, tp, tn, fp, fn) counts of the non-cyberbullying subset.
b

In [None]:
# Pooled confusion-matrix counts; `c` is laid out as (n, tp, tn, fp, fn).
tp = c[1]
tn = c[2]
fp = c[3]
fn = c[4]

# Each ratio is undefined when its denominator is zero (for instance when the
# model never answered "a."). Report NaN in that case instead of stopping the
# notebook with a ZeroDivisionError.
precision = tp / (tp + fp) if (tp + fp) else float('nan')
recall = tp / (tp + fn) if (tp + fn) else float('nan')
accuracy = (tp + tn) / (c[0]) if c[0] else float('nan')

In [None]:
# Prints precision, recall and accuracy, in that order.
# NOTE(review): the numbers in the trailing comment are presumably the output of an earlier run; they are not recomputed here.
print(precision, recall, accuracy) #0.6571428571428571 0.24468085106382978 0.5489130434782609