# Dependencies

In [None]:
%%capture
!pip install openai
!pip install anthropic

In [None]:
# Mount Google Drive under /content/drive (requires the Colab runtime, since
# the helper comes from the `google.colab` package).
# NOTE(review): `path` in the config cell presumably points inside this mount — confirm.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import os
import textwrap
import time

import anthropic
import openai

# Credentials are read from the environment instead of being hard-coded, so
# real keys are never saved inside the notebook. Set OPENAI_API_KEY (and
# ANTHROPIC_API_KEY when `call_anthropic` is used) before running this cell.
openai.api_key = os.environ.get("OPENAI_API_KEY", "")
# `call_anthropic` reads this module-level name when it builds its client.
ANTHROPIC_API_KEY_TRIAL = os.environ.get("ANTHROPIC_API_KEY", "")

# path to data: prefix that is string-concatenated in front of every input and
# output file name, so it must end with a path separator when non-empty.
path = ''

# Preliminaries

In [None]:
def call_openai(system, query, model="gpt-3.5-turbo", max_tokens=100):
  """Send one system + user exchange to the OpenAI chat completion endpoint.

  Args:
    system: Text sent as the "system" message.
    query: Text sent as the "user" message.
    model: Name of the chat model passed to the API.
    max_tokens: Token limit passed to the API for the generated answer.

  Returns:
    An `(answer, elapsed)` tuple: the message content of the first returned
    choice, and the wall-clock seconds spent between starting the request and
    reading that content.
  """
  t0 = time.time()
  chat_messages = [
      {"role": "system", "content": system},
      {"role": "user", "content": query},
  ]
  response = openai.ChatCompletion.create(
      model=model,
      messages=chat_messages,
      temperature=0,
      max_tokens=max_tokens,
  )
  reply = response.choices[0].message.content
  elapsed = time.time() - t0
  return reply, elapsed

def call_anthropic(system, query, model="claude-v1.3", max_tokens=100):
  """Send one prompt to the Anthropic completion endpoint, retrying on errors.

  The system text and the query are joined into a single human turn, because
  the prompt format used here has no separate system slot.

  Args:
    system: Instruction text placed at the start of the human turn.
    query: Text appended after the instruction, separated by a blank line.
    model: Model name passed to the API.
    max_tokens: Value passed as `max_tokens_to_sample`.

  Returns:
    An `(answer, elapsed)` tuple: the `'completion'` field of the response and
    the wall-clock seconds spent, including any failed attempts and waits.

  Raises:
    RuntimeError: if every attempt failed; the last error is chained as cause.
  """
  max_attempts = 15
  retry_delay = 0.35  # seconds to wait between attempts
  client = anthropic.Client(ANTHROPIC_API_KEY_TRIAL)
  prompt = f"{anthropic.HUMAN_PROMPT} {system}\n\n{query}{anthropic.AI_PROMPT}"
  last_error = None
  start_time = time.time()
  for attempt in range(max_attempts):
    try:
      response = client.completion(
          prompt=prompt,
          stop_sequences = [anthropic.HUMAN_PROMPT],
          model=model,
          max_tokens_to_sample=max_tokens,
          temperature=0)
    except Exception as err:
      # `Exception` rather than a bare except, so that interrupting the kernel
      # (KeyboardInterrupt) is not swallowed and retried.
      last_error = err
      print(f"Exception occurred! ({attempt+1}): {err!r}")
      time.sleep(retry_delay)
    else:
      break
  else:
    # Without this, `response` would be unbound below and the real cause of
    # the failure would be hidden behind an UnboundLocalError.
    raise RuntimeError(
        f"Anthropic request failed after {max_attempts} attempts") from last_error
  answer = response['completion']
  end_time = time.time()
  return answer, end_time-start_time

# Pipeline

Read and sample data

In [None]:
import random
random_seed = 1247


def _read_nonempty_lines(filepath):
  """Return the non-empty lines of a UTF-8 text file, newline removed.

  A final line that lacks a trailing newline is kept as well.
  """
  with open(filepath, "r", encoding="utf-8") as file:
    stripped = (line.rstrip("\n") for line in file)
    return [line for line in stripped if line]


# read data and sample sentences for few-shot learning
# accepted
greeklish_accepted = _read_nonempty_lines(path + 'greeklish_gazzetta_accepted.txt')

# FIXME(review): this opens the *Greeklish* file again, so `greek_accepted`
# ends up identical to `greeklish_accepted`. The Greek translations presumably
# live in a separate file whose name is not visible here — confirm and replace.
greek_accepted = _read_nonempty_lines(path + 'greeklish_gazzetta_accepted.txt')
# only use the translations from the first annotator
greek_accepted = greek_accepted[:len(greeklish_accepted)]


# rejected
greeklish_rejected = _read_nonempty_lines(path + 'greeklish_gazzetta_rejected.txt')

# FIXME(review): same issue as above — the Greeklish file is read a second time.
greek_rejected = _read_nonempty_lines(path + 'greeklish_gazzetta_rejected.txt')
# only use the translations from the first annotator
greek_rejected = greek_rejected[:len(greeklish_rejected)]

# Few-shot examples: 5 accepted sentences followed by 5 rejected ones. The
# prompt cells label entries by position (`i<5`), so the accepted block must
# come first and the rejected sample must be appended, not assigned over it.
# The RNG is re-seeded before every draw, so each draw starts from the same
# state; when a Greeklish list and its Greek counterpart have equal length the
# same positions are picked and greeklish[i] / greek[i] stay aligned.
shots_per_class = 5

random.seed(random_seed)
greeklish = random.sample(greeklish_accepted, shots_per_class)
random.seed(random_seed)
greeklish += random.sample(greeklish_rejected, shots_per_class)

random.seed(random_seed)
greek = random.sample(greek_accepted, shots_per_class)
random.seed(random_seed)
greek += random.sample(greek_rejected, shots_per_class)

## Create "system"

Zero-shot version

In [None]:
# Prepare the prompt, or in the chatgpt terminology the "system"
instruction_zero = "You are an expert in online content moderation. The following sentences are online user comments in Greeklish, which can either be accepted or rejected for posting by the moderator. For each sentence provided, classify as either 0 for \"rejected\" or 1 for \"accepted\". Here is an example: \n\n"
system = f"{instruction_zero}\n\n"

# Append the labelled examples. The first five entries of `greeklish` get
# class "1" (accepted) and every later entry class "0" (rejected), matching
# the label scheme stated in the instruction above.
# NOTE(review): assumes `greeklish` lists the accepted examples first — confirm
# against the sampling cell.
for i, sentence in enumerate(greeklish):
  cls = "1" if i < 5 else "0"
  system = f"{system}Sentence:\n{sentence}\nClass:\n{cls}\n\n"

# print the entire prompt given to GPT4
print(system)

Few-shot version (Greeklish translations)

In [None]:
# Prepare the prompt, or in the chatgpt terminology the "system"

# Create prompt
instruction_few = "You are an expert in online content moderation. The following sentences are online user comments in Greeklish, which can either be accepted or rejected for posting by the moderator, depending on their toxicity. For each sentence provided, classify as either 0 for \"rejected\" or 1 for \"accepted\". Here is an example: \n\n"
system = f"{instruction_few}\n\n"

# Part 1: labelled classification examples. The first five entries of
# `greeklish` get class "1" (accepted), later entries class "0" (rejected),
# matching the label scheme stated in the instruction.
# NOTE(review): assumes `greeklish` lists the accepted examples first — confirm
# against the sampling cell.
for i, sentence in enumerate(greeklish):
  cls = "1" if i < 5 else "0"
  system = f"{system}Sentence:\n{sentence}\nClass:\n{cls}\n\n"

# Part 2: output-format instruction plus Greeklish/Greek pairs. This text has
# to be appended to `system`; resetting `system` to the bare instruction here
# would throw away the labelled examples built above.
translation_intro = "For every sentence provided, only return the number of the class (0 or 1). To help you better understand Greeklish, here are some examples of Greeklish sentences and their translations in Greek:"
system = f"{system}{translation_intro}\n\n"

for i, sentence in enumerate(greeklish):
  system = f"{system}Greeklish:\n{sentence}\n"
  system = f"{system}Greek:\n{greek[i]}\n\n"

# print the entire prompt given to GPT4
print(system)

## Run queries

GPT-4

In [None]:
import re
import time
import pandas as pd


def _parse_class_label(answer):
  """Extract the class digit from a raw model answer such as "Class: 1" or "0".

  Returns the first standalone 0 or 1 as an int, or None when there is none.
  """
  match = re.search(r"\b[01]\b", answer)
  return int(match.group()) if match else None


# Multiple queries for running experiments with gpt-4
datasets = ["accepted","rejected"]
mode = "few"

# Sentences to classify for each dataset name.
# NOTE(review): the few-shot examples in `system` are sampled from these same
# lists, so they are also part of the evaluated sentences — confirm whether
# they should be excluded.
queries_by_dataset = {
    "accepted": greeklish_accepted,
    "rejected": greeklish_rejected,
}

for dataset in datasets:
  print("======Working on {} dataset======".format(dataset))

  # get correct dataset
  queries = queries_by_dataset[dataset]

  # call gpt4
  results_gpt4 = []
  for query in queries:
    print(textwrap.fill(query, initial_indent="Sentence:\t", subsequent_indent="\t"))
    res, t = call_openai(system, query, model="gpt-4")
    results_gpt4.append((query,res))
    print(textwrap.fill(res, initial_indent="gpt-4:\t", subsequent_indent="\t"))
    print(f"Generated answer in {t:.3f} seconds\n")

    # for flow control
    time.sleep(1)

  print("Saving results...")
  df = pd.DataFrame(results_gpt4, columns=["sentence", "class"])
  # The model may answer "Class: 1" instead of a bare digit, so the label is
  # parsed out of the text. Nullable Int64 keeps unparseable answers as <NA>
  # instead of raising.
  df['class'] = df['class'].map(_parse_class_label).astype("Int64")
  df.to_csv(path+ "gpt4_toxicity_{}_gazzetta_{}_.csv".format(mode, dataset), index=False)

  # Raw model answers, one per line. Each result is a (sentence, answer)
  # tuple, so the answer has to be unpacked before it can be written.
  with open(path+ "gpt4_toxicity_{}_gazzetta_{}_.txt".format(mode, dataset), 'w', encoding="utf-8") as file:
        for _, answer in results_gpt4:
            file.write(answer)
            file.write("\n")
  time.sleep(2)


Sentence:	Roberto eisai super agori mou!!!
gpt-4:	Class: 1
Generated answer in 1.080 seconds

Sentence:	Otan o Diamantidis sou epairne Eurwpaiko k se pigaine teliko
	Moundobasket den ta eleges auta tote....
gpt-4:	Class: 1
Generated answer in 0.909 seconds

Sentence:	Alla eixes afisei ta opadika stin akri,panugirizes kai eixes
	parei fanela Ethnikis me to onoma Diamantidis pisw....
gpt-4:	Class: 1
Generated answer in 0.640 seconds

Sentence:	Ela pes tin alitheia den einai kako....
gpt-4:	Class: 1
Generated answer in 1.423 seconds

Sentence:	To xete kanei karamela oloi les k den epaixe pote Ethniki
	alla toso komplexikoi eiste....
gpt-4:	Class: 0
Generated answer in 0.574 seconds

Sentence:	O spanoulis ti ekane me tin Ethniki??
gpt-4:	Class: 1
Generated answer in 0.581 seconds

Sentence:	Xeiroteri thesi se Eurobasket den exoume parei pote....
gpt-4:	Class: 1
Generated answer in 1.029 seconds

Sentence:	Megalwse o 3D den to xere na paizei mexri ta 40 Ethniki gia
	na min ton krazete oi ga

In [None]:
# Show the (sentence, raw answer) pairs collected by the run cell. The list is
# re-created for every dataset, so it holds the last dataset processed.
results_gpt4

['Class: 1',
 'Class: 0',
 'Class: 1',
 'Class: 1',
 'Class: 0',
 'Class: 1',
 'Class: 1',
 'Class: 1',
 'Class: 1',
 'Class: 0',
 'Class: 1',
 'Class: 1',
 'Class: 1',
 'Class: 1',
 'Class: 0',
 'Class: 1',
 'Class: 0',
 'Class: 1',
 '0',
 'Class: 1',
 'Class: 1',
 'Class: 1',
 'Class: 1',
 'Class: 0',
 'Class: 0',
 'Class: 1',
 'Class: 1',
 'Class: 1',
 'Class: 1',
 'Class: 1',
 'Class: 0',
 'Class: 1',
 'Class: 1',
 'Class: 1',
 'Class: 1',
 'Class: 1',
 'Class: 1',
 'Class: 1',
 'Class: 1',
 'Class: 0',
 '0',
 'Class: 1',
 'Class: 1',
 'Class: 1',
 'Class: 1',
 'Class: 1',
 'Class: 1',
 'Class: 1',
 'Class: 1',
 'Class: 1',
 'Class: 1',
 'Class: 1',
 'Class: 1',
 'Class: 1',
 'Class: 1',
 'Class: 1',
 'Class: 1',
 'Class: 1',
 'Class: 1',
 'Class: 1',
 'Class: 1',
 'Class: 1',
 'Class: 1',
 'Class: 1',
 'Class: 1',
 'Class: 1',
 'Class: 1',
 'Class: 1',
 'Class: 1',
 'Class: 1',
 'Class: 1',
 'Class: 1',
 'Class: 1',
 'Class: 1',
 'Class: 1',
 'Class: 1',
 'Class: 1',
 'Class: 1',
