# Installs & Imports

In [None]:
import pandas as pd

from transformers import AutoTokenizer, pipeline
import torch

from pprint import pprint
from tqdm.auto import tqdm
from sklearn import metrics

import time

import json

In [None]:
from huggingface_hub import login
import os
from getpass import getpass

# Authenticate against the Hugging Face Hub.
# The token is taken from the HF_TOKEN environment variable when set, otherwise
# it is requested through a hidden prompt, so the secret never has to be typed
# into (and saved with) the notebook source.
hf_token = os.environ.get('HF_TOKEN') or getpass('Hugging Face token: ')
login(token=hf_token)


The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


# Loading the dataset

In [None]:
# Colab-only upload step. The cells below read 'AITA_minorities_subset.csv'
# and 'aita_subset_titles.csv' from the working directory, so both files are
# expected to be provided here.
from google.colab import files
uploaded = files.upload()

Saving AITA_minorities_subset.csv to AITA_minorities_subset.csv
Saving aita_subset_titles.csv to aita_subset_titles.csv


In [None]:
import pandas as pd
# Pool of labelled posts from which few-shot examples are drawn:
# positional rows 101..199 of the labelled subset.
aita_subset = pd.read_csv('AITA_minorities_subset.csv')
train_data = aita_subset.iloc[101:200]

# Titles (with target judgements) for which posts will be generated.
test_data = pd.read_csv('aita_subset_titles.csv')


# STEP 3 Post Generation

In [None]:
# Upper bound on generated tokens per post (passed as max_tokens to the API);
# this length is selected to accommodate the average-length AITA post.
post_max_tokens = 768

In [None]:
# System prompt placed first in every post-generation request.
# NOTE(review): the prompt names NTA / NAH / ESH as the possible judgements --
# confirm this matches the label values actually present in test_data['label'].
# NOTE(review): the string contains no {placeholders}, so the f-prefix has no effect.
post_gen_system_message = f"""
You are a highly intelligent language model designed to generate posts for the "Am I the Asshole?" (AITA) subreddit. Your task is to read the title of an AITA post and generate an engaging post that aligns with the provided judgement.

Instructions:

1. Read the Title:
Carefully read the title of the AITA post.

2. Acknowledge the Required Judgement:
Consider the judgement indicated (NTA - Not the Asshole, NAH - No Assholes Here, ESH - Everyone Sucks Here). Think about the key points you need to make to ensure the story aligns with the required judgement.

3. Generate a Story by Following those Steps:

Create an AITA post without using section titles. Include the following elements seamlessly in the narrative:

Brief Background: Provide context for the story. Introduce the main characters and their relationships, and set the scene for the events that will unfold.

The Incident: Describe the specific incident or series of events that led to the conflict. Be detailed and clear about what happened, who was involved, and what actions were taken.

Friends' or Family's Opinion: Describe the opinions of friends or family members about the situation. Include differing viewpoints to provide a balanced perspective on the conflict.

The Current Outcome: Detail the consequences of the incident. Explain how the people involved reacted, any changes in relationships, and any ongoing impact the conflict has had.

Conclusion:
Summarize the key points of the story and pose similar question to the readers: "Am I the Asshole for [OP ACTIONS]?"

4. Maintain Authenticity:
Ensure that the story feels realistic and relatable. Use natural language and tone as if a real person is sharing their experience.

5. Adhere to the Judgement:
Ensure that the generated story logically leads to the required judgment (e.g., if the judgment is NTA, the story should clearly indicate why the poster might be considered not the asshole).

"""

In [None]:
# Keep only mid-length examples for the prompt: very long posts would cost too
# many tokens, and very short ones would nudge the model towards short stories.
# Both cut-offs are computed on the unfiltered data and applied as strict bounds.
word_counts = train_data['word_count']
percentile_75 = int(word_counts.quantile(0.75))
percentile_25 = int(word_counts.quantile(0.25))
within_range = (word_counts < percentile_75) & (word_counts > percentile_25)
train_data = train_data[within_range].reset_index(drop=True)

In [None]:
# Each example becomes one chat exchange: the user turn carries the judgement
# and title, the assistant turn is the original post body.
train_data = train_data.assign(
    few_shot_input='Judgement: ' + train_data['label'] + ', Title: ' + train_data['title'],
    few_shot_output=train_data['body'],
)

In [None]:
def create_example(row):
  """Convert example rows into alternating user/assistant chat messages.

  Args:
    row: DataFrame holding 'few_shot_input' and 'few_shot_output' columns;
      every row contributes one user message followed by one assistant message.

  Returns:
    A flat list of {"role", "content"} dicts, two per input row, in row order.
  """
  messages = []
  for _, example in row.iterrows():
    messages.extend([
        {"role": "user", "content": example['few_shot_input']},
        {"role": "assistant", "content": example['few_shot_output']},
    ])
  return messages

In [None]:
# User prompt for each test row, in the same "Judgement: ..., Title: ..."
# layout as the few-shot inputs.
judgement_part = "Judgement: " + test_data['label']
test_data['gen_post_prompt'] = judgement_part + ", Title: " + test_data['generated_titles']

In [None]:
def format_post_gen_input(row):
  """Build the chat messages for one post-generation request.

  The message list is: the system prompt, one randomly sampled few-shot
  example from train_data that has the same label as `row`, and finally the
  row's own user prompt.

  Args:
    row: a test_data row providing 'label' and 'gen_post_prompt'.

  Returns:
    List of {"role", "content"} dicts ready to pass as `messages`.

  Raises:
    ValueError: if train_data contains no example with the row's label.
  """
  label = row['label']
  # Select by the row's own label rather than a fixed list of labels, so an
  # unexpected label can no longer leave `one_shot` unassigned.
  same_label_examples = train_data[train_data['label'] == label]
  if same_label_examples.empty:
    raise ValueError(f"No few-shot example available for label {label!r}")
  # Unseeded sample: a different example may be drawn on every call.
  one_shot = create_example(same_label_examples.sample(1))

  system_message = [{"role": "system", "content": post_gen_system_message}]
  user_message = [{"role": "user", "content": row['gen_post_prompt']}]
  return system_message + one_shot + user_message


# Build the complete message list for every test row and store it in the
# 'get_post_input' column. format_post_gen_input draws its few-shot example
# with an unseeded .sample(1), so re-running this cell can change the prompts.
test_data.loc[:, 'get_post_input'] = test_data.apply(format_post_gen_input, axis=1)
#pprint(test_data.loc[:1, 'get_post_input'].tolist(), sort_dicts=False)

## Clearing GPU memory

## Generating with GPT 4o mini

In [None]:
# Installs/upgrades the OpenAI client in the kernel's environment.
# NOTE(review): the version is not pinned, so results may vary between runs.
%pip install --upgrade openai



In [None]:
from openai import OpenAI
import os
from getpass import getpass

# Create the OpenAI client.
# The API key must never appear as a literal in the notebook (notebooks are
# shared and versioned together with their source). It is read from the
# OPENAI_API_KEY environment variable, falling back to a hidden prompt for
# interactive sessions such as Colab.
openai_api_key = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")
client = OpenAI(api_key=openai_api_key)

In [None]:
def generate_posts(formated_input, model="gpt-4o-mini", temperature=0.9, max_tokens=None):
    """Generate one post from a prepared chat-message list.

    Args:
        formated_input: list of {"role", "content"} message dicts.
        model: chat model name; defaults to "gpt-4o-mini".
        temperature: sampling temperature; defaults to 0.9.
        max_tokens: cap on generated tokens; when None, the notebook-level
            post_max_tokens is used (looked up at call time).

    Returns:
        The text content of the first returned choice.
    """
    if max_tokens is None:
        max_tokens = post_max_tokens
    response = client.chat.completions.create(
        model=model,
        temperature=temperature,
        max_tokens=max_tokens,
        messages=formated_input,
    )

    return response.choices[0].message.content

In [None]:
# Message list of the first test row (index label 0), used for a single trial request.
test = test_data.loc[0, 'get_post_input']

In [None]:
import openai
# Single trial request for the first test prompt so the raw response can be
# inspected before running the whole batch.
trial_request = dict(
    model="gpt-4o-mini",
    temperature=0.9,
    max_tokens=post_max_tokens,
    messages=test,
)
chat_completion = client.chat.completions.create(**trial_request)

In [None]:
# Display the whole trial response (id, choices, finish_reason, ...) as a plain dict.
chat_completion.to_dict()

{'id': 'chatcmpl-9wWCZQSws5v6DhbE6W4UOUuL09vyz',
 'choices': [{'finish_reason': 'stop',
   'index': 0,
   'logprobs': None,
   'message': {'content': "I’ve been friends with Jake for over ten years. He’s always been a great friend, but he has a somewhat mischievous sense of humor and a penchant for collecting what he calls “memorable moments.” Not too long ago, he decided to hang up a large photo collage in his living room, and one of the pictures he included is of the two of us at a party years ago. In that photo, we’re both having a great time, but my wife is convinced I was being a bit too friendly with an ex of mine who’s also in the picture. \n\nHonestly, it was just a fun night out—everyone was having a blast, and it didn’t mean anything. But my wife has been having anxiety about it since we got married. She’s expressed her discomfort with that photo being on display, saying it feels like a reminder of my past relationships. At first, I brushed it off thinking it was just her bei

In [None]:

# One API request per test row, issued sequentially. This takes a long time,
# so register tqdm's pandas integration and show a progress bar while it runs.
tqdm.pandas(desc='Generating posts (gpt-4o-mini)')
test_data['generated_posts_gpt_mini'] = test_data['get_post_input'].progress_apply(generate_posts)


In [None]:
# Persist prompts and generated posts. With pandas' default settings the
# DataFrame index is written as the first, unnamed CSV column.
test_data.to_csv('aita_subset_gpt_mini_posts.csv')

In [None]:
# Print the text of the single trial response (not a batch result) so its
# paragraph breaks are rendered.
print(chat_completion.choices[0].message.content)

I’ve been friends with Jake for over ten years. He’s always been a great friend, but he has a somewhat mischievous sense of humor and a penchant for collecting what he calls “memorable moments.” Not too long ago, he decided to hang up a large photo collage in his living room, and one of the pictures he included is of the two of us at a party years ago. In that photo, we’re both having a great time, but my wife is convinced I was being a bit too friendly with an ex of mine who’s also in the picture. 

Honestly, it was just a fun night out—everyone was having a blast, and it didn’t mean anything. But my wife has been having anxiety about it since we got married. She’s expressed her discomfort with that photo being on display, saying it feels like a reminder of my past relationships. At first, I brushed it off thinking it was just her being overly sensitive. But as time went on, I couldn’t help but notice how it affected her mood every time we visited Jake. 

During a recent trip to Jake’

In [None]:
# Inspect the first post produced by the batch run.
test_data['generated_posts_gpt_mini'][0]

"I (30M) have a close friend, Jake (32M), who recently moved in with his girlfriend. They've been dating for a few years, and while we get along well, he has a tendency to display photos and memorabilia from past relationships. When they settled into their new place, he put up a large, sentimental photo of him with his ex-girlfriend at a festival. He’s mentioned how much that photo means to him, but it has become a point of tension for my wife (28F).\n\nThe issue came to a head when we visited Jake’s new place for the first time. As soon as we walked in, my wife’s face dropped when she saw the picture hanging prominently in the living room. She tried to play it cool, but I could see it was eating at her. Later, she confided in me that seeing the photo made her uncomfortable. She felt disrespected and worried that I might inadvertently compare her to Jake’s ex.\n\nI totally understand the sentiment behind the photo, but I also want my wife to feel comfortable in any situation. So, I bro

## Generating with GPT 4o

In [None]:
def generate_posts(formated_input, model="gpt-4o", temperature=0.9, max_tokens=None):
    """Generate one post from a prepared chat-message list.

    This definition replaces the earlier one of the same name; its default
    model is "gpt-4o".

    Args:
        formated_input: list of {"role", "content"} message dicts.
        model: chat model name; defaults to "gpt-4o".
        temperature: sampling temperature; defaults to 0.9.
        max_tokens: cap on generated tokens; when None, the notebook-level
            post_max_tokens is used (looked up at call time).

    Returns:
        The text content of the first returned choice.
    """
    if max_tokens is None:
        max_tokens = post_max_tokens
    response = client.chat.completions.create(
        model=model,
        temperature=temperature,
        max_tokens=max_tokens,
        messages=formated_input,
    )

    return response.choices[0].message.content

In [None]:
# Message list of the first test row (index label 0), used for a single trial request.
test = test_data.loc[0, 'get_post_input']

In [None]:
import openai
# Single trial request for the first test prompt in the GPT-4o section.
# The model is "gpt-4o" so this check exercises the same model as the batch
# run below (it previously still requested "gpt-4o-mini").
chat_completion = client.chat.completions.create(
  model="gpt-4o",
  temperature=0.9,
  max_tokens = post_max_tokens,
  messages=test)

In [None]:
# Display the raw ChatCompletion object returned by the trial request.
chat_completion

ChatCompletion(id='chatcmpl-9wWV24iXnuqBaFzJPgE6TIVTLGvm6', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content="I’ve had a close friend, Jake, for over a decade. We’ve shared countless memories and have always been there for each other. Recently, he posted a photo on social media that was taken at a party a few years back. In it, I’m with my wife, and Jake’s arm is wrapped around her shoulder in a friendly way. The problem? My wife is uncomfortable with how she looks in the picture, and honestly, it feels a bit too intimate for our liking.\n\nWhen my wife first saw it, she fumbled with her words, trying to articulate why it bothered her. She mentioned that she didn’t like the way she looked and that it gave off a vibe that didn’t sit well with her. I understood her concerns – it’s important for couples to communicate boundaries and feelings, especially when it comes to representation in shared spaces like social media.\n\nSo, I brought i

In [None]:
import time
# One API request per test row, issued sequentially (a recorded run took about
# 1400 seconds). A tqdm progress bar shows how far the loop has got, and the
# total wall-clock time is still printed at the end.
tqdm.pandas(desc='Generating posts (gpt-4o)')
start_time = time.time()

test_data['generated_posts_gpt_4o'] = test_data['get_post_input'].progress_apply(generate_posts)

print(f'Time: {int(time.time() - start_time)} seconds')


Time: 1400 seconds


In [None]:
# Persist test_data, including every generated-post column added so far.
# With pandas' default settings the index is written as the first, unnamed CSV column.
test_data.to_csv('aita_subset_gpt_posts.csv')

In [None]:
# Colab-only: hand the CSV written above to the browser as a download.
from google.colab import files
files.download('aita_subset_gpt_posts.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>