# ABSA on Titans Threads

In [None]:
## Load libraries
from langchain.llms.openai import OpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.prompts.few_shot import FewShotPromptTemplate
from langchain.chains import SequentialChain
import openai
from getpass import getpass
import os
import warnings
import pandas as pd
import re

warnings.filterwarnings("ignore")

In [None]:
## Load data
# Read the posts table first, then the comments table; both CSVs are resolved
# relative to the notebook's working directory.
titans_posts, titans_comments = (
    pd.read_csv(csv_path)
    for csv_path in ('../Reddit Data/tt_posts.csv', '../Reddit Data/tt_comments.csv')
)

In [None]:
## Load helper functions
class Post:
    """A single post: its identifier, title, body text and a comment list.

    ``comments`` starts out as a fresh empty list for every instance.
    """

    def __init__(self, post_id, title, content):
        # Each instance gets its own list so posts never share comment state.
        self.comments = []
        self.post_id, self.title, self.content = post_id, title, content

class Comment:
    """A single comment together with its position in the reply tree.

    A new comment has no parent (``parent_comment`` is ``None``) and an empty
    ``replies`` list; both are filled in by whoever wires the tree together.
    """

    def __init__(self, comment_id, text, post_id):
        # Tree links start empty; every instance owns its own replies list.
        self.replies = []
        self.parent_comment = None
        # Identity, body text, and the ID of the post this comment belongs to.
        self.comment_id, self.text, self.post_id = comment_id, text, post_id

# Lookup tables: post ID -> Post object and comment ID -> Comment object.
posts_dict = {}
comments_dict = {}

# One Post per row of the posts table (columns read: id, Title, Content).
for post_row in titans_posts.itertuples():
    posts_dict[post_row.id] = Post(post_row.id, post_row.Title, post_row.Content)

# itertuples() exposes columns as attributes, and names containing spaces are
# not usable as attribute names, so rename those columns to use underscores.
comments_df = titans_comments.rename(columns={
    'Comment ID': 'Comment_ID',
    'Parent Comment ID': 'Parent_Comment_ID',
    'Post ID': 'Post_ID',
})

# Pass 1: create every Comment before any linking happens. Linking while the
# dictionary is still being filled would silently drop the parent link of any
# reply whose parent row appears later in the file.
parent_ids = {}
for comment_row in comments_df.itertuples():
    comments_dict[comment_row.Comment_ID] = Comment(
        comment_row.Comment_ID, comment_row.Text, comment_row.Post_ID
    )
    parent_ids[comment_row.Comment_ID] = comment_row.Parent_Comment_ID

# Pass 2: wire replies to their parents, in original row order.
for comment_id, parent_comment_id in parent_ids.items():
    if pd.isna(parent_comment_id):
        continue  # no parent recorded for this comment
    parent_comment = comments_dict.get(parent_comment_id)
    if parent_comment is None:
        continue  # parent ID does not match any loaded comment
    comment = comments_dict[comment_id]
    comment.parent_comment = parent_comment
    parent_comment.replies.append(comment)

# Function to get the full thread for a given post and its comments
def get_thread_for_post(post, comments_dict):
    """Render a post and all of its comments as one plain-text thread.

    Args:
        post: Object with ``post_id``, ``title`` and ``content`` attributes.
        comments_dict: Mapping of comment ID to comment objects exposing
            ``post_id``, ``parent_comment`` and ``text``.

    Returns:
        A string starting with the title/content header, followed by one line
        per comment whose ``post_id`` equals ``post.post_id``. Comments with no
        parent are prefixed ``T:`` and all others ``R:``.

    Comments are emitted flat, in the iteration order of ``comments_dict``;
    replies are not regrouped underneath their parent.
    """
    parts = [f"Title: {post.title}\nContent: {post.content}\n\nComments:\n"]

    for comment in comments_dict.values():
        if comment.post_id != post.post_id:
            continue
        # "T:" marks a top-level comment, "R:" marks a reply.
        indicator = "T:" if comment.parent_comment is None else "R:"
        parts.append(f"{indicator} Comment Text: {comment.text}\n")

    # Join once instead of growing the string with repeated +=.
    return "".join(parts)

In [None]:
## Set up OpenAI Key
# Ask for the key interactively so it is never written into the notebook.
OPENAI_API_KEY = getpass()

# Publish the key through the environment and hand the same value to the
# openai module.
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
openai.api_key = os.environ["OPENAI_API_KEY"]

# Completion model shared by both chains below.
llm = OpenAI(temperature=0.7)

In [None]:
## Aspect Extraction Chain
# Few-shot examples for aspect extraction. Each entry has a "thread" (title,
# content and comments, with "T:" marking top-level comments and "R:" marking
# replies) and the expected "aspects" sentence for that thread.
# NOTE(review): the example threads are indented four spaces inside the string
# literals; confirm that this indentation is intended in the rendered prompt.
examples = [{
    "thread": '''
    Title: First Titans game!
    Content: Went to my first game this weekend and had so much fun! The crowd made the game so exiting and the team played so well. Derrick Henry had such a great game!
    Comments: 
    T: Comment Text: Glad you had fun! Games are always fun, but definitely better when we win. We just gotta hope the team and coaches keep it up!
    T: Comment Text: It was my first game too. The stadium was so much cooler than I ever expected. The food was way too expensive though!
    R: Comment Text: I agree, prices for food are way too high! Only worth it when we are winning!
    ''',
    "aspects": "Relevant Aspects are Titans team performance, pricing, and stadium atmosphere."
},
{
    "thread": '''
    Title: New coach?
    Content: We have had a rough start to the season and it may be a time for change. Do we give Vrabel the rest of the year or hit the reset button now?
    Comments: 
    T: Comment Text: I agree it hasn't been up to our standards, but I think we have to have faith and trust Coach Vrabel!
    R: Comment Text: I disagree, he should be fired.
    T: Comment Text: Sometimes coaches can be used as a scapegoat. We need to give it a bit longer and let the players gel together in this system.
    T: Comment Text: Trust the process. Our coaches and players will bring it together and we can make a playoff push. It is our job as fans to pack Nissan and help motivate them to perform!
    R: Comment Text: While I agree with packing Nissan Stadium, shouldn't they be motivated already?
    R: Comment Text: The crowd has been quiet recently. Let's get loud and encourage our boys!
    ''',
    "aspects": "Relevant Aspects are Titans team performance, coaching, and stadium atmosphere."
}]

# Layout for one rendered few-shot example: the thread text on a "Thread:"
# line, followed by the expected aspects sentence on the next line.
prompt_template = "\nThread: {thread}\n{aspects}\n"

example_prompt = PromptTemplate(
    template=prompt_template,
    input_variables=["thread", "aspects"],
)

# Few-shot prompt for aspect extraction: the instruction prefix, then the worked
# examples rendered with example_prompt, then the thread to analyse.
#
# The prefix is a normal (non-raw) string, so the newline marker it describes
# is written as '\\n'; a bare '\n' would be turned into a real line break and
# the sentence explaining the separator would reach the model garbled.
# NOTE(review): the prefix still contains spelling slips ("Tennesse",
# "seperator") and a dangling comma after "Kevin Byard"; they are left as-is
# here because changing them alters the prompt sent to the model.
final_prompt = FewShotPromptTemplate(
    examples = examples,
    example_prompt = example_prompt,
    suffix = "Thread: {thread}\n",
    input_variables = ["thread"],
    prefix = '''
    I am extracting aspects from a Reddit Thread made by Tennessee Titans fans. The Tennesse Titans are a football team that play at their stadium: Nissan Stadium in Nashville, Tennessee. Their coach is Mike Vrabel and their key players are Derrick Henry, Ryan Tannehill, Will Levis, DeAndre Hopkins, Peter Skoronski, Jeffery Simmons, Treylon Burks, Kevin Byard,  Any other player names that are given, assume they are on an opposing team.
For this conversational thread, please return a list of the following aspects of fan experience: Titans team performance, stadium amenities, coaching, pricing, stadium atmosphere, and media coverage. A note: stadium amenities include food and the gift shop. If none of these aspects are present, determine what the topic of the conversation is and proceed with this as the aspect.
The structure of the post will be as follows. Title is the general title made of the original post. Content is the text from the original post. Comments will be all comments on the post. Any comment labelled as "T: Comment Text:" is a top-level comment, so use the original post and content as the context for this comment. Any comment labelled as "R: Comment Text:" is a reply comment, so use all the comments above it until you hit a top-level comment, as well as the original post and content as the context for this comment. If the Content of a post is "nan" that means the post was an image. For any of these posts, just consider the post title and comments, ignoring the "nan" content.
Additionally, when the newline operator '\\n' is present, this means that it is the end of that respective post section. This will work as a seperator to help distinguish posts from comments and so on.
    ''')

# Chain that turns a thread into its extracted aspects (stored under 'aspects').
aspect_extraction_chain = LLMChain(llm = llm, prompt = final_prompt, output_key = 'aspects')

In [None]:
## Sentiment Analysis Chain
# Prompt that asks for a sentiment score per extracted aspect. It is assembled
# from adjacent literals so that each instruction sits on its own source line.
prompt_template2 = (
    "\n"
    "Given below thread and the extracted aspects, tell me about the sentiment of those aspects. "
    "This sentiment should be on a continuous scale of -1 to 1, where -1 is the most negative, "
    "0 is the most neutral, and 1 is the most postive. "
    "Round the score to 2 decimal places. "
    "Follow this format: (aspect, sentiment_score).\n"
    "Thread: {thread}\n"
    "Aspects: {aspects}\n"
    "[(Aspect1, Sentiment_Score_1), (Aspect2, Sentiment_Score_2),.....]\n"
)

example_prompt2 = PromptTemplate(
    template=prompt_template2,
    input_variables=["thread", "aspects"],
)

# Second stage: consumes the thread plus extracted aspects and stores the
# scored pairs under "Aspects_with_sentiment".
aspect_sentiment_chain = LLMChain(
    prompt=example_prompt2,
    llm=llm,
    output_key="Aspects_with_sentiment",
)

In [None]:
## Full Sequential Chain
# Two-stage pipeline: aspect extraction feeds its 'aspects' output into the
# sentiment chain. The original thread is passed through to the result
# alongside both stage outputs.
overall_chain = SequentialChain(
    verbose=False,
    input_variables=["thread"],
    chains=[aspect_extraction_chain, aspect_sentiment_chain],
    output_variables=["thread", "aspects", "Aspects_with_sentiment"],
)

In [None]:
## Store threads
# One rendered thread string per post, in posts_dict insertion order.
threads = [
    get_thread_for_post(current_post, comments_dict)
    for current_post in posts_dict.values()
]

In [None]:
## Run on threads
# TODO: results are only kept in the in-memory `output` list; storing them in a
# DataFrame is still outstanding.

# Matches "(aspect, score)" pairs in the model's free-text answer. Compiled once
# outside the loop; whitespace after the comma is optional, and parentheses are
# excluded from both groups so a match cannot start at an earlier, unrelated "(".
_ASPECT_SCORE_RE = re.compile(r'\(([^,()]+),\s*([^()]+)\)')


def _parse_aspect_scores(text):
    """Return the (aspect, score) tuples found in ``text``.

    Pairs whose score is not a valid float are skipped rather than raising, so
    one malformed answer cannot abort the whole run.
    """
    pairs = []
    for aspect, raw_score in _ASPECT_SCORE_RE.findall(text):
        try:
            score = float(raw_score.strip())
        except ValueError:
            # Typically the model echoing the "(aspect, sentiment_score)"
            # format placeholder instead of a number.
            continue
        pairs.append((aspect.strip(), score))
    return pairs


# output[i] holds the parsed (aspect, score) pairs for threads[i].
output = []
for thread in threads:
    res = overall_chain({"thread": thread})
    output.append(_parse_aspect_scores(res["Aspects_with_sentiment"]))