# ABSA on Preds Threads

In [11]:
## Load libraries
from langchain.llms.openai import OpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.prompts.few_shot import FewShotPromptTemplate
from langchain.chains import SequentialChain
import openai
from getpass import getpass
import os
import warnings
import pandas as pd
import re
import random
import time

warnings.filterwarnings("ignore")

In [2]:
## Load data
preds_posts = pd.read_csv('../Reddit Data/preds_posts_clean.csv')
preds_comments = pd.read_csv('../Reddit Data/preds_comments_clean.csv')

In [3]:
## Load helper functions
class Post:
    def __init__(self, post_id, title, content):
        self.post_id = post_id
        self.title = title
        self.content = content
        self.comments = []

class Comment:
    def __init__(self, comment_id, text, post_id):
        self.comment_id = comment_id
        self.text = text
        self.post_id = post_id  # Store the post_id
        self.parent_comment = None
        self.replies = []

# Create dictionaries to map post IDs to Post objects and comment IDs to Comment objects.
posts_dict = {}
comments_dict = {}

# Populate posts_dict and comments_dict from your dataframes.
for post_row in preds_posts.itertuples():
    post_id = post_row.id
    title = post_row.Title
    content = post_row.Content
    post = Post(post_id, title, content)
    posts_dict[post_id] = post


comments_df = preds_comments.rename(columns={
    'Comment ID': 'Comment_ID',
    'Parent Comment ID': 'Parent_Comment_ID',
    'Text': 'Text',
    'Author': 'Author',
    'Date': 'Date',
    'Post ID': 'Post_ID'
})

# Now the columns have underscores instead of spaces, making it easier to access them.

# You can use the updated column names directly in your code as follows:
for comment_row in comments_df.itertuples():
    comment_id = comment_row.Comment_ID
    text = comment_row.Text
    post_id = comment_row.Post_ID  # Store the post_id
    comment = Comment(comment_id, text, post_id)
    comments_dict[comment_id] = comment

    # Assign parent comment if it exists.
    parent_comment_id = comment_row.Parent_Comment_ID
    if not pd.isna(parent_comment_id):
        parent_comment = comments_dict.get(parent_comment_id)
        if parent_comment:
            comment.parent_comment = parent_comment
            parent_comment.replies.append(comment)

# Function to get the full thread for a given post and its comments
def get_thread_for_post(post, comments_dict):
    thread = f"Title: {post.title}\nContent: {post.content}\n\nComments:\n"
    
    for comment_id, comment in comments_dict.items():
        if comment.post_id == post.post_id:
            if comment.parent_comment is None:
                indicator = "T:"  # Top-level comment indicator
            else:
                indicator = "R:"  # Reply indicator
            # Add the comment to the thread
            thread += f"{indicator} Comment Text: {comment.text}\n"

    return thread

In [4]:
## Set up OpenAI Key
OPENAI_API_KEY = getpass()
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
openai.api_key = os.getenv("OPENAI_API_KEY")
llm = OpenAI(model_name="gpt-3.5-turbo-1106")

Links for examples:

https://www.reddit.com/r/Patriots/comments/17oqay4/the_lack_of_fundamentals_on_this_team_is/
https://www.reddit.com/r/Predators/comments/1871ola/picking_a_jersey/  
https://www.reddit.com/r/49ers/comments/173va1a/with_all_those_weapons/
https://www.reddit.com/r/Predators/comments/186bjzz/am_i_looking_at_this_wrong/
https://www.reddit.com/r/Tennesseetitans/comments/eo0j92/am_i_the_only_one_that_thinks_espn_hates_talking/
https://www.reddit.com/r/Predators/comments/tl6l7m/flyers_fans_visiting_the_area_looking_for_the/
https://www.reddit.com/r/Tennesseetitans/comments/wzzefw/a_bizarre_fun_fact_about_kern/
https://www.reddit.com/r/Predators/comments/f3gxgg/make_bridgestone_loud_again/
https://www.reddit.com/r/Predators/comments/189dja2/free_seat_upgrades/

In [5]:
## Aspect Extraction Chain
examples = [{
    "thread": '''
    Title: the lack of fundamentals on this team is astounding
    Content: to me the most shocking thing is how bad this team is top to bottom when it comes to fundamentals. awful decision making, penalties, etc. we’re awful across the board. it’s one thing to not have much talent, but how could a team under Vrabel be so bad at the fundamentals? our older teams had great fundamentals. 
    
    what happened? this season had really put Brunette's actual coaching skills in a terrible light imo.
    Comments: 
    T: Comment Text: It starts with coaching and buy in. Brunette has lost most of the team IMO. They have no real leaders. Most guys will be gone in 2 years and they know it. A bunch of rouge players not giving a shit.

    They don't trust the Brunette way anymore.
    R: Comment Text: Moulin Rouge?
    T: Comment Text: There is nowhere else to look other than Brunette. The justification to keep him around is that he should be able to field a disciplined team that gives effort and makes smart plays even if there is a talent disparity.

    With how unprofessional and unprepared the team has been the last few years I’m not sure what coaching “advantage” he’s giving you anymore.
    R: Comment Text: Trotz has to get rid of Brunette. Period
    R: Comment Text: It's time. 2-7.

    Who can this team beat in the remaining schedule? The Sharks??
    ''',
    "aspects": "Relevant Aspects are 'team performance' and 'coaching and management'."
},
{
    "thread": '''
    Title: Picking a jersey
    Content: My brother just moved to Tennessee and as a housewarming gift I was going to get him a jersey. Been a while since I’ve watched preds games even semi-regularly so not super up to date. Wanted to see if you guys have a good suggestion of who to pick.
    
    I know the obvious ones Josi, Saros, Forsberg, but to help, he typically likes the oft-underappreciated tougher, scrappier guys. Like a Radko Gudas, Tom Wilson, you get the point. Preferably somebody you guys think is here for the long term (at least a few years).
    
    Thanks!
    Comments: 
    T: Can't go wrong with Rinne jersey
    R: Amen
    T: You could always go with a custom jersey as well. My mom got me one for Christmas last year with my favorite number and last name and I love it!
    R: We got custom ones with our favorite players names and numbers lol. Much cheaper xD
    T: I personally like Sherwood
    R: Mr Hustle! Dude is everywhere all the time
    ''',
    "aspects": "Relevant Aspects are 'miscellaneous'."
},
{
    "thread": '''
    Title: “With all those weapons…”
    Content: “… who couldn’t win?”
    
    I’m so tired of this take from sports media and my friends. This is what you do in any sport - build the best team you can and coach them well. That’s what Andrew Brunette and Barry Trotz have done. Last night should have ended that sweaty, cynical take. Dallas focused on the players who have been red hot in the previous 4 games, and so we asked Forsberg and Josi to step up, and they were ready.    
    Comments: 
    T: Comment Text: I don’t even care about the talk anymore. I love Juuse Saros and that’s enough for me. I love him.
    R: Comment Text: 100%. The story, the identity he brings to the team, the focus and stability he brings to the game. I’m all in.
    T: Comment Text: y’all need to stop getting tilted about this shit. and embarrassment of riches is a good problem to have
    T: Comment Text: Saros just can’t win this narrative. He basically has to play perfect just so people think he’s ‘competent’. What happens when he has a bad game or god forbid two. All the national media is going to be ‘I told you so.’
    
    I just dunno what else Saros can do.
    R: Comment Text: Doesn't matter as long as the people in the house know what's up.
    ''',
    "aspects": "Relevant Aspects are 'team performance', 'coaching and management', and 'media coverage'."
},
{
    "thread": '''
    Title: Am I looking at this wrong?
    Content: Was always a casual fan, never a die hard. I loved going to Preds games my whole life. Always had a jersey, always scored tickets to several games a year. But something happened after 2017. I felt pushed out. All of a sudden, it was the hot ticket in town. Tickets dried up. I checked several times over the years, and tickets on the resell market were well into the hundreds for a Saturday game.
    
    I checked out and found other things to do. Didn’t really think about it until a year ago when this guy I was next to at a concert told me he scored lower bowl Hurricanes Game 7 tickets for 200 each. Now, I know Nashville is a cool place, but it felt like the Preds priced out all the fans who were around since the 90s. Just completely disconnected from us who had been on board since day 1.
    
    Tickets are starting to appear again! But I’m not getting back on board for them to push me out again. Once again, I’m not a die hard. I’m sure they were out there if I looked at the right places at the right times. But it felt like you had to be locked in where as before, the Preds tried to reach you
    
    Am I looking at this wrong? I’m open to being wrong here.    
    Comments: 
    T: Haven’t looked at prices this season, but yea I definitely felt that way. Being at the games since then has felt that way as well for me. Add that to how awful the NHL TV deals are and I haven’t watched as much as I planned this year. I don’t want to have to hunt every game down, and have multiple subscriptions. Oh well, I’ll always be a Preds fan. Hoping the league can course correct a bit.
    T: We were STH for years. We stopped during the pandemic and just decided it wasn’t worth the cost anymore. The culture definitely changed over the years. While the 2017 playoffs were the high point, the 2018 season was a great atmosphere as well.
    
    After that, the crowd gradually got quieter, and it was clear that most people going weren’t regulars. I went to a game this week, and it felt like we were the only people in our section doing the chants.
    
    I still enjoy going to games, but it’s definitely different.
    T: I'm grateful for the financial stability it has brought to the franchise, but saddened that so many of us who were there during the lean years aren't able to support the team the way we used to.
    T: I hate resell sites as much as the next guy but you can often find stubhub tickets for $15-20 a pop for weeknight games up in the upper deck which is what I usually do. It would be fun to sit closer but even sitting up high is more fun than watching on tv imo so that’s what I end up doing
    T: It's not just Nashville or Hockey.
    
    From ~2005 to around 2018 the Bucks were rivaling the Brewers in terms of how cheap it was to get into games, at one point they were offering half season tickets for $99 on black Friday, then the fancy new arena got built, the team started winning, and all of a sudden people who previously had no interest on Basketball were flocking to the arena.
    
    Now if you want to go to a weekend or marquee game it's gonna cost minimum $70-80 a ticket, not including parking, food, etc, which is the reason i've gone to 6 Admirals games this season, and 1 Wednesday night Bucks game against the fucking Pistons.
    R: Yeah this stuff gets old. They need to have a section where it’s day of, cheap tickets in the upper deck or whatever that you gotta call in to get. Something. No one is asking for cheap lower bowl seats
    ''',
    "aspects": "Relevant Aspects are 'pricing'."
},
{
    "thread": '''
    Title: Am I the only one that thinks espn hates talking abt the preds
    Content:  I swear they talk abt everyone we beat and any other game that was on or abt to play rather than giving us any credit. From what I’ve seen they don’t want us here and they try to play down what we’ve done and have given us no credit saying that the teams we’ve beat didn’t play good or we got lucky. Let me know your thoughts
    Comments: 
    T: Surprise! We are a small market team. It’s been this way literally forever.
    R: How is Nashville a small market team when there are 1.5 million people in the city on any given day?
    R: Because half of them aren’t fans/from Nashville
    T: I think Brunette went over to ESPN and bullied them all to keep the talking about us to a minimum, to help our boys stay focused and keep that hungry underdog mentality!
    T: I just enjoy that they are talking. And I don’t think they “hate” us. Just a small market team. Hard to satisfy a national audience with a small amount of fans. They love Forsberg and the heart of the team.
    T: ESPN is a known bias propagandizing network . I've switched to fox more and more this year. They giving preds love.
    T: People need to understand the audience isn’t the same for ESPN anymore. Its not like it used to be where every man in the world watches ESPN. A lot of the higher quality viewers are just using the internet or their phones now to look up high lights. As a result I feel these sports shows have to be more entertaining to get ratings, and as a result they just talk about the popular teams. People this is entertainment. They aren’t out here to give a fair or honest assessment.
    R: Chicken and egg with that one. I stopped watching when ESPN became the Lebron/Tiger/Brady channel.
    ''',
    "aspects": "Relevant Aspects are 'media coverage'."
},
{
   "thread": '''
    Title: Flyers Fans visiting the area looking for the best meal in Bridgestone Arena
    Content:  Hey all, me and a bunch of friends are visiting Nashville for a bachelor party and surprising the married man to be with tickets to the game Sunday. Seeing as we're probably gonna be hungover on death's door, what's the best meal to munch on in Bridgestone?
    
    If you can drop a name and the section in the comments I'll love you forever.
    Comments: 
    T: I don’t care what anyone says, the walking taco bowl down near the main entrance always slaps. Especially if you’re a couple beers deep.
    T: If you’re dead set on eating at Bridgestone the BBQ nachos are my guilty pleasure.
    
    You’re in Nashville so do yourself a favor and walk to Martins BBQ. Everything they have is fire. A little secret not many people know is you can get the wings with the Cajun rub and then tossed in the Alabama white sauce.
    T: The grilled cheese sandwiches (Ground floor, just off to the left of the entrance) absolutely fucking slap. I would get one right away as they can take a while, but god damn are they good.
    ''',
    "aspects": "Relevant Aspects are 'stadium amenities'."
},
{
    "thread": '''
    Title: A Bizarre fun Fact about Ekholm
    Content: Ekholm has literally played for every Head Coach for the Predators Franchise!
    Comments: 
    T: He's basically Elvish in NHL terms. He's seen ages, where other players have just seen years.
    R: You literally couldn't of put it a better way he has seen everything! lol
    R: This is truly a deep comment
    T: Wow, that’s actually insane.
    T: I used to play words with friends with Ekholm. Good times
    ''',
    "aspects": "Relevant Aspects are 'miscellaneous'."
},
{
    "thread": '''
    Title: Make Bridgestone loud again!
    Content:  This is a call to arms for all fans going to the game tonight! I know things have looked bleak as of late, but we need to remind our boys of our support, and that they shouldn’t dread playing at home for us!
    
    So even if we get into a big hole, let’s all band together and be leaders in the crowd tonight and cheer our team into a win! Never give up!
    Comments: 
    T: When they jacked up the prices the crowd noise quieted down immediately. There was a precipitous drop the season after the playoff run.
    R: Prices were going up after the playoff run anyway. The only question was whether the increase would go to the Predators or to the ticket brokers.
    T: Ridiculously expensive tickets have kept the most loyal and loudest fans home.
    T: I was literally sitting at Wicked Weed watching the prices fall as we got closer to game time because I was having this exact conversation. It’s not the first time the tickets have hit $10. I posted a screenshot with a $10 lower bowl pair a couple weeks ago.
    R: $10 lower bowl seats? PM me if you ever see that again lol
    R: $20 lower bowl seats tonight. $10 in the upper bowl. Shame all the real fans are being priced out though.
    ''',
    "aspects": "Relevant Aspects are 'stadium atmosphere' and 'pricing'."
}]

prompt_template = '''
Thread: {thread}
{aspects}
'''

example_prompt = PromptTemplate(input_variables = ["thread", "aspects"], template = prompt_template)

final_prompt = FewShotPromptTemplate(
    examples = examples,
    example_prompt = example_prompt,
    suffix = "Thread: {thread}\n",
    input_variables = ["thread"],
    prefix = '''
    I am extracting aspects from a Reddit Thread made by Nashville Predators fans. The Nashville Predators are a hockey team that play at their stadium: Bridgestone Arena in Nashville, Tennessee. Their coach is Andrew Brunette, their general manager is Barry Trotz, and some of their key players are Juuce Saros, Roman Josi, Ryan O'Reilly, Filip Forsberg, Tyson Barrie, Alexandre Carrier, Jeremy Lauzon, Colton Sissons, Kevin Lankinen, Cole Smith, Kiefer Sherwood, Ryan McDonagh, and Gustav Nyquist. Any other names that are given, assume they are on an opposing team.
    For this conversational thread, please return a list of the following aspects of fan experience: 'team performance' (specifically the Predators, ignore talk about other teams), 'stadium amenities' (including food and the gift shop), 'coaching and management', 'pricing', 'stadium atmosphere' (including comfort, safety, and crowd atmosphere), 'media coverage', 'brand partners', and a 'miscellaneous' category for any aspect(s) not covered by the previous list. 
    The structure of the post will be as follows: Title is the general title made of the original post. Content is the text from the original post. Comments will be all comments on the post. Any comment labelled as "T: Comment Text" is a top-level comment, so use the original post and content as the context for this comment. Any comment labelled as "R: Comment Text" is a reply comment, so use all the comments above it until you hit a top-level comment, as well as the original post and content as the context for this comment. If the Content of a post is "nan", that means the post was an image. For any of these posts, just consider the post title and comments, ignoring the "nan" content.
    ''')

aspect_extraction_chain = LLMChain(llm = llm, prompt = final_prompt, output_key = 'aspects')
#print(final_prompt.format(thread="Test"))

Around 3500 GPT-4 tokens before any post is added in

In [6]:
## Sentiment Analysis Chain
prompt_template2 = '''
Given below thread and the extracted aspects, tell me about the sentiment of those aspects. This sentiment should be on a continuous scale of -1 to 1, where -1 is the most negative, 0 is the most neutral, and 1 is the most postive. Round the score to 2 decimal places. Follow this format: (aspect, sentiment_score).
Thread: {thread}
Aspects: {aspects}
[(Aspect1, Sentiment_Score_1), (Aspect2, Sentiment_Score_2),.....]
'''

example_prompt2 = PromptTemplate(input_variables = ["thread", "aspects"], template = prompt_template2)

aspect_sentiment_chain = LLMChain(llm = llm, prompt = example_prompt2, output_key = "Aspects_with_sentiment")

In [7]:
## Full Sequential Chain
overall_chain = SequentialChain(
    chains = [aspect_extraction_chain, aspect_sentiment_chain],
    input_variables = ["thread"],
    output_variables = ["thread", "aspects", "Aspects_with_sentiment"],
    verbose = False
)

In [8]:
## Store threads
threads = []
for post_id, post in posts_dict.items():
    thread = get_thread_for_post(post, comments_dict)
    threads.append(thread)

In [9]:
samp = random.sample(threads, k = 15)
print(samp[0])

Title: Thoughts and breakdown after the Trade
Content: Obviously, this trade sheds light once again on the Predators’ intent to revamp this roster. As the dust settles on this trade, I want to delve into the intricacies and potential ramifications for the Predators.

Ryan Johansen, known for his mercurial performances and playmaking abilities, had been a cornerstone of the Predators’ forward line for nearly a decade. However, his inconsistency and recent injury proneness rendered him a questionable investment against his hefty $8 million per year contract. By trading Johansen away, the Predators have freed up cap space and relieved themselves of a player who failed to live up to the expectations set by his substantial salary.  
When you look at the acquisition (if they even choose to sign him) of Alex Galchenyuk in this trade, the Predators received a former third-overall pick who has experienced both success and setbacks during his NHL career. While Galchenyuk spent the previous seaso

In [10]:
output = []

In [20]:
## Run on threads
## Note: won't store in output, need to learn how to store on to DF
for thread in samp:
    print("New thread analyzed")
    res = overall_chain({"thread": thread})
    temp = res["Aspects_with_sentiment"]
    matches = re.findall(r'\(([^,]+), ([^)]+)\)', temp)
    result_list = [(match[0], float(match[1])) for match in matches]
    output.append(result_list)
    

New thread analyzed
New thread analyzed
New thread analyzed
New thread analyzed
New thread analyzed
New thread analyzed
New thread analyzed
New thread analyzed
New thread analyzed
New thread analyzed
New thread analyzed
New thread analyzed


Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for gpt-3.5-turbo-1106 in organization org-mZpDGmkoo1WsEP9bv7bbg2OB on tokens per min (TPM): Limit 60000, Used 56784, Requested 3591. Please try again in 375ms. Visit https://platform.openai.com/account/rate-limits to learn more..


New thread analyzed
New thread analyzed


Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for gpt-3.5-turbo-1106 in organization org-mZpDGmkoo1WsEP9bv7bbg2OB on tokens per min (TPM): Limit 60000, Used 58250, Requested 4192. Please try again in 2.442s. Visit https://platform.openai.com/account/rate-limits to learn more..


New thread analyzed


In [19]:
print(output[5])
print(samp[5])

[('Relevant Aspects', -0.19), ('media coverage', -0.12), ('miscellaneous', 0.12)]
Title: Anthem protests?
Content: Do we think there will be any protesting going on during the anthem this hockey season? Sorry if this is a hot topic, but I'd like to know how brushed up on my free speech constitutional law cases I need to be when I host watch parties.

Comments:
T: Comment Text: Of course not, the majority of players aren't even American
R: Comment Text: My wife and I played the "Name the Americans on our team without looking at the roster" game last night in response to the Tennessean saying they weren't going to protest hahah
R: Comment Text: Only ones I know off the top of my head are Bitetto and Smith.
R: Comment Text: Watson too!
R: Comment Text: Full of Vikings, and hosers.

Not many yanks.
T: Comment Text: [deleted]
R: Comment Text: He said he wouldn't, I hope not.
T: Comment Text: If everyone wants politics out of it, then do away with the dog and pony show anthems altogether and

In [21]:
lengths = []
for post in threads:
    lengths.append(len(post))

In [23]:
print(max(lengths))

55031


In [25]:
word_counts = [len(sentence.split()) for sentence in threads]
max_word_count = max(word_counts)
max_index = word_counts.index(max_word_count)

In [26]:
max_index

1996

In [28]:
preds_posts.iloc[1996]

Unnamed: 0.1                                                 2846
Unnamed: 0                                                    174
id                                                         j9swsp
Content         I have brought this up to individual /r/Predat...
Author                                                 DieHippies
Post Date                                     2020-10-12 15:25:59
Name: 1996, dtype: object

In [12]:
print(samp[0])

Title: Hear me out
Content: I know it’s late in the season but do you think it’s too late to try to shake up the team? The JOFA line is undoubtedly our best producing line and teams know if they can shut them down our other lines haven’t been producing. But what if we spread the talent of the JOFA line around to boost our other lines? I know the chemistry on our top line is great but do you think a change could jumpstart our other lines? 

Ex. 

     Forsberg Boyle Jarnkrok

     Grimaldi Johansson Sissons

     Arvidsson Bonino Smith

     Fiala Turris Hartman



Would this help or hurt our chances? Have we reached a point where this is worth a shot?

Comments:
T: Comment Text: We need to make the bottom 3 better, not make the first one worse. I get your idea, though. 
R: Comment Text: Good point. I’ve just been trying to think of what the team could do short of making a big trade. 
R: Comment Text: I disagree here. We have the players that make for quality 3rd and 4th l