In [112]:
import pandas as pd
import matplotlib.pyplot as plt
from openai import OpenAI
import os
import time
import anthropic
import re

In [126]:
## Used here Claude Model as the OpenAI limits have been exhausted

In [113]:
df=pd.read_csv('Data\Cleaned_Reddit_Sentiment_Analysis.csv')
df.head()

Unnamed: 0,id,subreddit,submission_id,comment_body
0,jbnq5e9,reddevils,11nl59m,He was clearly signaling for a substitution. P...
1,jbnpoh0,reddevils,11nl59m,I'll never forget Bruno standing behind Ole wh...
2,jbnoo3x,reddevils,11nl59m,That is such a Wout goal (even though he hasn’...
3,jbnpdfj,reddevils,11nl59m,I hope Pellistri gets more game time. I think ...
4,jbnvnr2,reddevils,11nl59m,I love Bruno. \n\nGenuinely can’t believe the ...


In [114]:
comments_list = df['comment_body'].to_list()
print(len(comments_list))

383


In [127]:
## Working on the comments so we have extracted in the comments_list variable

In [115]:
claude_api_key = os.getenv('CLAUDE_API_KEY')

In [116]:
client = anthropic.Anthropic(
    # defaults to os.environ.get("ANTHROPIC_API_KEY")
    api_key=claude_api_key,
)

In [128]:
def comment_summarization(prompt, model="claude-3-5-sonnet-20240620"):
    messages = [{"role": "user", "content": prompt}]
    response = client.messages.create(
        model=model,
        messages=messages,
        max_tokens=1024,
        temperature=0,    
        )
    return response

In [129]:
## Created here model that will take the prompt and returns the response.

In [118]:
def numericTransformer(response):
    content=response.content
    match = re.findall(r'-?\d+', content[0].text)

    if match:
        first_match=int(match[0])
        return first_match
    else:
        return -5

In [131]:
## created a function that will extract the sentiment value from the claude models response.
## Used -5 if model fails to predict the sentiment but there is no value in the output file which the model fails to predict

In [119]:
SENTIMENT_PROMPT = """
Return only numbers -1, 0, or 1 for negative, neutral, or positive sentiment not any additional info regarding the context or summary: ```{comment}```
"""
sentiment_score=[]
for i, comment in enumerate(comments_list):
    prompt = SENTIMENT_PROMPT.format(comment=comment)
    response = comment_summarization(prompt)
    sentiment_score.append(numericTransformer(response))
    print(i, sentiment_score, "\n")        

0 [0] 

1 [0, 0] 

2 [0, 0, 1] 

3 [0, 0, 1, 1] 

4 [0, 0, 1, 1, 1] 

5 [0, 0, 1, 1, 1, 0] 

6 [0, 0, 1, 1, 1, 0, 0] 

7 [0, 0, 1, 1, 1, 0, 0, 1] 

8 [0, 0, 1, 1, 1, 0, 0, 1, 0] 

9 [0, 0, 1, 1, 1, 0, 0, 1, 0, 1] 

10 [0, 0, 1, 1, 1, 0, 0, 1, 0, 1, -1] 

11 [0, 0, 1, 1, 1, 0, 0, 1, 0, 1, -1, 0] 

12 [0, 0, 1, 1, 1, 0, 0, 1, 0, 1, -1, 0, -1] 

13 [0, 0, 1, 1, 1, 0, 0, 1, 0, 1, -1, 0, -1, 0] 

14 [0, 0, 1, 1, 1, 0, 0, 1, 0, 1, -1, 0, -1, 0, -1] 

15 [0, 0, 1, 1, 1, 0, 0, 1, 0, 1, -1, 0, -1, 0, -1, -1] 

16 [0, 0, 1, 1, 1, 0, 0, 1, 0, 1, -1, 0, -1, 0, -1, -1, -1] 

17 [0, 0, 1, 1, 1, 0, 0, 1, 0, 1, -1, 0, -1, 0, -1, -1, -1, 0] 

18 [0, 0, 1, 1, 1, 0, 0, 1, 0, 1, -1, 0, -1, 0, -1, -1, -1, 0, -1] 

19 [0, 0, 1, 1, 1, 0, 0, 1, 0, 1, -1, 0, -1, 0, -1, -1, -1, 0, -1, 0] 

20 [0, 0, 1, 1, 1, 0, 0, 1, 0, 1, -1, 0, -1, 0, -1, -1, -1, 0, -1, 0, 1] 

21 [0, 0, 1, 1, 1, 0, 0, 1, 0, 1, -1, 0, -1, 0, -1, -1, -1, 0, -1, 0, 1, 1] 

22 [0, 0, 1, 1, 1, 0, 0, 1, 0, 1, -1, 0, -1, 0, -1, -1, -1, 0, -1, 0, 1,

In [132]:
## Iterated over the commentsList and given it to the function

In [120]:
df["sentiment_score"]=sentiment_score

In [133]:
## added the final sentiment score to the final dataframe

In [125]:
df.to_csv("Data\FinalPreparedFile.csv",index=False)

In [134]:
## Converted it into a csv file