In [64]:
import pandas as pd
import re
from transformers import GPT2Tokenizer, GPT2LMHeadModel, pipeline

# Load tokenizer and model
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')

# Set the padding token to eos_token since GPT-2 does not have a default padding token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

In [65]:
df = pd.DataFrame(pd.read_csv("test_data.csv"))

In [66]:
# Display sample of preprocessed reviews
print("Sample preprocessed reviews:")
print(df)

Sample preprocessed reviews:
                                              prompt  completion
0  A movie theater with a bad history of past gru...           2
1  The first time I saw this film  I wanted to li...           1
2  I have watched some pretty poor films in the p...           2
3  The fact that a film is on DVD doesn t guarant...           2
4  I m not a huge Star Trek fan  but I was lookin...           2
5  I loved the gorgeous Greek scenery but the sto...           2
6  I remember this game  It was always sitting on...           1
7  OK i own this DVD i got it new at amazon    i ...           2
8  A splendid example of how Hollywood could  and...           2


In [67]:
def get_sentiment(text):
    prompt = (f"Person A: 'What do you think about this review of the movie, in one word: postive or negative? \"{text}\"'\n"
            "Person B: 'Based on the review, I would say the sentiment is likely")
    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=1000)
    outputs = model.generate(
        **inputs,
        max_length=1000,  # Adjusted max_length to prevent too long outputs
        pad_token_id=tokenizer.pad_token_id,
        temperature=0.7,  # Adjusted temperature for less determinism
        top_p=0.9,  # Enabled nucleus sampling
        no_repeat_ngram_size=2  # Prevent repetition of n-grams
    )
    
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

In [68]:
# Test the sentiment analysis function with a sample review
test_review = "This movie was an excellent portrayal of historical events."
result = get_sentiment(test_review) 
print("Sentiment Prediction:", result)



Sentiment Prediction: Person A: 'What do you think about this review of the movie, in one word: postive or negative? "This movie was an excellent portrayal of historical events."'
Person B: 'Based on the review, I would say the sentiment is likely to be positive. I think it's a good movie.'
'Person C: "I think the film is a great movie. It's not a bad movie."
"Person D: I'm not sure if it is. But I do think that it has a lot of potential. The movie is very well done. There are a few things that I don't like about it. One is that the characters are very different from the ones in the original. And the story is not very interesting. So I wouldn't say that this is the best movie of all time.
The movie has been nominated for an Academy Award for Best Picture, Best Director, and Best Original Screenplay.


In [69]:
# Apply sentiment analysis and display some responses
df['model_response'] = df['prompt'].apply(get_sentiment)
print("Sample model responses:")
print(df[['prompt', 'model_response']].head(5))


Sample model responses:
                                              prompt  \
0  A movie theater with a bad history of past gru...   
1  The first time I saw this film  I wanted to li...   
2  I have watched some pretty poor films in the p...   
3  The fact that a film is on DVD doesn t guarant...   
4  I m not a huge Star Trek fan  but I was lookin...   

                                      model_response  
0  Person A: 'What do you think about this review...  
1  Person A: 'What do you think about this review...  
2  Person A: 'What do you think about this review...  
3  Person A: 'What do you think about this review...  
4  Person A: 'What do you think about this review...  


In [71]:
# Try to extraxt sentiment from the respone
def extract_sentiment(response):
    # Regex to find "positive" or "negative" after Person B's statement
    match = re.search(r"Person B: 'Based on the review, I would say the sentiment is likely to be (positive|negative)", response)
    if match:
        return match.group(1)
    return "Sentiment not clear or not found"


In [72]:

# Apply function
df['sentiment'] = df['model_response'].apply(extract_sentiment)


print(df)
df.to_csv('filename.csv', index=False)


                                              prompt  completion  \
0  A movie theater with a bad history of past gru...           2   
1  The first time I saw this film  I wanted to li...           1   
2  I have watched some pretty poor films in the p...           2   
3  The fact that a film is on DVD doesn t guarant...           2   
4  I m not a huge Star Trek fan  but I was lookin...           2   
5  I loved the gorgeous Greek scenery but the sto...           2   
6  I remember this game  It was always sitting on...           1   
7  OK i own this DVD i got it new at amazon    i ...           2   
8  A splendid example of how Hollywood could  and...           2   

                                      model_response  \
0  Person A: 'What do you think about this review...   
1  Person A: 'What do you think about this review...   
2  Person A: 'What do you think about this review...   
3  Person A: 'What do you think about this review...   
4  Person A: 'What do you think about t