In [None]:
#Problem Statement
"""
We have a huge number of YouTube comments on a trailer from a worldwide
production house. You, as an AI service provider, are supposed to analyse all the
comments on that trailer, get the sentiment and the score of each, and give a consolidated
report for the trailer about how the film might perform at the box office.

"""

In [None]:
#Libraries
"""
PyTorch - torch
HuggingFace - transformers
NLTK - nltk
VADER - sentiment.vader

"""

In [None]:
!pip install torch

In [None]:
!pip install transformers

In [None]:
!pip install nltk

In [None]:
!pip install vaderSentiment

In [None]:
import pandas as pd
# Load the comment spreadsheet (the path looks Colab-specific; adjust it when running elsewhere).
df = pd.read_excel("/content/John Wick Comments.xlsx")
# Preview only the first rows instead of rendering the whole frame.
df.head()


In [None]:
# Pull the 'Comments' column out of the DataFrame as a plain Python list.
# (The former `comments = []` initialisation was overwritten immediately, so it is dropped.)
comments = df['Comments'].tolist()

In [None]:
# Dump every comment, each followed by a "===" separator line.
for comment in comments:
  print(comment, "===", sep="\n")

In [None]:
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer

# The VADER lexicon has to be downloaded before the analyzer is built.
nltk.download('vader_lexicon')
sia = SentimentIntensityAnalyzer()

# What are stopwords?
# Words that help make up a sentence but do not have their own meaning,
# e.g. it, they, them, what, am, I.

# Try the analyzer on one hand-written sentence and show the score dict.
comment_score = sia.polarity_scores("I am very happy with this movie.")
print(comment_score)

In [None]:
# To classify comments as positive or negative we use an AI model from HuggingFace; to get the score of a sentence we use NLTK's VADER sentiment analyzer.

In [None]:
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
# `nltk` itself is imported in an earlier cell. Download the stop-word corpus before it is read below.
nltk.download('stopwords')
# Kept as a set; later cells test lowercase tokens for membership in it.
stop_words = set(stopwords.words('english'))
# NOTE(review): 'punkt_tab' is presumably the tokenizer data word_tokenize needs — confirm against the installed NLTK version.
nltk.download('punkt_tab')

In [None]:
# Walk through stop-word removal by hand on one example sentence.
sentence = "Today is a very sunny day, I would like to go out and play football with my friends"

# Step 1: split the sentence into tokens.
tokenized_comment = word_tokenize(sentence)
print("Tokenized Comment: ", tokenized_comment)

# Step 2: keep only the tokens whose lowercase form is not a stop word.
processed_comment = list(
  filter(lambda token: token.lower() not in stop_words, tokenized_comment)
)
print("Processed Comment: ", processed_comment)

In [None]:
def remove_stopwords(raw_comment):
  """Return `raw_comment` with stop words removed.

  The text is split with `word_tokenize`, every token whose lowercase form is
  in the notebook-level `stop_words` set is dropped, and the remaining tokens
  are re-joined with single spaces.

  Args:
    raw_comment: The comment text to clean.

  Returns:
    The surviving tokens joined by single spaces.
  """
  # Tokenize the argument itself. The previous version read the notebook-level
  # `sentence` variable, so every call returned the same text.
  tokenized_comment = word_tokenize(raw_comment)
  processed_comment = [word for word in tokenized_comment if word.lower() not in stop_words]
  return ' '.join(processed_comment)

In [None]:
# Try the helper on the same sentence used in the manual walkthrough cell and print the cleaned text.
result = remove_stopwords("Today is a very sunny day, I would like to go out and play football with my friends")
print(result)

In [None]:
import nltk
# Fetch the NLTK resources in one place: the VADER lexicon, the stop-word corpus,
# and the 'punkt_tab' data (presumably required by word_tokenize — confirm for the installed NLTK version).
nltk.download('vader_lexicon')
nltk.download('stopwords')
nltk.download('punkt_tab')

In [None]:
from transformers import pipeline

In [None]:
from nltk.sentiment import SentimentIntensityAnalyzer
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# VADER analyzer: supplies the compound polarity score read by get_comment_sentiment_details.
sia = SentimentIntensityAnalyzer()
# English stop words as a set; remove_stopwords tests lowercase tokens for membership in it.
stop_words = set(stopwords.words('english'))
# HuggingFace sentiment pipeline (DistilBERT SST-2 checkpoint, per the model id); its label is read by get_comment_sentiment_details.
classifier = pipeline("sentiment-analysis", model = "distilbert/distilbert-base-uncased-finetuned-sst-2-english")

In [None]:
import pandas as pd
# Load the comment spreadsheet; later cells read its 'Comments' column.
# NOTE(review): absolute path looks Colab-specific — adjust when running elsewhere.
df = pd.read_excel("/content/John Wick Comments.xlsx")

In [None]:
# Pull the 'Comments' column out of the DataFrame as a plain Python list.
# (The former `comments = []` initialisation was overwritten immediately, so it is dropped.)
comments = df['Comments'].tolist()

In [None]:
def remove_stopwords(raw_comment):
  """Strip stop words from `raw_comment` and return the remaining text.

  Tokens come from `word_tokenize`; a token is discarded when its lowercase
  form appears in the notebook-level `stop_words` set. The kept tokens are
  joined back together with single spaces.
  """
  kept_tokens = []
  for token in word_tokenize(raw_comment):
    if token.lower() in stop_words:
      continue
    kept_tokens.append(token)
  return ' '.join(kept_tokens)

In [None]:
def get_comment_sentiment_details(raw_comment):
  """Score one comment and label it POSITIVE, NEGATIVE or NEUTRAL.

  Stop words are removed first. VADER (`sia`) provides the magnitude of the
  score via the absolute compound value; the HuggingFace `classifier` provides
  the label, which decides the sign. A compound value of exactly 0 is reported
  as NEUTRAL without calling the classifier.

  Args:
    raw_comment: Comment text. Missing values (None/NaN) are treated as an
      empty comment; any other non-string value is converted with `str`.

  Returns:
    A 4-tuple `(positive_words, negative_words, sentence_score, comment_sentiment)`:
      positive_words: for a POSITIVE comment, the words VADER scores above 0,
        each followed by a space; otherwise "".
      negative_words: for a NEGATIVE comment, the words VADER scores below 0,
        each followed by a space; otherwise "".
      sentence_score: the absolute compound score, negated for NEGATIVE comments.
      comment_sentiment: "NEUTRAL" or the label returned by the classifier.
  """
  # Spreadsheet cells can hold NaN or numbers; the tokenizer only accepts text.
  if not isinstance(raw_comment, str):
    raw_comment = "" if pd.isna(raw_comment) else str(raw_comment)

  processed_comment = remove_stopwords(raw_comment)
  words = processed_comment.split()
  positive_words = ""
  negative_words = ""

  abs_sentence_score = abs(sia.polarity_scores(processed_comment)['compound'])

  # No polarity at all: the label would be overridden anyway, so skip the model call.
  if abs_sentence_score == 0:
    return positive_words, negative_words, abs_sentence_score, "NEUTRAL"

  # truncation=True keeps very long comments within the model's input limit.
  comment_sentiment = classifier(processed_comment, truncation=True)[0]['label']

  if comment_sentiment == "NEGATIVE":
    sentence_score = -abs_sentence_score
    negative_words = "".join(
      word + " " for word in words if sia.polarity_scores(word)['compound'] < 0
    )
  elif comment_sentiment == "POSITIVE":
    # This branch used to test "NEGATIVE" a second time, which overwrote the
    # negative score and never collected words for positive comments.
    sentence_score = abs_sentence_score
    positive_words = "".join(
      word + " " for word in words if sia.polarity_scores(word)['compound'] > 0
    )
  else:
    sentence_score = abs_sentence_score

  return positive_words, negative_words, sentence_score, comment_sentiment

In [None]:
# Accumulators filled while scoring every comment.
positive_words = ""
negative_words = ""
neu_count = 0

pos_values_list = []
neg_values_list = []

for comment in comments:
  pw, nw, ss, cs = get_comment_sentiment_details(comment)
  positive_words += pw + " "
  negative_words += nw + " "

  if cs == "NEGATIVE":
    neg_values_list.append(ss)
  elif cs == "POSITIVE":
    pos_values_list.append(ss)
  else:
    neu_count += 1

# Average each side independently. The previous shared try/except reset BOTH
# averages to 0 as soon as either list was empty.
avg_pos_score = sum(pos_values_list) / len(pos_values_list) if pos_values_list else 0
avg_neg_score = sum(neg_values_list) / len(neg_values_list) if neg_values_list else 0

# Overall score = mean signed score over all non-neutral comments. The previous
# formula divided the sum of the two averages by the comment count, which
# shrinks toward 0 as comments are added and raises ZeroDivisionError when
# nothing was scored.
scored_count = len(pos_values_list) + len(neg_values_list)
final_score = (sum(pos_values_list) + sum(neg_values_list)) / scored_count if scored_count else 0
print(final_score)

In [None]:
# Inspect the accumulated positive-word string (a later cell feeds it to the word cloud).
positive_words

In [None]:
# Inspect the accumulated negative-word string (a later cell feeds it to the word cloud).
negative_words

In [None]:
# Display the average score computed from pos_values_list.
avg_pos_score

In [None]:
# Display the average score computed from neg_values_list
# (this cell used to repeat avg_pos_score from the cell above).
avg_neg_score

In [None]:
!pip install wordcloud

In [None]:
from wordcloud import WordCloud
import matplotlib.pyplot as plt

print("positives")
# Build the cloud from the space-separated positive-word string.
wordcloud_positive = WordCloud(
    background_color='white',
    width=800,
    height=400,
).generate(positive_words)

# Render the cloud as an image without axis ticks.
fig, ax = plt.subplots(figsize=(10,5))
ax.imshow(wordcloud_positive, interpolation='bilinear')
ax.axis('off')
plt.show()

In [None]:
print("negatives")
# Build the cloud from the space-separated negative-word string.
wordcloud_negative = WordCloud(
    background_color='white',
    width=800,
    height=400,
).generate(negative_words)

# Render the cloud as an image without axis ticks.
fig, ax = plt.subplots(figsize=(10,5))
ax.imshow(wordcloud_negative, interpolation='bilinear')
ax.axis('off')
plt.show()

In [None]:
# Overall score = mean signed score over all non-neutral comments. The previous
# formula divided the sum of the two averages by the comment count, which
# shrinks toward 0 as comments are added and raises ZeroDivisionError when
# nothing was scored.
scored_count = len(pos_values_list) + len(neg_values_list)
final_score = (sum(pos_values_list) + sum(neg_values_list)) / scored_count if scored_count else 0
print(final_score)

In [None]:
# Map the overall score onto a verdict: >= 0.5 is a Hit, >= 0.2 is Average, anything lower is a Flop.
verdict = (
    "Hit" if final_score >= 0.5
    else "Average" if final_score >= 0.2
    else "Flop"
)

print(f"Verdict: {verdict}")

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Label every comment; element [3] of the helper's result is the sentiment label.
df['comment_sentiment'] = [
    get_comment_sentiment_details(comment)[3] for comment in df['Comments']
]

# Bar chart of how many comments fall under each label.
ax = sns.countplot(data=df, x='comment_sentiment')
ax.set_title('Sentiment Distribution of John Wick Trailer Comments')
plt.show()