# Extra Credit Sentiment Analysis

### Importing the required libraries

In [10]:
import pandas as pd
import re
from nltk.corpus import stopwords
from nltk.sentiment import SentimentIntensityAnalyzer
from gensim.models import KeyedVectors
from gensim.models import Word2Vec

In [2]:
import nltk
nltk.download('stopwords')
import nltk
nltk.download('punkt')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [3]:
import nltk
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

### Loading the data and taking first 2000 rows of the description column

In [26]:
netflix_data = pd.read_csv('netflix_titles.csv')
df = netflix_data['description']

### Cleaning and preprocessing the data
Converted text to lowercase, special characters and punctuation were removed and stop words were removed

In [27]:
stopwords_list = set(stopwords.words('english'))
def textcleaning(text):
    text = text.lower()
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    text = ' '.join(word for word in text.split() if word not in stopwords_list)
    return text

clean_df = df.apply(textcleaning)

### Training Word2Vec model

In [28]:
sentences = [description.split() for description in df]
model = Word2Vec(sentences, window=5, min_count=1, workers=4)
model.save("word2vec_model.bin")

### Performing Sentiment Analysis on cleaned Description column

In [29]:
model = Word2Vec.load("word2vec_model.bin")
sia = SentimentIntensityAnalyzer()
sentiments = []
for description in clean_df:
    vector_sum = sum(model.wv.get_vector(word) for word in description.split() if word in model.wv.key_to_index)
    sentiment = sia.polarity_scores(' '.join(vector_sum.astype(str)))
    sentiments.append(sentiment)

positive_count = sum(1 for sentiment in sentiments if sentiment['compound'] > 0)
negative_count = sum(1 for sentiment in sentiments if sentiment['compound'] < 0)
neutral_count = len(sentiments) - positive_count - negative_count

In [7]:
print("Sentiment Analysis Results:")
print("Positive: ", positive_count)
print("Negative: ", negative_count)
print("Neutral: ", neutral_count)

Sentiment Analysis Results:
Positive:  965
Negative:  862
Neutral:  173


### Printing the first 6 Descriptions and their Predicted Sentiment

In [8]:
for i in range(6):
    print(i + 1, "Description:", df[i])
    print("Predicted Sentiment:", sentiments[i])
    print()

1 Description: father nears end life filmmaker kirsten johnson stages death inventive comical ways help face inevitable
Predicted Sentiment: {'neg': 0.199, 'neu': 0.663, 'pos': 0.138, 'compound': -0.296}

2 Description: crossing paths party cape town teen sets prove whether privateschool swimming star sister abducted birth
Predicted Sentiment: {'neg': 0.174, 'neu': 0.684, 'pos': 0.142, 'compound': -0.1531}

3 Description: protect family powerful drug lord skilled thief mehdi expert team robbers pulled violent deadly turf war
Predicted Sentiment: {'neg': 0.406, 'neu': 0.399, 'pos': 0.196, 'compound': -0.7783}

4 Description: feuds flirtations toilet talk go among incarcerated women orleans justice center new orleans gritty reality series
Predicted Sentiment: {'neg': 0.176, 'neu': 0.653, 'pos': 0.171, 'compound': 0.2263}

5 Description: city coaching centers known train indias finest collegiate minds earnest unexceptional student friends navigate campus life
Predicted Sentiment: {'neg': 