<a href="https://colab.research.google.com/github/AnandCV2001/Sentimental-Analysis-Project-on-YouTube-Comments/blob/main/Final_Sentment_Analysis_Project_25_02_2024.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#  **Sentiment analysis Project for YouTube Comments**

*** Importing python libraries**

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import nltk
import googleapiclient.discovery
from nltk.sentiment import SentimentIntensityAnalyzer

*** Code to scrape YouTube comments**

In [None]:
# Scrape the top-level comments of one YouTube video into a DataFrame.
import os

api_service_name = "youtube"
api_version = "v3"
# Never share or commit your YouTube API key. The key is read from the
# YOUTUBE_API_KEY environment variable when set; otherwise replace the placeholder.
DEVELOPER_KEY = os.environ.get("YOUTUBE_API_KEY", "Enter_your_API_key")

# Do not paste the whole link, only the portion after "v=".
# Eg: from https://www.youtube.com/watch?v=xrW52jF_uKA&t=20s take just xrW52jF_uKA.
VIDEO_ID = "Enter_the_youtube_video_link"
MAX_COMMENTS = 150   # total number of top-level comments to collect
PAGE_SIZE = 100      # commentThreads.list accepts maxResults between 1 and 100

youtube = googleapiclient.discovery.build(
    api_service_name, api_version, developerKey=DEVELOPER_KEY)

comments = []
page_token = None  # None on the first request; afterwards the API's nextPageToken

while len(comments) < MAX_COMMENTS:
    request = youtube.commentThreads().list(
        part="snippet",
        videoId=VIDEO_ID,
        maxResults=min(PAGE_SIZE, MAX_COMMENTS - len(comments)),
        textFormat="plainText",  # avoid HTML tags/entities in textDisplay
        pageToken=page_token,
    )
    response = request.execute()

    for item in response.get('items', []):
        comment = item['snippet']['topLevelComment']['snippet']
        comments.append([
            comment['authorDisplayName'],
            comment['publishedAt'],
            comment['updatedAt'],
            comment['likeCount'],
            comment['textDisplay']
        ])

    # Stop when the API reports no further page.
    page_token = response.get('nextPageToken')
    if not page_token:
        break

df = pd.DataFrame(comments, columns=['author', 'published_at', 'updated_at', 'like_count', 'text'])

df

In [None]:
# Preview the first rows of the scraped comments table.
df.head()

In [None]:
# Preview the last rows of the scraped comments table.
df.tail()

*** Checking whether there are null values**

In [None]:
# Count the missing values in each column.
df.isna().sum()

*** Dropping the unwanted columns**

In [None]:
# Keep only the comment text; author, timestamps and like counts are not used
# by the analysis. Selecting the column (instead of an in-place drop) keeps the
# cell safe to re-run: a second in-place drop would raise KeyError because the
# columns are already gone.
df = df[['text']].copy()
df

*** Downloading NLTK Resources**

In [None]:
# NLTK resources used by the preprocessing cells:
#   stopwords - stop-word list used for stop-word removal
#   punkt     - tokenizer models used by word_tokenize (older NLTK releases)
#   punkt_tab - tokenizer tables that recent NLTK releases load for word_tokenize
#   wordnet   - lexical database used for lemmatization
for resource in ('stopwords', 'punkt', 'punkt_tab', 'wordnet'):
    nltk.download(resource)

In [None]:
# Work on the text column from here on; `comment` is the Series that the
# following preprocessing cells transform step by step.
comment = df['text']
df

*** Tokenization**

In [None]:
# Tokenization splits raw text into tokens so it can be processed by machines.
# Each comment is tokenized with TweetTokenizer and the tokens are re-joined
# with single spaces, so `comment` stays a Series of strings.
from nltk import TweetTokenizer
tk = TweetTokenizer()
comment = comment.apply(lambda text: " ".join(tk.tokenize(text)))
comment

*** Removing special characters**

In [None]:
# Regular expression: replace every run of characters that are not ASCII
# letters or digits with a single space.
import re
# regex=True is required: pandas >= 2.0 treats the pattern as a literal string
# by default, in which case nothing would be replaced.
comment = comment.str.replace(r'[^a-zA-Z0-9]+', ' ', regex=True)
comment

In [None]:
# Re-tokenize each comment and keep only tokens of at least three characters.
from nltk.tokenize import word_tokenize
comment = comment.apply(
    lambda text: ' '.join(token for token in word_tokenize(text) if len(token) >= 3)
)

*** Stemming**

In [None]:
# Stemming reduces words to their root or base form.
# Every token is lower-cased, stemmed with the English Snowball stemmer and the
# stems are joined back into one space-separated string per comment.
from nltk.stem import SnowballStemmer
stemmer = SnowballStemmer('english')
comment = comment.apply(
    lambda text: ' '.join(stemmer.stem(token.lower()) for token in tk.tokenize(text))
)
comment

*** Removing Stopwords**

In [None]:
# Remove English stop words from every comment.
from nltk.corpus import stopwords
from nltk.stem import SnowballStemmer

stop = stopwords.words('english')

# The comments were stemmed in the previous step, so stop words whose stem
# differs from the dictionary form (e.g. "very" -> "veri", "because" -> "becaus")
# would no longer match the plain list. Match against both the original and the
# stemmed stop words; a set also makes each membership test constant-time.
_stopword_stemmer = SnowballStemmer('english')
stop_lookup = set(stop) | {_stopword_stemmer.stem(word) for word in stop}

comment = comment.apply(
    lambda text: ' '.join(token for token in word_tokenize(text) if token not in stop_lookup)
)
comment

*** Vectorization**

In [None]:
# Vectorization - converting words into numbers
# TF-IDF - term frequency-inverse document frequency
# The vectorizer is fitted on the cleaned comments and applied to them in one
# step. NOTE(review): in the cells visible here `data` is only inspected via
# data.shape; the sentiment cells work on the `comment` strings, not on it.
from sklearn.feature_extraction.text import TfidfVectorizer
vec=TfidfVectorizer()
data=vec.fit_transform(comment)
data

In [None]:
# Dimensions of the TF-IDF result built above
# (presumably number of comments x vocabulary size - confirm against the output).
data.shape

*** Sentiment analysis**

In [None]:
from textblob import TextBlob

def get_sentiment(data):
    """Label a piece of text by the sign of its TextBlob polarity.

    Parameters
    ----------
    data : text passed to ``TextBlob``.

    Returns
    -------
    str
        'positive' when the polarity is above zero, 'negative' when it is
        below zero, and 'neutral' otherwise.
    """
    # Polarity ranges from -1 (negative) to 1 (positive)
    score = TextBlob(data).sentiment.polarity
    if score > 0:
        return 'positive'
    return 'negative' if score < 0 else 'neutral'

In [None]:
# Score every cleaned comment. The loop variable is deliberately NOT called
# `comment`: reusing that name would rebind the Series to its last string, and
# re-running this cell would then iterate over single characters.
sentiments = [
    {"Comment": text, "Sentiment": get_sentiment(text)}
    for text in comment
]

# Print sentiments
for entry in sentiments:
    print(entry)

In [None]:
# Assemble the per-comment records into a table with one row per comment:
# the cleaned text ("Comment") and its label ("Sentiment").
columns = ["Comment", "Sentiment"]
result = pd.DataFrame(sentiments, columns=columns)
result

In [None]:
# Preview the first rows of the sentiment table.
result.head()

In [None]:
# Preview the last rows of the sentiment table.
result.tail()

In [None]:
# Number of comments per sentiment label.
sentiment_counts = result['Sentiment'].value_counts()

# Same counts as a two-column DataFrame ('Sentiment', 'count').
sentiment_counts_df = sentiment_counts.reset_index().set_axis(
    ['Sentiment', 'count'], axis="columns"
)

# NOTE: this rebinds `df`, which previously held the scraped comment text,
# to a frame built from `result`.
df = pd.DataFrame(result)

*** Pie Chart**

In [None]:
# Pie chart of the sentiment distribution.
# Colours and legend entries are derived from sentiment_counts itself, so every
# wedge, its colour and its legend label always refer to the same sentiment,
# whatever order value_counts() produced.
sentiment_colors = {'positive': 'green', 'negative': 'red', 'neutral': 'blue'}
colors = [sentiment_colors.get(name, 'gray') for name in sentiment_counts.index]
labels = [str(name).capitalize() for name in sentiment_counts.index]

plt.figure(figsize=(8, 8))
# With autopct set, plt.pie returns (wedges, label texts, percentage texts).
patches, _label_texts, _pct_texts = plt.pie(
    sentiment_counts,
    labels=sentiment_counts.index,
    colors=colors,
    autopct='%1.1f%%',
    startangle=140,
)
plt.title('Sentiment Distribution : Pie Plot')
plt.axis('equal')

# Legend handles are the drawn wedges, so the legend cannot drift from the plot.
plt.legend(patches, labels, loc='lower right')

plt.show()

*** Bar Chart**

In [None]:
# Bar chart of the sentiment distribution.
# Bar colours and legend entries come from one sentiment -> colour mapping and
# follow the order of sentiment_counts, so the legend matches the bars.
sentiment_colors = {'positive': 'green', 'negative': 'red', 'neutral': 'blue'}
colors = [sentiment_colors.get(name, 'gray') for name in sentiment_counts.index]
labels = [str(name).capitalize() for name in sentiment_counts.index]

plt.figure(figsize=(8, 6))
sentiment_counts.plot(kind='bar', color=colors)
plt.title('Overall Sentiment Distribution : Bar Plot ')
plt.xlabel('Sentiment')
plt.ylabel('Count')
plt.xticks(rotation=0)

# One legend swatch per bar, in the same colour and order as the bars.
patches = [plt.Rectangle((0, 0), 1, 1, color=color) for color in colors]
plt.legend(patches, labels, loc='upper right')

plt.show()

*** Donut Chart**

In [None]:
# Donut chart of the sentiment distribution.
# Colours and legend entries are derived from sentiment_counts itself, so every
# wedge, its colour and its legend label always refer to the same sentiment.
sentiment_colors = {'positive': 'green', 'negative': 'red', 'neutral': 'blue'}
colors = [sentiment_colors.get(name, 'gray') for name in sentiment_counts.index]
labels = [str(name).capitalize() for name in sentiment_counts.index]

plt.figure(figsize=(8, 8))
# wedgeprops width=0.3 draws ring segments instead of full wedges.
# With autopct set, plt.pie returns (wedges, label texts, percentage texts).
patches, _label_texts, _pct_texts = plt.pie(
    sentiment_counts,
    labels=sentiment_counts.index,
    colors=colors,
    autopct='%1.1f%%',
    startangle=140,
    wedgeprops=dict(width=0.3),
)
plt.title('Sentiment Distribution : Donut Plot')

# Draw a white circle at the center to make it look like a donut
centre_circle = plt.Circle((0, 0), 0.2, color='white', fc='white', linewidth=1.25)
fig = plt.gcf()
fig.gca().add_artist(centre_circle)

# Legend handles are the drawn wedges, so the legend cannot drift from the plot.
plt.legend(patches, labels, loc='lower right')
plt.axis('equal')
plt.show()