In [1]:
pip install textblob



In [2]:
from textblob import TextBlob

In [3]:
with open('Burbank.txt', 'r') as file:
    burbank_text = file.read()
blob = TextBlob(burbank_text)
sentiment = blob.sentiment.polarity
if sentiment < 0:
    print("The sentiment is negative.")
else:
    print("The sentiment is not negative.")

probability_negative = 1 if sentiment < 0 else 0
print(f"Probability that the sentiment is negative: {probability_negative}")

The sentiment is not negative.
Probability that the sentiment is negative: 0


###Exercise 2

In [4]:
sentiment = blob.sentiment.polarity
subjectivity = blob.sentiment.subjectivity
print(f"Overall Sentiment: {sentiment}, Subjectivity: {subjectivity}")

Overall Sentiment: 0.09869334480780263, Subjectivity: 0.3790877796901893


###Exercise 3


In [5]:
from textblob import Word
!pip install textblob nltk
import nltk
nltk.download('punkt')
nltk.download('wordnet') # Download the wordnet corpus

words = blob.words
word_frequencies = {}
for word in words:
    word = Word(word).lemmatize()
    word_frequencies[word] = word_frequencies.get(word, 0) + 1
sorted_words = sorted(word_frequencies.items(), key=lambda x: x[1], reverse=True)
key_topics = sorted_words[:10]  # Top 10 frequent words
print("Key topics: ", key_topics)



[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Key topics:  [('the', 100), ('of', 50), ('to', 39), ('and', 36), ('a', 22), ('that', 20), ('in', 19), ('flight', 15), ('The', 14), ('FAA', 13)]


###Exercise 4


In [6]:
!pip install spacy
!python -m spacy download en_core_web_sm

Collecting en-core-web-sm==3.7.1
  Using cached https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [7]:
import pandas as pd

data_amazon = pd.read_csv('amazon_cells_labelled.txt', sep='\t', header=None)
data_imdb = pd.read_csv('imdb_labelled.txt', sep='\t', header=None)
data_yelp = pd.read_csv('yelp_labelled.txt', sep='\t', header=None)

data_amazon.columns = ['Review', 'Label']
data_imdb.columns = ['Review', 'Label']
data_yelp.columns = ['Review', 'Label']

data_amazon['Company'] = 'Amazon'
data_imdb['Company'] = 'IMDB'
data_yelp['Company'] = 'Yelp'

comb_data = pd.concat([data_amazon, data_imdb, data_yelp], ignore_index=True)
comb_data.to_csv('Sentiment_Analysis_Dataset.csv', index=False)
print(comb_data.columns)
print(comb_data.isnull().sum())

Index(['Review', 'Label', 'Company'], dtype='object')
Review     0
Label      0
Company    0
dtype: int64


In [8]:
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
import string

nlp = spacy.load('en_core_web_sm')
stopwords = list(STOP_WORDS)
punctuations = string.punctuation

def clean_text(text):
    doc = nlp(text)
    tokens = [token.text for token in doc if token.text not in stopwords and token.text not in punctuations]
    return tokens

In [9]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score

X = comb_data['Review']
y = comb_data['Label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

pipe_tfidf = Pipeline([('tfidf', TfidfVectorizer()), ('svc', LinearSVC())])

pipe_tfidf.fit(X_train, y_train)

predictions = pipe_tfidf.predict(X_test)
accuracy = accuracy_score(y_test, predictions)

print(f"Accuracy: {accuracy}")

Accuracy: 0.8363636363636363


In [11]:
sample_review = "This product was awesome, I loved it!"
pred = pipe_tfidf.predict([sample_review])
print(f"{sample_review}, Prediction ➔ {pred}")

This product was awesome, I loved it!, Prediction ➔ [1]
