In [1]:
import pandas as pd
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
import re


In [2]:
# Path of the tweet CSV, relative to the notebook's working directory.
file_path = './twitter_training.csv'

# header=None makes pandas treat the first line as data rather than as column
# labels, so the four column names are supplied explicitly.
data = pd.read_csv(
    file_path,
    sep=',',
    header=None,
    names=['ID', 'Topic', 'Sentiment', 'Text'],
)


In [3]:
# Display the freshly loaded frame (ID, Topic, Sentiment, Text) as the cell output.
data

Unnamed: 0,ID,Topic,Sentiment,Text
0,2401,Borderlands,Positive,im getting on borderlands and i will murder yo...
1,2401,Borderlands,Positive,I am coming to the borders and I will kill you...
2,2401,Borderlands,Positive,im getting on borderlands and i will kill you ...
3,2401,Borderlands,Positive,im coming on borderlands and i will murder you...
4,2401,Borderlands,Positive,im getting on borderlands 2 and i will murder ...
...,...,...,...,...
74677,9200,Nvidia,Positive,Just realized that the Windows partition of my...
74678,9200,Nvidia,Positive,Just realized that my Mac window partition is ...
74679,9200,Nvidia,Positive,Just realized the windows partition of my Mac ...
74680,9200,Nvidia,Positive,Just realized between the windows partition of...


In [5]:
# Number of rows whose 'Text' cell is missing.
data['Text'].isna().sum()


686

In [6]:
# Replace missing tweet text with an empty string so the string processing
# applied to this column never receives NaN.
# The result is assigned back rather than using fillna(..., inplace=True) on
# the column selection: that form is chained assignment, which raises a
# FutureWarning on pandas 2.x and does not modify `data` when Copy-on-Write
# is enabled.
data['Text'] = data['Text'].fillna('')


In [8]:
# Fetch the NLTK resources this notebook relies on (no-op when already present).
nltk.download('punkt')
# Recent NLTK releases load the word_tokenize models from 'punkt_tab' instead
# of the legacy 'punkt' package; without it word_tokenize raises LookupError.
nltk.download('punkt_tab')
nltk.download('stopwords')
nltk.download('wordnet')

# A set gives O(1) membership checks when filtering tokens.
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

def preprocess_text(text):
    """Normalise one tweet into a space-separated string of lemmas.

    Steps: lowercase the text, remove every character that is neither a word
    character nor whitespace, tokenize with ``word_tokenize``, drop tokens
    found in ``stop_words`` and lemmatize the remainder.

    Args:
        text: Raw tweet text. Any value that is not a ``str`` (for example
            the float NaN pandas uses for a missing cell) is treated as
            empty.

    Returns:
        The cleaned tokens joined by single spaces, or ``''`` for
        non-string input.
    """
    # A missing cell reaches .apply() as float NaN, which has no .lower();
    # return early instead of raising AttributeError.
    if not isinstance(text, str):
        return ''

    text = text.lower()  # Convert text to lowercase
    text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation
    tokens = word_tokenize(text)  # Tokenize text into words
    # Stop words are filtered on the surface form, then survivors are lemmatized.
    tokens = [lemmatizer.lemmatize(token) for token in tokens if token not in stop_words]
    return ' '.join(tokens)  # Join tokens back into a string

# Run the cleaner over every tweet and store the result as a new column.
data['Cleaned_Text'] = data['Text'].map(preprocess_text)


[nltk_data] Downloading package punkt to /Users/mac/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /Users/mac/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /Users/mac/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [11]:
from textblob import TextBlob

def get_sentiment(text):
    """Return the TextBlob polarity of ``text``.

    Args:
        text: The string to score.

    Returns:
        The ``sentiment.polarity`` attribute of ``TextBlob(text)``.
    """
    return TextBlob(text).sentiment.polarity

# Score each cleaned tweet and keep the polarity in its own column.
# NOTE(review): scoring uses the stop-word-stripped text; if the stop list
# contains negations (e.g. "not"), polarity may differ from scoring the raw
# 'Text' column - confirm this is intended.
data['Sentiment_Score'] = data['Cleaned_Text'].map(get_sentiment)


In [10]:
!pip install textblob


Collecting textblob
  Downloading textblob-0.18.0.post0-py3-none-any.whl.metadata (4.5 kB)
Downloading textblob-0.18.0.post0-py3-none-any.whl (626 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m626.3/626.3 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: textblob
Successfully installed textblob-0.18.0.post0


In [12]:
def preprocess_text(text):
    """Clean a single tweet; non-string values yield an empty string.

    String input is lowercased, stripped of every character that is neither
    a word character nor whitespace, tokenized with ``word_tokenize``,
    filtered against ``stop_words`` and lemmatized. The surviving lemmas are
    returned joined by single spaces.
    """
    # Guard clause: anything that is not a str collapses to ''.
    if not isinstance(text, str):
        return ''

    lowered = text.lower()
    without_punctuation = re.sub(r'[^\w\s]', '', lowered)

    lemmas = []
    for word in word_tokenize(without_punctuation):
        if word in stop_words:
            continue  # stop words are dropped before lemmatization
        lemmas.append(lemmatizer.lemmatize(word))

    return ' '.join(lemmas)
# Rebuild 'Cleaned_Text' from 'Text' using the type-checked preprocess_text.
data['Cleaned_Text'] = data['Text'].map(preprocess_text)


In [13]:
# Display the frame again, now including the Cleaned_Text and Sentiment_Score columns.
data

Unnamed: 0,ID,Topic,Sentiment,Text,Cleaned_Text,Sentiment_Score
0,2401,Borderlands,Positive,im getting on borderlands and i will murder yo...,im getting borderland murder,0.0
1,2401,Borderlands,Positive,I am coming to the borders and I will kill you...,coming border kill,0.0
2,2401,Borderlands,Positive,im getting on borderlands and i will kill you ...,im getting borderland kill,0.0
3,2401,Borderlands,Positive,im coming on borderlands and i will murder you...,im coming borderland murder,0.0
4,2401,Borderlands,Positive,im getting on borderlands 2 and i will murder ...,im getting borderland 2 murder,0.0
...,...,...,...,...,...,...
74677,9200,Nvidia,Positive,Just realized that the Windows partition of my...,realized window partition mac like 6 year behi...,-0.4
74678,9200,Nvidia,Positive,Just realized that my Mac window partition is ...,realized mac window partition 6 year behind nv...,-0.4
74679,9200,Nvidia,Positive,Just realized the windows partition of my Mac ...,realized window partition mac 6 year behind nv...,-0.4
74680,9200,Nvidia,Positive,Just realized between the windows partition of...,realized window partition mac like 6 year behi...,-0.5


In [19]:
from textblob import TextBlob

# Example: label a single string by the sign of its TextBlob polarity.
text = ""
blob = TextBlob(text)
sentiment_score = blob.sentiment.polarity

# Positive is checked first, then negative; everything else is reported as neutral.
print(
    "Positive sentiment" if sentiment_score > 0
    else "Negative sentiment" if sentiment_score < 0
    else "Neutral sentiment"
)


Neutral sentiment
