# Task 1: Build your own lexicon-based sentiment analyser


In [1]:
import nltk
import pandas as pd
import spacy
from nltk.corpus import stopwords

# Fetch the NLTK stop-word corpus that stopwords.words() reads from
nltk.download('stopwords')

# English stop words, held in a set for fast membership tests
stop_words = set(stopwords.words('english'))

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [3]:
# Read the labelled tweets into a DataFrame
data = pd.read_csv('tweets.csv')

# Initialise the spaCy pipeline that is used later for POS tagging
nlp = spacy.load("en_core_web_sm")

# Leave the frame as the last expression so the notebook renders it
data

Unnamed: 0,Tweet ID,Text,User,Created At,Likes,Retweets,Sentiment
0,449211727471646420,Feeling grateful for my friends and family.,werickson,2023-01-13 00:35:08,156,489,positive
1,519036665081652813,Going for a walk in the park.,jennybutler,2023-02-16 06:24:30,223,788,neutral
2,776023316169815671,I hate it when things don't go my way.,william88,2023-01-24 18:12:37,332,860,negative
3,674750468135750054,I hate it when things don't go my way.,lawrencebauer,2023-02-09 07:14:24,388,881,negative
4,859726107390311299,This is the best day ever!,gerald07,2023-02-28 06:55:54,255,567,positive
...,...,...,...,...,...,...,...
995,250464848751217010,I hate it when things don't go my way.,nhayes,2023-01-28 05:03:18,986,932,negative
996,600819966000157055,I hate it when things don't go my way.,marknixon,2023-04-21 13:27:44,458,61,negative
997,966366146192109165,I'm so upset right now.,hollyflores,2023-03-08 11:29:25,317,179,negative
998,936627265507507170,Just had lunch with a friend.,odickerson,2023-04-09 18:32:54,584,706,neutral


In [5]:
# Clean the dataset
def clean_text(text, stop_word_set=None):
    """Lowercase a tweet and drop stop words.

    Args:
        text: Raw tweet text. Any non-string value (for example the float
            NaN pandas stores for a missing cell) is treated as empty text.
        stop_word_set: Optional collection of lowercase words to remove.
            When omitted, the notebook-level ``stop_words`` set is used.

    Returns:
        The remaining whitespace-separated tokens joined by single spaces;
        an empty string when nothing remains or the input is not a string.
    """
    if not isinstance(text, str):
        # A missing tweet has no tokens. Returning "" stops .apply() from
        # failing on float NaN, which has no .lower() method.
        return ""
    if stop_word_set is None:
        # Resolved at call time so the function follows the current global.
        stop_word_set = stop_words
    kept = [word for word in text.lower().split() if word not in stop_word_set]
    return " ".join(kept)

# Store the normalised text next to the raw tweet, one value per row
data['cleaned_text'] = data['Text'].map(clean_text)

In [6]:
# Perform POS tagging and extract adjectives
def extract_adjectives(text):
    """Return the text of every token whose ``pos_`` tag is ``"ADJ"``.

    The input is run through the notebook-level ``nlp`` pipeline and the
    matching tokens are collected in document order.
    """
    found = []
    for tok in nlp(text):
        if tok.pos_ == "ADJ":
            found.append(tok.text)
    return found

# Tag each cleaned tweet and keep its list of adjectives in a new column
data['adjectives'] = data['cleaned_text'].map(extract_adjectives)

In [8]:
# Create the lexicon with polarity weight
# Polarity weight per sentiment label; any other label is treated as neutral (0)
polarity_by_label = {'positive': 1, 'negative': -1}

# One entry per adjective occurrence, weighted by the label of its tweet.
# NOTE(review): a word is appended every time it occurs, so the lexicon can
# hold the same word several times (possibly with different polarities).
lexicon = [
    {'word': adjective, 'polarity': polarity_by_label.get(label, 0)}
    for label, adjectives in zip(data['Sentiment'], data['adjectives'])
    for adjective in adjectives
]

In [9]:
# Convert to DataFrame and save as CSV
# Name the columns explicitly: with an empty lexicon pd.DataFrame([]) has no
# columns, and the CSV would be written without the 'word,polarity' header.
lexicon_df = pd.DataFrame(lexicon, columns=['word', 'polarity'])
lexicon_df.to_csv('lexicon_sentiment.csv', index=False)

print("Lexicon with polarity weights saved to 'lexicon_sentiment.csv'.")

Lexicon with polarity weights saved to 'lexicon_sentiment.csv'.



NAME: Mohamed Moubarak Mohamed Misbahou Mkouboi<br>
MATRIC NO: P139575<br>