# Import Library

In [1]:
import nltk
import random
import os
import numpy as np
from nltk.corpus import twitter_samples
from matplotlib import pyplot as plt
from nltk.tokenize import TweetTokenizer
import string
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer


# Load Dataset

In [2]:
# DOWNLOAD DATASET

nltk.download('twitter_samples', download_dir= os.getcwd())

[nltk_data] Downloading package twitter_samples to
[nltk_data]     c:\Tensorflow\NLP\NLP Twitter...
[nltk_data]   Package twitter_samples is already up-to-date!


True

In [3]:
import os

# Print the contents of the directory
print(os.listdir(r'C:\Tensorflow\NLP\NLP Twitter\corpora\twitter_samples'))

['negative_tweets.json', 'positive_tweets.json', 'README.md', 'README.txt', 'tweets.20150430-223406.json']


In [4]:
# LOAD DATASET 

current_directory = os.getcwd()

# MOVE NLTK TO THAT PATH
nltk.data.path.append(r'C:\Tensorflow\NLP\NLP Twitter')

all_positive_tweets = twitter_samples.strings('positive_tweets.json')
all_negative_tweets = twitter_samples.strings('negative_tweets.json')

print(f'Positive Tweets has {len(all_positive_tweets)} samples')
print(f'Negative Tweets has {len(all_negative_tweets)} samples')

Positive Tweets has 5000 samples
Negative Tweets has 5000 samples


### Display 10 Positive Tweets

In [5]:
# DISPLAY 5 SAMPLES FROM POSITIVE TWEETS

for i in range(0,10):
    print(f'Tweet {i+1} : {all_positive_tweets[i]}')

Tweet 1 : #FollowFriday @France_Inte @PKuchly57 @Milipol_Paris for being top engaged members in my community this week :)
Tweet 2 : @Lamb2ja Hey James! How odd :/ Please call our Contact Centre on 02392441234 and we will be able to assist you :) Many thanks!
Tweet 3 : @DespiteOfficial we had a listen last night :) As You Bleed is an amazing track. When are you in Scotland?!
Tweet 4 : @97sides CONGRATS :)
Tweet 5 : yeaaaah yippppy!!!  my accnt verified rqst has succeed got a blue tick mark on my fb profile :) in 15 days
Tweet 6 : @BhaktisBanter @PallaviRuhail This one is irresistible :)
#FlipkartFashionFriday http://t.co/EbZ0L2VENM
Tweet 7 : We don't like to keep our lovely customers waiting for long! We hope you enjoy! Happy Friday! - LWWF :) https://t.co/smyYriipxI
Tweet 8 : @Impatientraider On second thought, there’s just not enough time for a DD :) But new shorts entering system. Sheep must be buying.
Tweet 9 : Jgh , but we have to go to Bayan :D bye
Tweet 10 : As an act of mischiev

In [6]:
# DISPLAY 5 SAMPLES FROM NEGATIVE TWEETS

i = 0

for tweet in all_negative_tweets:
    if i == 10:
        break

    print(f'Tweet {i+1} : {tweet}')
    i += 1

Tweet 1 : hopeless for tmr :(
Tweet 2 : Everything in the kids section of IKEA is so cute. Shame I'm nearly 19 in 2 months :(
Tweet 3 : @Hegelbon That heart sliding into the waste basket. :(
Tweet 4 : “@ketchBurning: I hate Japanese call him "bani" :( :(”

Me too
Tweet 5 : Dang starting next week I have "work" :(
Tweet 6 : oh god, my babies' faces :( https://t.co/9fcwGvaki0
Tweet 7 : @RileyMcDonough make me smile :((
Tweet 8 : @f0ggstar @stuartthull work neighbour on motors. Asked why and he said hates the updates on search :( http://t.co/XvmTUikWln
Tweet 9 : why?:("@tahuodyy: sialan:( https://t.co/Hv1i0xcrL2"
Tweet 10 : Athabasca glacier was there in #1948 :-( #athabasca #glacier #jasper #jaspernationalpark #alberta #explorealberta #… http://t.co/dZZdqmf7Cz


# Preprocess Raw Text

## Splitting Data

In [7]:
# SPLIT DATA INTO TRAIN AND TEST

train_data = np.concatenate((all_positive_tweets[:4000], all_negative_tweets[:4000]), axis=0)
test_data  = np.concatenate((all_positive_tweets[4000:5000], all_negative_tweets[4000:5000]), axis=0)

len(train_data) , len(test_data) , train_data[:2] , test_data[:2]


(8000,
 2000,
 array(['#FollowFriday @France_Inte @PKuchly57 @Milipol_Paris for being top engaged members in my community this week :)',
        '@Lamb2ja Hey James! How odd :/ Please call our Contact Centre on 02392441234 and we will be able to assist you :) Many thanks!'],
       dtype='<U152'),
 array(['Bro:U wan cut hair anot,ur hair long Liao bo\nMe:since ord liao,take it easy lor treat as save $ leave it longer :)\nBro:LOL Sibei xialan',
        "@heyclaireee is back! thnx God!!! i'm so happy :)"], dtype='<U146'))

## Create a Label for every Tweet

In [8]:
# CREATE LABEL . IF THEY POSITIVE TWEET, GIVE THEM 1  .   AND IF THEY NEGATIVE TWEET , GIVE THEM 0

train_positive = all_positive_tweets[:4000]
train_negative = all_negative_tweets[:4000]

test_positive = all_positive_tweets[4000:5000]
test_negative = all_negative_tweets[4000:5000]

#            FILL VALUE 1        shape=(4000,1)          FILL VALUE 0  shape=(4000,1)
train_label = np.append(np.ones( shape=( len(train_positive),1) ), np.zeros( shape=( len(train_negative),1) ), axis=0)

#           FILL VALUE 1         shape=(1000,1)          FILL VALUE 0  shape=(1000,1)
test_label  = np.append(np.ones( shape=( len(test_positive),1) ) , np.zeros( shape=( len(test_negative),1) ), axis=0)


# DEBUGGING
len(train_label) , len(test_label) , train_label , test_label

(8000,
 2000,
 array([[1.],
        [1.],
        [1.],
        ...,
        [0.],
        [0.],
        [0.]]),
 array([[1.],
        [1.],
        [1.],
        ...,
        [0.],
        [0.],
        [0.]]))

In [9]:
# VISUALIZE DATASET TWEETS USING DATAFRAME 

import pandas as pd

data = {
    'Tweets': train_data,
    'Label' : train_label.reshape(-1)
}

df = pd.DataFrame(data)

df

Unnamed: 0,Tweets,Label
0,#FollowFriday @France_Inte @PKuchly57 @Milipol...,1.0
1,@Lamb2ja Hey James! How odd :/ Please call our...,1.0
2,@DespiteOfficial we had a listen last night :)...,1.0
3,@97sides CONGRATS :),1.0
4,yeaaaah yippppy!!! my accnt verified rqst has...,1.0
...,...,...
7995,Amelia didnt stalk my twitter :(,0.0
7996,"oh, i missed the broadcast. : (",0.0
7997,i really can't stream on melon i feel useless :-(,0.0
7998,I need to stop looking at old soccer pictures :(,0.0


## Text Cleaning

In [10]:
# CLEANING TWEETS

import re  # USING REGEX TO CLEANING TEXT

def TextCleaning(tweet):

    # REMOVE HYPERLINK
    cleaning_tweets = re.sub(pattern=r'https?://[^\n\r\s]+', repl= '', string= tweet)

    # REMOVE USENAME
    cleaning_tweets = re.sub(pattern=r'@\w+', repl='', string= cleaning_tweets)

    # REMOVE HASHTAGS
    cleaning_tweets = re.sub(pattern=r'#', repl='', string= cleaning_tweets)

    return cleaning_tweets


#### Display Function

In [11]:
# DISPLAY TWEETS AFTER CLEANED

# BEFORE CLEANING
print('Before Clean :')
for tweet in train_data[:5]:
    print(tweet)

i = 0
cleaned_tweets = []

print('\nAfter Clean :')
for tweet in train_data:
    clean_tweet = TextCleaning(tweet)

    # JUST PRINT THE FIRST 5 TWEET
    if i < 5:
        print(clean_tweet)
    i += 1
    cleaned_tweets.append(clean_tweet)

Before Clean :
#FollowFriday @France_Inte @PKuchly57 @Milipol_Paris for being top engaged members in my community this week :)
@Lamb2ja Hey James! How odd :/ Please call our Contact Centre on 02392441234 and we will be able to assist you :) Many thanks!
@DespiteOfficial we had a listen last night :) As You Bleed is an amazing track. When are you in Scotland?!
@97sides CONGRATS :)
yeaaaah yippppy!!!  my accnt verified rqst has succeed got a blue tick mark on my fb profile :) in 15 days

After Clean :
FollowFriday    for being top engaged members in my community this week :)
 Hey James! How odd :/ Please call our Contact Centre on 02392441234 and we will be able to assist you :) Many thanks!
 we had a listen last night :) As You Bleed is an amazing track. When are you in Scotland?!
 CONGRATS :)
yeaaaah yippppy!!!  my accnt verified rqst has succeed got a blue tick mark on my fb profile :) in 15 days


## Tokenization

In [12]:
# TRANSFORM TWEET INTO EACH TOKEN

def Tokenization(tweet):

    # DECLARE TOKENIZER
    tokenizer = TweetTokenizer(preserve_case=False, reduce_len=True, strip_handles=True)

    # TRANSFORM TO EVERY TWEETS/STRINGS
    tweet_token= tokenizer.tokenize(tweet)

    return tweet_token


In [13]:
# DISPLAY TWEET AFTER TOKENIZATION

print('Tweet After Tokenization :')

i = 0
tweet_tokens = []

for tweet in cleaned_tweets:
    tweet_token = Tokenization(tweet)

    # JUST PRINT THE FIRST 5 TWEET
    if i < 5:
        print(tweet_token)

    i += 1
    tweet_tokens.append(tweet_token)

Tweet After Tokenization :
['followfriday', 'for', 'being', 'top', 'engaged', 'members', 'in', 'my', 'community', 'this', 'week', ':)']
['hey', 'james', '!', 'how', 'odd', ':/', 'please', 'call', 'our', 'contact', 'centre', 'on', '02392441234', 'and', 'we', 'will', 'be', 'able', 'to', 'assist', 'you', ':)', 'many', 'thanks', '!']
['we', 'had', 'a', 'listen', 'last', 'night', ':)', 'as', 'you', 'bleed', 'is', 'an', 'amazing', 'track', '.', 'when', 'are', 'you', 'in', 'scotland', '?', '!']
['congrats', ':)']
['yeaaah', 'yipppy', '!', '!', '!', 'my', 'accnt', 'verified', 'rqst', 'has', 'succeed', 'got', 'a', 'blue', 'tick', 'mark', 'on', 'my', 'fb', 'profile', ':)', 'in', '15', 'days']


## Remove Stop Words and Punctuation

In [14]:
# GET STOPWORDS AND PUNCTUATION

# DOWNLOAD STOPWORDS
nltk.download(info_or_id='stopwords')

stopwords_english = stopwords.words('english')

print(f'size stopwords : {len(stopwords_english)}')
print(f'Stopwords : {stopwords_english[:20]}')  # DISPLAY 20 STOPWORDS

print(f'Punctuation : {string.punctuation}\n')

size stopwords : 179
Stopwords : ['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his']
Punctuation : !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~



[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\aliff\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [15]:
# REMOVE STOPWORDS AND PUNCTUATION

def remove_StopWords(tweet):

    cleaned_token = []

    # ITERATE EVERY TOKEN IN A TWEET         
    for word in tweet:   

        # IF TOKEN DOESNT CONTAIN STOPWORDS AND PUNCTUATION
        if (word not in stopwords_english and word not in string.punctuation):
            cleaned_token.append(word) # PUSH THAT TOKEN
                

    return cleaned_token


In [16]:
# DISPLAY TWEET AFTER REMOVED STOP WORDS

i = 0
cleaned_tweets = []

print('After Removed Stopwords and Punctuation :')
for tweet in tweet_tokens:
    tweet_clean = remove_StopWords(tweet)

    # JUST PRINT THE FIRST 5 TWEET
    if i < 5:
        print(tweet_clean)

    i += 1
    cleaned_tweets.append(tweet_clean)

After Removed Stopwords and Punctuation :
['followfriday', 'top', 'engaged', 'members', 'community', 'week', ':)']
['hey', 'james', 'odd', ':/', 'please', 'call', 'contact', 'centre', '02392441234', 'able', 'assist', ':)', 'many', 'thanks']
['listen', 'last', 'night', ':)', 'bleed', 'amazing', 'track', 'scotland']
['congrats', ':)']
['yeaaah', 'yipppy', 'accnt', 'verified', 'rqst', 'succeed', 'got', 'blue', 'tick', 'mark', 'fb', 'profile', ':)', '15', 'days']


## Stemming

In [17]:
# STEMMING USING PORTER STEMMING ALGORITHM


def stemming(tweet):
    stemmer = PorterStemmer()
    
    tweet_stemmed = []  # TO STORE STEMMED TWEET
    
        # ITERATE EACH WORD/TOKEN IN TWEET
    for word in tweet:
        stem_word = stemmer.stem(word)  # STEMMING
        tweet_stemmed.append(stem_word)  # PUSH STEMMED TOKEN

    return tweet_stemmed


In [18]:
# DISPLAY TWEET/TOKEN AFTER STEMMING

i = 0
tweet_stemmed = []

print('Tweet After Stemming : ')

for tweet in cleaned_tweets:
    stem = stemming(tweet)

    # JUST PRINT THE FIRST 5 TWEET
    if i < 5:
        print(stem)

    i += 1
    tweet_stemmed.append(stem)

Tweet After Stemming : 
['followfriday', 'top', 'engag', 'member', 'commun', 'week', ':)']
['hey', 'jame', 'odd', ':/', 'pleas', 'call', 'contact', 'centr', '02392441234', 'abl', 'assist', ':)', 'mani', 'thank']
['listen', 'last', 'night', ':)', 'bleed', 'amaz', 'track', 'scotland']
['congrat', ':)']
['yeaaah', 'yipppi', 'accnt', 'verifi', 'rqst', 'succeed', 'got', 'blue', 'tick', 'mark', 'fb', 'profil', ':)', '15', 'day']


## Frequency Each Token

In [19]:
# FREQUENCY DICT 

frequency_token = {}

# ITERATE EVERY LABEL AND TWEETS
for label , tweets in zip(train_label, tweet_stemmed):
    for word in tweets:
        pair = (word, int(label))

        if pair in frequency_token:
            frequency_token[pair] += 1
        else:
            frequency_token[pair] = 1

# DISPLAY RESULT 
len(frequency_token) , frequency_token

  pair = (word, int(label))


(11420,
 {('followfriday', 1): 23,
  ('top', 1): 30,
  ('engag', 1): 7,
  ('member', 1): 14,
  ('commun', 1): 27,
  ('week', 1): 72,
  (':)', 1): 2960,
  ('hey', 1): 60,
  ('jame', 1): 7,
  ('odd', 1): 2,
  (':/', 1): 5,
  ('pleas', 1): 81,
  ('call', 1): 27,
  ('contact', 1): 4,
  ('centr', 1): 1,
  ('02392441234', 1): 1,
  ('abl', 1): 6,
  ('assist', 1): 1,
  ('mani', 1): 28,
  ('thank', 1): 522,
  ('listen', 1): 15,
  ('last', 1): 39,
  ('night', 1): 55,
  ('bleed', 1): 2,
  ('amaz', 1): 41,
  ('track', 1): 5,
  ('scotland', 1): 2,
  ('congrat', 1): 15,
  ('yeaaah', 1): 1,
  ('yipppi', 1): 1,
  ('accnt', 1): 2,
  ('verifi', 1): 2,
  ('rqst', 1): 1,
  ('succeed', 1): 1,
  ('got', 1): 57,
  ('blue', 1): 8,
  ('tick', 1): 1,
  ('mark', 1): 1,
  ('fb', 1): 4,
  ('profil', 1): 2,
  ('15', 1): 4,
  ('day', 1): 187,
  ('one', 1): 92,
  ('irresist', 1): 2,
  ('flipkartfashionfriday', 1): 16,
  ('like', 1): 187,
  ('keep', 1): 55,
  ('love', 1): 336,
  ('custom', 1): 4,
  ('wait', 1): 55,
  

# Train Model

before we train the model , create Extract Features First <br><br>
Extract Features to faster Training and use them for training instead tweet dataset

## Extract Features

In [20]:
# BUILD EXTRACT FEATURES FUNCTION

def Extract_Feature(tweet, frequency_token):

    # CREATE ARRAY WITH 3 INDEX VALUE 0
    index = np.zeros(shape=3)

    '''
    index[0] is bias with initial value 1
    index[1] is sum of frequency every word/token in positive label
    index[2] is sum of frequency every word/token in negative label
    '''

    index[0] = 1  # DECLARE BIAS
    
    for word in tweet:
        index[1] += frequency_token.get((word, 1) , 0)     # TOTAL FREQUENCY WORD IN POSITIVE LABELS
        index[2] += frequency_token.get((word, 0) , 0)     # TOTAL FREQUENCY WORD IN NEGATIVE LABELS


    # ADD BATCH DIMENSION , SO IT CAN BE TRAIN
    index = index[None, :]

    return index


In [21]:
# CREATE FEATURE EXTRACTION WITH shape=(8000,3)
feature_extraction = np.zeros(shape= (len(train_data), 3))

# EXTRACT FEATURE FOR EACH TWEET IN TRAIN DATA
for i in range(len(train_data)):
    feature_extraction[i,:] = Extract_Feature(tweet_stemmed[i], frequency_token)

feature_extraction

array([[1.000e+00, 3.133e+03, 6.100e+01],
       [1.000e+00, 3.705e+03, 4.440e+02],
       [1.000e+00, 3.119e+03, 1.160e+02],
       ...,
       [1.000e+00, 1.440e+02, 7.930e+02],
       [1.000e+00, 2.070e+02, 3.902e+03],
       [1.000e+00, 1.910e+02, 3.986e+03]])

## Create Logistic Regression Algorithm

First we make Sigmoid Formula

<pre>
<strong>Sigmoid Formula :      σ(x)= 1 / (1+e^−x)
</pre>

In [22]:
# CREATE SIGMOID FUNCTION

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

next create Gradient Descent formula to update Parameter

<pre>
Gradient Descent Formula :  θ --> θ − α . ∇θJ(θ)
∇ θJ(θ)= 1/m * (X^T * (h−y))
where : 
       ∇θJ(θ)  = First Derivative from that Cost Function
       α       = Learning Rate
       θ --> θ = Update Parameter (B0 , B1 , .... Bn)
</pre>

In [23]:
# CREATE GRADIENT DESCENT

def gradient_descent(x, y, theta, learning_rate, num_iterations):

    N = len(y)  # NUMBER OF SAMPLE DATA

    for i in range(num_iterations):

        # COMPUTE Z VALUE (logit value)
        z = np.dot(x , theta)

        # TRANSFORM Z INTO VALUE BETWEEN 0 - 1
        y_pred = sigmoid(z)

        # DISPLAY LOSS USING BINARY CROSS ENTROPY
        J = (-1/N) * np.sum(y * np.log(y_pred) + (1 - y) * np.log(1 - y_pred))

        # UPDATE PARAMETER 
        loss     = y_pred - y
        gradient = 1 / N * np.dot(x.T, loss )
        theta    = theta - learning_rate * gradient

    J = float(J)

    return J , theta

after we create formula for Logistic Regression , now we fit them

In [24]:
# TRAIN THE MODEL

cost , theta = gradient_descent(x = feature_extraction, y = train_label, theta= np.zeros((3,1)), learning_rate= 1e-8, num_iterations= 1500)

print('Cost Function : ', cost)
print(f'\nParameter : \n', theta)

Cost Function :  0.09040495433090542

Parameter : 
 [[ 4.07804451e-07]
 [ 1.47199549e-03]
 [-1.23819836e-03]]


# Test a Model

In [25]:
# PREDICT A TWEET 

def predict_tweet(tweet, theta, frequency_token):

    # DO PRE-PROCESS TEXT
    clean_tweet   = TextCleaning(tweet)
    tweet_token   = Tokenization(clean_tweet)
    cleaned_token = remove_StopWords(tweet_token)
    stemmed_token = stemming(cleaned_token)

    # TRANSFORM INTO EXTRACTED FEATURE
    tweet_extract = Extract_Feature(stemmed_token, frequency_token)

    # COMPUTE LOGIT VALUE
    z = np.dot(tweet_extract , theta)

    y_pred = sigmoid(z)

    return y_pred


In [26]:
# LETS PREDICT SOME TWEET FROM POSITIVE AND NEGATIVE TWEETS

print('Positive Tweets : ')
for i in range(0,5):
    random_index = random.randint(0, len(all_positive_tweets) - 1)
    print(f' Tweet {i+1} : {all_positive_tweets[random_index]} --> {predict_tweet(all_positive_tweets[random_index], theta , frequency_token)}')

print()

print('Negative Tweets : ')
for i in range(0,5):
    random_index = random.randint(0, len(all_negative_tweets) - 1)
    print(f' Tweet {i+1} : {all_negative_tweets[random_index]} --> {predict_tweet(all_negative_tweets[random_index], theta , frequency_token)}')


Positive Tweets : 
 Tweet 1 : Found some goodwill gold today :-). --> [[0.70096132]]
 Tweet 2 : Stats for the day have arrived. 2 new followers and NO unfollowers :) via http://t.co/6LyskBfqFG. --> [[0.99452528]]
 Tweet 3 : @MSOpinion glad you liked :) @sureshbabu_ --> [[0.98859708]]
 Tweet 4 : @EthanGamerTV @DiamondMinecart Yep it is :) --> [[0.98736718]]
 Tweet 5 : @envydanneh need to put me in that rotation lol :) --> [[0.98762845]]

Negative Tweets : 
 Tweet 1 : @Charliescoco @reeceftcharliie @SimonCowell too late :( --> [[0.01034391]]
 Tweet 2 : @idgitadhg :-( i'm sorry --> [[0.3255905]]
 Tweet 3 : AP won't be the same anymore :-( --> [[0.37852449]]
 Tweet 4 : sooooo tired but I can't sleep :((( --> [[0.00843077]]
 Tweet 5 : I feel so sick :(((((( --> [[0.00900718]]


If the value exceeds 0.5, the tweet is considered positive

next we predict the word that didnt exist in train data

In [27]:
# PREDICT ANOTHER WORD

word_1 = 'I’m so tired of all the negativity online. Can we please have some peace and kindness?'
word_2 = 'The new movie was incredible! I loved every minute of it. Highly recommend it! 🎬😄'
word_3 = 'This taste looks Trash! never come back again :('
word_4 = 'Today was full of joy and laughter. So thankful for the wonderful people in my life. #grateful'

print(f'Word 1 : {word_1} --> {predict_tweet(word_1, theta , frequency_token)}')
print(f'Word 2 : {word_2} --> {predict_tweet(word_2, theta , frequency_token)}')
print(f'Word 2 : {word_3} --> {predict_tweet(word_3, theta , frequency_token)}')
print(f'Word 2 : {word_4} --> {predict_tweet(word_4, theta , frequency_token)}')

Word 1 : I’m so tired of all the negativity online. Can we please have some peace and kindness? --> [[0.44936738]]
Word 2 : The new movie was incredible! I loved every minute of it. Highly recommend it! 🎬😄 --> [[0.61946661]]
Word 2 : This taste looks Trash! never come back again :( --> [[0.01212026]]
Word 2 : Today was full of joy and laughter. So thankful for the wonderful people in my life. #grateful --> [[0.67591974]]


ok next lets evaluate Model Performance

## Evaluate Model

In [28]:
# EVALUATION MODEL LOGISTIC REGRESSION

def evaluation_model(data, label , theta, frequency_token):

    y_hat = []
    
    for tweet in data:

        y_pred = predict_tweet(tweet, theta, frequency_token)
        
        # CHECK WHETHER TWEET IN THE POSITIVE OR NEGATIVE
        if y_pred > 0.5:

            y_hat.append(1.0)
        else:

            y_hat.append(0.0)

    accuracy = np.mean(np.array(y_hat) == np.squeeze(label))
    
    return accuracy

In [29]:
accuracy = evaluation_model(test_data, test_label, theta, frequency_token)

print(f'Accuracy Model : {accuracy}')

Accuracy Model : 0.994


The developed model for sentiment analysis using Logistic Regression showed excellent results with an accuracy of 0.994. This shows that the model successfully classified almost all tweets correctly (positive or negative) in the test dataset.