In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import pandas as pd
import numpy as np
from tensorflow.keras.models import load_model
from keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [3]:
#Loadimg the trained model
model = load_model('/content/drive/MyDrive/model.h5')



In [4]:
# Load the preprocessed tweets
encoding = 'ISO-8859-1'
tweets_df = pd.read_csv('/content/drive/MyDrive/clean_tweets.csv',encoding=encoding)

In [5]:
# Tokenizing same as the training dataset
max_features = 20000
tokenizer = Tokenizer(num_words=max_features, split=' ')
tokenizer.fit_on_texts(tweets_df['clean_text'].values)
tweet_tokens = tokenizer.texts_to_sequences(tweets_df['clean_text'].values)

# Pad the sequences to a fixed length consistent with the input shape of the model
tweet_tokens = pad_sequences(tweet_tokens, maxlen=50)

In [6]:
# Make predictions using the trained model
predictions = model.predict(tweet_tokens)



In [10]:
# Converting the categorical labels to polarity value

categorical_labels = [0, 1]
polarity = []
for i in predictions:
    if np.argmax(i) == 0:
        polarity.append(0)
    elif np.argmax(i) == 1:
        polarity.append(1)
    else:
        polarity.append(-1)

In [11]:
len(tweets_df)

9655

In [12]:
# Add the predicted sentiments to the DataFrame
tweets_df['polarity'] = polarity
tweets_df['polarity'].unique()

array([1, 0])

In [13]:
tweets_df['polarity'].value_counts()


1    5126
0    4529
Name: polarity, dtype: int64

In [15]:
tweets_df.tail(100)

Unnamed: 0,clean_text,polarity
9555,kindly check dm and assist tia,1
9556,uda is selling govt shares in all companies th...,0
9557,hi bikodo you mean if you can use a sim on y...,1
9558,check your dm,1
9559,my question was will the data bundles that w...,0
...,...,...
9650,hello kindly activate my mpesa,1
9651,after the split if mpesa and safaricom get lis...,1
9652,state of in conversation with,1
9653,hey musa please ignore if you have no issue ...,1


In [16]:
# Save the updated DataFrame to a new CSV file
tweets_df.to_csv('tweets_with_sentiments.csv', index=False)