In [1]:
import collections
import pathlib
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import losses
from tensorflow.keras import utils
from tensorflow.keras.layers import TextVectorization
import matplotlib.pyplot as plt
import os
import shutil
import string
import re
import pandas as pd
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM, SpatialDropout1D
from sklearn.model_selection import train_test_split

In [2]:
covid_df = pd.read_csv("Covid-19_Twitter_Dataset (Apr-Jun 2020).csv")

In [3]:
print("Dataset size: ", len(covid_df))

Dataset size:  143903


In [4]:
max_features = 20000
max_sequence_length = 100

In [5]:
covid_df.head()

Unnamed: 0,id,created_at,source,original_text,lang,favorite_count,retweet_count,original_author,hashtags,user_mentions,place,clean_tweet,compound,neg,neu,pos,sentiment
0,1.25e+18,2020-04-19,"<a href=""http://twitter.com/download/android"" ...",RT @GlblCtzn: .@priyankachopra is calling on l...,en,0.0,31.0,RJIshak,,"GlblCtzn, priyankachopra",Jakarta Capital Region,call leader help protect refuge covid19 provid...,0.8176,0.0,0.452,0.548,pos
1,1.25e+18,2020-04-19,"<a href=""http://twitter.com/download/android"" ...",RT @OGSG_Official: OGUN STATE SUPPORT FOR CBN-...,en,0.0,61.0,makinwaoluwole,,OGSG_Official,Nigeria,ogun state support cbn nirsal covid19 target c...,0.6486,0.0,0.602,0.398,pos
2,1.25e+18,2020-04-19,"<a href=""http://twitter.com/download/iphone"" r...",RT @AdvoBarryRoux: These 5 police officials ba...,en,0.0,1.0,TembeAmu,,AdvoBarryRoux,,polic offici base namahadi polic station busi ...,0.2732,0.0,0.851,0.149,pos
3,1.25e+18,2020-04-19,"<a href=""http://twitter.com/download/iphone"" r...",RT @MobilePunch: COVID-19: Oyo discharges two ...,en,0.0,0.0,ilyasrabiu,,MobilePunch,"Lagos, Nigeria",covid19 oyo discharg two patient,0.0,0.0,1.0,0.0,neu
4,1.25e+18,2020-04-19,"<a href=""http://twitter.com/download/android"" ...",My Condolences to the Family of those who did ...,en,0.0,13869.0,bucketeconomist,Covid_19,,,condol famili surviv,0.0,0.0,1.0,0.0,neu


In [6]:
columns_to_drop = ['id', 'created_at', 'source', 'original_text', 'lang', 'favorite_count', 'retweet_count', 'original_author', 'hashtags', 'user_mentions', 'place', 'compound', 'neg', 'neu', 'pos']
covid_df = covid_df.drop(columns=columns_to_drop)

In [7]:
covid_df.head(5)

Unnamed: 0,clean_tweet,sentiment
0,call leader help protect refuge covid19 provid...,pos
1,ogun state support cbn nirsal covid19 target c...,pos
2,polic offici base namahadi polic station busi ...,pos
3,covid19 oyo discharg two patient,neu
4,condol famili surviv,neu


In [8]:
covid_df.tail()

Unnamed: 0,clean_tweet,sentiment
143898,congratul studi one year master degre,neu
143899,apolog fear anxieti caus claim surviv rate acq...,neg
143900,anoth meat process outbreak time,neu
143901,knew covid19 would spread trump prais els rall...,pos
143902,friend stori miss main point move forward open...,pos


In [11]:
covid_df['clean_tweet'] = covid_df['clean_tweet'].fillna('')
covid_df['clean_tweet'] = covid_df['clean_tweet'].apply(lambda x: x.lower())
covid_df['clean_tweet'] = covid_df['clean_tweet'].apply((lambda x: re.sub('[^a-zA-z0-9\s]','',x)))

print(covid_df[ covid_df['sentiment'] == 'pos'].size)
print(covid_df[ covid_df['sentiment'] == 'neg'].size)
print(covid_df[ covid_df['sentiment'] == 'neu'].size)

covid_df['clean_tweet'] = covid_df['clean_tweet'].str.replace('rt', ' ')

max_features = 20000
tokenizer = Tokenizer(num_words=max_features, split=' ')
tokenizer.fit_on_texts(covid_df['clean_tweet'].values)
X = tokenizer.texts_to_sequences(covid_df['clean_tweet'].values)
X = pad_sequences(X,maxlen=max_sequence_length)

92250
80384
115172


In [13]:
embed_dim = 128
lstm_out = 196

model = Sequential()
model.add(Embedding(max_features, embed_dim,input_length = X.shape[1]))
model.add(SpatialDropout1D(0.4))
model.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(3,activation='softmax'))
model.compile(loss = 'categorical_crossentropy', optimizer='adam',metrics = ['accuracy'])
print(model.summary())

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 100, 128)          2560000   
                                                                 
 spatial_dropout1d (Spatial  (None, 100, 128)          0         
 Dropout1D)                                                      
                                                                 
 lstm (LSTM)                 (None, 196)               254800    
                                                                 
 dense (Dense)               (None, 3)                 591       
                                                                 
Total params: 2815391 (10.74 MB)
Trainable params: 2815391 (10.74 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None


In [14]:
Y = pd.get_dummies(covid_df['sentiment']).values
X_train, X_test, Y_train, Y_test = train_test_split(X,Y, test_size = 0.3, random_state = 42)
print(X_train.shape,Y_train.shape)
print(X_test.shape,Y_test.shape)

(100732, 100) (100732, 3)
(43171, 100) (43171, 3)


In [15]:
batch_size = 32
model.fit(X_train, Y_train, epochs = 2, batch_size=batch_size, verbose = 2)

Epoch 1/2
3148/3148 - 797s - loss: 0.2024 - accuracy: 0.9301 - 797s/epoch - 253ms/step
Epoch 2/2
3148/3148 - 796s - loss: 0.0698 - accuracy: 0.9808 - 796s/epoch - 253ms/step


<keras.src.callbacks.History at 0x1a2453d7350>

In [16]:
validation_size = 15000

X_validate = X_test[-validation_size:]
Y_validate = Y_test[-validation_size:]
X_test = X_test[:-validation_size]
Y_test = Y_test[:-validation_size]
score,acc = model.evaluate(X_test, Y_test, verbose = 2, batch_size = batch_size)
print("score: %.2f" % (score))
print("acc: %.2f" % (acc))

881/881 - 30s - loss: 0.0674 - accuracy: 0.9814 - 30s/epoch - 34ms/step
score: 0.07
acc: 0.98


In [17]:
model.save('sentiment_analysis_model.keras')

In [18]:
from tensorflow.keras.models import load_model

# Load the saved model from the HDF5 file
loaded_model = load_model('sentiment_analysis_model.keras')

In [19]:

new_text = ["The covid19 is in my country."]  # Replace with your new data
new_data = tokenizer.texts_to_sequences(new_text)  # Tokenize and preprocess the new text
new_data = pad_sequences(new_data, maxlen=max_sequence_length)  # Pad to match the model's input shape

# Make predictions
predictions = loaded_model.predict(new_data)

# Interpret predictions (e.g., convert class probabilities to labels)
sentiment_labels = ['negative', 'neutral', 'positive']  # Replace with your label names
predicted_label = sentiment_labels[predictions.argmax()]

print("Predicted sentiment:", predicted_label)

Predicted sentiment: neutral


In [20]:
from flask import Flask, request, jsonify
from keras.models import load_model
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

In [21]:
app = Flask(__name__)

In [22]:
model = load_model('sentiment_analysis_model.keras')

In [23]:
tokenizer = Tokenizer(num_words=20000)
tokenizer.fit_on_texts([]) 

In [24]:
@app.route('/custom_endpoint', methods=['POST'])
def predict_sentiment():
    try:
        # Get the text data from the request
        data = request.get_json()
        text = data['text']

        # Preprocess the text
        sequence = tokenizer.texts_to_sequences([text])
        padded_sequence = pad_sequences(sequence, maxlen=100)  # Adjust maxlen as needed

        # Make predictions
        prediction = model.predict(padded_sequence)
        sentiment_labels = ['negative', 'neutral', 'positive']  # Define your labels
        predicted_label = sentiment_labels[prediction.argmax()]

        # Return the result as JSON
        return jsonify({'result': 'Prediction result'})
    except Exception as e:
        return jsonify({'error': str(e)})

if __name__ == '__main__':
    app.run(debug=True)

 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
 * Restarting with watchdog (windowsapi)


SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
