# What to expect 🤔

In this notebook, we run inference on our Messi and Ronaldo tweet datasets with the sentiment analysis model trained [here](https://www.kaggle.com/code/ibrahimserouis99/twitter-sentiment-analysis).

# Libraries

In [1]:
import numpy as np 
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import load_model
from IPython.display import clear_output

2022-04-17 12:59:22.774612: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0


# Exploring the datasets

## Messi

In [2]:
# Read the cleaned Messi tweets and preview the first five rows.
messi_csv_path = "../input/twitter-sentiment-analysis-and-word-embeddings/Cleaned_messi_tweets.csv"
dataset_messi = pd.read_csv(messi_csv_path, encoding="latin")
dataset_messi.head(5)

Unnamed: 0,tweet_id,author_id,content,lang,date,source,geo,retweet_count,like_count,quote_count
0,1515608437270454276,1464517603637014532,messi missed a pen against real madrid in the ...,en,2022-04-17T08:29:44.000Z,Twitter Web App,-1,0,0,0
1,1515608409730699264,1307634596985659395,many strikers does that also not to mention th...,en,2022-04-17T08:29:38.000Z,Twitter Web App,-1,0,0,0
2,1515608399295270912,1475133084596981762,lionel messi has goals in finals check anywhere,en,2022-04-17T08:29:35.000Z,Twitter for Android,-1,0,0,0
3,1515608375782002691,1390728932228485123,spot on however even fergie scholes beckham al...,en,2022-04-17T08:29:30.000Z,Twitter for iPhone,-1,0,0,0
4,1515608312787750913,1342456471167119360,messi is not a striker he s a playmaker giving...,en,2022-04-17T08:29:15.000Z,Twitter for Android,-1,0,1,0


### Check for duplicate tweets

In [3]:
# Sanity check: every row must carry a distinct tweet ID. Series.is_unique
# answers that directly, without building an array of the unique values
# just to compare its length with the row count.
assert dataset_messi["tweet_id"].is_unique, "Duplicate IDs !"

### Drop N/A values

In [4]:
# Report the number of missing values per column, drop the incomplete rows,
# then report the counts again to confirm that none remain.
na_before_messi = dataset_messi.isna().sum()
print(f"Number of N/A: \n{na_before_messi}")
print("\nDropping N/A values...")
dataset_messi.dropna(inplace=True)  # modifies the existing dataframe
na_after_messi = dataset_messi.isna().sum()
print(f"\n\nNumber N/A after dropping: \n{na_after_messi}")

Number of N/A: 
tweet_id         0
author_id        0
content          0
lang             0
date             0
source           0
geo              0
retweet_count    0
like_count       0
quote_count      0
dtype: int64

Dropping N/A values...


Number N/A after dropping: 
tweet_id         0
author_id        0
content          0
lang             0
date             0
source           0
geo              0
retweet_count    0
like_count       0
quote_count      0
dtype: int64


## Ronaldo

In [5]:
# Read the cleaned Ronaldo tweets and preview the first five rows.
ronaldo_csv_path = "../input/twitter-sentiment-analysis-and-word-embeddings/Cleaned_ronaldo_tweets.csv"
dataset_ronaldo = pd.read_csv(ronaldo_csv_path, encoding="utf-8")
dataset_ronaldo.head(5)

Unnamed: 0,tweet_id,author_id,content,lang,date,source,geo,retweet_count,like_count,quote_count
0,1515589799943548929,1152201242766131202,there s no reason to believe cristiano ronaldo...,en,2022-04-17T07:15:41.000Z,Twitter for Android,-1,0.0,0.0,0.0
1,1515589796130865155,1202283615427727360,vintage ronaldo tottenham and norwich tap in h...,en,2022-04-17T07:15:40.000Z,Twitter Web App,-1,0.0,0.0,0.0
2,1515589765172768769,3368387687,werra mister champions league ronaldo why did ...,en,2022-04-17T07:15:32.000Z,Twitter for iPhone,-1,0.0,0.0,0.0
3,1515589753470861318,2615336101,yeah but facts don t lie messi is missing awar...,en,2022-04-17T07:15:30.000Z,Twitter for Android,-1,0.0,0.0,0.0
4,1515589684013027335,1093900817046810625,ronaldo was greatly rewarded after hat trick a...,en,2022-04-17T07:15:13.000Z,IFTTT,-1,0.0,0.0,0.0


### Check for duplicate tweets

In [6]:
# Sanity check: every row must carry a distinct tweet ID. Series.is_unique
# answers that directly, without building an array of the unique values
# just to compare its length with the row count.
assert dataset_ronaldo["tweet_id"].is_unique, "Duplicate IDs!"

### Drop N/A values

In [7]:
# Report the number of missing values per column, drop the incomplete rows,
# then report the counts again to confirm that none remain.
na_before_ronaldo = dataset_ronaldo.isna().sum()
print(f"Number of N/A: \n{na_before_ronaldo}")
print("\nDropping N/A values...")
dataset_ronaldo.dropna(inplace=True)  # modifies the existing dataframe
na_after_ronaldo = dataset_ronaldo.isna().sum()
print(f"\n\nNumber N/A after dropping: \n{na_after_ronaldo}")

Number of N/A: 
tweet_id         0
author_id        0
content          9
lang             0
date             0
source           0
geo              0
retweet_count    0
like_count       0
quote_count      0
dtype: int64

Dropping N/A values...


Number N/A after dropping: 
tweet_id         0
author_id        0
content          0
lang             0
date             0
source           0
geo              0
retweet_count    0
like_count       0
quote_count      0
dtype: int64


# Load the model

In [8]:
# Load the trained sentiment-analysis model and print its layer-by-layer summary.
model_path = "../input/twitter-sentiment-analysis-and-word-embeddings/TSA_model_v4"
model = load_model(model_path)
model.summary()

2022-04-17 12:59:29.942592: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2022-04-17 12:59:29.945796: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2022-04-17 12:59:29.998070: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:941] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-04-17 12:59:29.998797: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:00:04.0 name: Tesla P100-PCIE-16GB computeCapability: 6.0
coreClock: 1.3285GHz coreCount: 56 deviceMemorySize: 15.90GiB deviceMemoryBandwidth: 681.88GiB/s
2022-04-17 12:59:29.998856: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
2022-04-17 12:59:30.041395: I tensorflow/stream_executor/platform/def

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
text_vectorization (TextVect (None, 53)                0         
_________________________________________________________________
sequential (Sequential)      (None, 1)                 27859389  
_________________________________________________________________
activation (Activation)      (None, 1)                 0         
Total params: 27,859,389
Trainable params: 27,859,389
Non-trainable params: 0
_________________________________________________________________


# Set the threshold: refer to [this link](https://github.com/Justsecret123/Twitter-sentiment-analysis/blob/main/Notebook/twitter-sentiment-analysis.ipynb)

In [9]:
# Score cut-off that assign_label compares each prediction against to choose
# between the "Positive" and "Negative" labels.
threshold = 0.625

# Run inferences

## Messi dataset

In [10]:
# Keep only the tweet text: this Series is what gets passed to model.predict.
X_messi = dataset_messi["content"]
X_messi.head(10)

0    messi missed a pen against real madrid in the ...
1    many strikers does that also not to mention th...
2      lionel messi has goals in finals check anywhere
3    spot on however even fergie scholes beckham al...
4    messi is not a striker he s a playmaker giving...
5    i watch all messi and ronaldo games ronaldo al...
6    messi is the system he is currently teaching p...
7    three goals in a row and they are acting like ...
8    you messi fans should calm down na he s a unit...
9    prolly messi completed football before and you...
Name: content, dtype: object

### Perform inferences

In [11]:
# Score every Messi tweet with the loaded model (one score per tweet).
predictions_messi = model.predict(X_messi)

2022-04-17 12:59:56.808688: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.11
2022-04-17 12:59:57.500991: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublasLt.so.11
2022-04-17 12:59:57.555610: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudnn.so.8


### Display some results

In [12]:
# Show the first ten tweets next to their scores. Both sides are sliced by
# position: the predictions are positional, whereas X_messi[i] looks a row up
# by index label, and the labels need not run 0..n-1 once dropna() has removed
# rows (KeyError, or a tweet printed next to another tweet's score).
# Slicing also copes with fewer than ten rows.
for tweet, score in zip(X_messi.iloc[:10], predictions_messi[:10]):
    print(f"Tweet: {tweet} ||| Score : {score}")

Tweet: messi missed a pen against real madrid in the quarter final of ucl ||| Score : [0.6731385]
Tweet: many strikers does that also not to mention that he played more games than messi ||| Score : [0.65665877]
Tweet: lionel messi has goals in finals check anywhere ||| Score : [0.7110322]
Tweet: spot on however even fergie scholes beckham also saying messi goat cr isn t top all time ||| Score : [0.6612542]
Tweet: messi is not a striker he s a playmaker giving assists and sometimes scoring goals ||| Score : [0.59083074]
Tweet: i watch all messi and ronaldo games ronaldo always end up with ratings without goals while messi will be rating ||| Score : [0.6554347]
Tweet: messi is the system he is currently teaching psg his plays one season with mbappe mbappe no wan go madrid again ||| Score : [0.6884214]
Tweet: three goals in a row and they are acting like he s messi in his prime ||| Score : [0.7053263]
Tweet: you messi fans should calm down na he s a united boi so ronaldo is the goat and l

## Ronaldo dataset

In [13]:
# Keep only the tweet text: this Series is what gets passed to model.predict.
X_ronaldo = dataset_ronaldo["content"]
X_ronaldo.head(5)

0    there s no reason to believe cristiano ronaldo...
1    vintage ronaldo tottenham and norwich tap in h...
2    werra mister champions league ronaldo why did ...
3    yeah but facts don t lie messi is missing awar...
4    ronaldo was greatly rewarded after hat trick a...
Name: content, dtype: object

In [14]:
# Score every Ronaldo tweet with the loaded model (one score per tweet).
predictions_ronaldo = model.predict(X_ronaldo)

### Display some results

In [15]:
# Show the first ten tweets next to their scores. Both sides are sliced by
# position: the predictions are positional, whereas X_ronaldo[i] looks a row
# up by index label, and rows with missing content were dropped from this
# dataset, so the labels are no longer guaranteed to run 0..n-1 (KeyError, or
# a tweet printed next to another tweet's score).
# Slicing also copes with fewer than ten rows.
for tweet, score in zip(X_ronaldo.iloc[:10], predictions_ronaldo[:10]):
    print(f"Tweet: {tweet} ||| Score : {score}")

Tweet: there s no reason to believe cristiano ronaldo won t still be a top player when he s football ||| Score : [0.7210271]
Tweet: vintage ronaldo tottenham and norwich tap in header long range ||| Score : [0.7255068]
Tweet: werra mister champions league ronaldo why did man u buy ronaldo he can t deliver fa cup not to talk of the league nor champions league man his about to deliver conference league with his goals a season ||| Score : [0.5898251]
Tweet: yeah but facts don t lie messi is missing award to match ronaldo ||| Score : [0.69758606]
Tweet: ronaldo was greatly rewarded after hat trick against norwich ||| Score : [0.70167935]
Tweet: connect with turn on notification for daily gain followers ||| Score : [0.71780527]
Tweet: ronaldo earned a bonus of as a result of his hat trick against norwich ||| Score : [0.7271059]
Tweet: manchester united star cristiano earned a bumper bonus after hitting hat trick in victory against norwich ||| Score : [0.7276084]
Tweet: epl rangnick reveals 

# Assign the predictions to the dataset

## Messi

In [16]:
# Store the model scores in a new "prediction" column, next to the tweets.
dataset_messi["prediction"] = predictions_messi
dataset_messi.head(5)

Unnamed: 0,tweet_id,author_id,content,lang,date,source,geo,retweet_count,like_count,quote_count,prediction
0,1515608437270454276,1464517603637014532,messi missed a pen against real madrid in the ...,en,2022-04-17T08:29:44.000Z,Twitter Web App,-1,0,0,0,0.673138
1,1515608409730699264,1307634596985659395,many strikers does that also not to mention th...,en,2022-04-17T08:29:38.000Z,Twitter Web App,-1,0,0,0,0.656659
2,1515608399295270912,1475133084596981762,lionel messi has goals in finals check anywhere,en,2022-04-17T08:29:35.000Z,Twitter for Android,-1,0,0,0,0.711032
3,1515608375782002691,1390728932228485123,spot on however even fergie scholes beckham al...,en,2022-04-17T08:29:30.000Z,Twitter for iPhone,-1,0,0,0,0.661254
4,1515608312787750913,1342456471167119360,messi is not a striker he s a playmaker giving...,en,2022-04-17T08:29:15.000Z,Twitter for Android,-1,0,1,0,0.590831


## Ronaldo

In [17]:
# Store the model scores in a new "prediction" column, next to the tweets.
dataset_ronaldo["prediction"] = predictions_ronaldo
dataset_ronaldo.head(5)

Unnamed: 0,tweet_id,author_id,content,lang,date,source,geo,retweet_count,like_count,quote_count,prediction
0,1515589799943548929,1152201242766131202,there s no reason to believe cristiano ronaldo...,en,2022-04-17T07:15:41.000Z,Twitter for Android,-1,0.0,0.0,0.0,0.721027
1,1515589796130865155,1202283615427727360,vintage ronaldo tottenham and norwich tap in h...,en,2022-04-17T07:15:40.000Z,Twitter Web App,-1,0.0,0.0,0.0,0.725507
2,1515589765172768769,3368387687,werra mister champions league ronaldo why did ...,en,2022-04-17T07:15:32.000Z,Twitter for iPhone,-1,0.0,0.0,0.0,0.589825
3,1515589753470861318,2615336101,yeah but facts don t lie messi is missing awar...,en,2022-04-17T07:15:30.000Z,Twitter for Android,-1,0.0,0.0,0.0,0.697586
4,1515589684013027335,1093900817046810625,ronaldo was greatly rewarded after hat trick a...,en,2022-04-17T07:15:13.000Z,IFTTT,-1,0.0,0.0,0.0,0.701679


# Convert predictions (scores) to labels (positive or negative)

## Define the processing function

In [18]:
def assign_label(x, cutoff=None):
    """
    Convert a prediction score into a sentiment label.

    Parameters
    ----------
    x : SCORE
        The prediction score
    cutoff : SCORE, optional
        Lowest score that is labelled "Positive". When omitted, the
        notebook-level ``threshold`` is used.

    Returns
    -------
    label : STRING
        The sentiment of the tweet: "Positive" when ``x >= cutoff``,
        otherwise "Negative".
    """
    if cutoff is None:
        # Read at call time, so a later change to ``threshold`` is picked up
        cutoff = threshold
    # Compare the raw score. Rounding it to two decimals first is coarser than
    # a three-decimal threshold such as 0.625: round(0.625, 2) == 0.62, so a
    # score sitting exactly on the threshold used to be labelled "Negative".
    return "Positive" if x >= cutoff else "Negative"

## Application : Messi Dataset

In [19]:
 # Create a column named "label"
# Label every score with assign_label. The function is handed to apply()
# directly (no wrapping lambda needed), and the assignment itself creates the
# column, so no placeholder value has to be written first.
dataset_messi["label"] = dataset_messi["prediction"].apply(assign_label)
dataset_messi.head(5)

Unnamed: 0,tweet_id,author_id,content,lang,date,source,geo,retweet_count,like_count,quote_count,prediction,label
0,1515608437270454276,1464517603637014532,messi missed a pen against real madrid in the ...,en,2022-04-17T08:29:44.000Z,Twitter Web App,-1,0,0,0,0.673138,Positive
1,1515608409730699264,1307634596985659395,many strikers does that also not to mention th...,en,2022-04-17T08:29:38.000Z,Twitter Web App,-1,0,0,0,0.656659,Positive
2,1515608399295270912,1475133084596981762,lionel messi has goals in finals check anywhere,en,2022-04-17T08:29:35.000Z,Twitter for Android,-1,0,0,0,0.711032,Positive
3,1515608375782002691,1390728932228485123,spot on however even fergie scholes beckham al...,en,2022-04-17T08:29:30.000Z,Twitter for iPhone,-1,0,0,0,0.661254,Positive
4,1515608312787750913,1342456471167119360,messi is not a striker he s a playmaker giving...,en,2022-04-17T08:29:15.000Z,Twitter for Android,-1,0,1,0,0.590831,Negative


## Application : Ronaldo Dataset

In [20]:
# Create a column named "label"
# Label every score with assign_label. The function is handed to apply()
# directly (no wrapping lambda needed), and the assignment itself creates the
# column, so no placeholder value has to be written first.
dataset_ronaldo["label"] = dataset_ronaldo["prediction"].apply(assign_label)
dataset_ronaldo.head(5)

Unnamed: 0,tweet_id,author_id,content,lang,date,source,geo,retweet_count,like_count,quote_count,prediction,label
0,1515589799943548929,1152201242766131202,there s no reason to believe cristiano ronaldo...,en,2022-04-17T07:15:41.000Z,Twitter for Android,-1,0.0,0.0,0.0,0.721027,Positive
1,1515589796130865155,1202283615427727360,vintage ronaldo tottenham and norwich tap in h...,en,2022-04-17T07:15:40.000Z,Twitter Web App,-1,0.0,0.0,0.0,0.725507,Positive
2,1515589765172768769,3368387687,werra mister champions league ronaldo why did ...,en,2022-04-17T07:15:32.000Z,Twitter for iPhone,-1,0.0,0.0,0.0,0.589825,Negative
3,1515589753470861318,2615336101,yeah but facts don t lie messi is missing awar...,en,2022-04-17T07:15:30.000Z,Twitter for Android,-1,0.0,0.0,0.0,0.697586,Positive
4,1515589684013027335,1093900817046810625,ronaldo was greatly rewarded after hat trick a...,en,2022-04-17T07:15:13.000Z,IFTTT,-1,0.0,0.0,0.0,0.701679,Positive


# Save the results
> Note: if you plan to use these datasets, feel free to check and adjust the labels manually to bring them closer to human-level accuracy.

## Messi 

In [21]:
# Write the Messi dataframe to CSV; index=False leaves the row index out of the file.
dataset_messi.to_csv("Predictions_messi.csv", index=False)

## Ronaldo

In [22]:
# Write the Ronaldo dataframe to CSV; index=False leaves the row index out of the file.
dataset_ronaldo.to_csv("Predictions_ronaldo.csv", index=False)