In [3]:
import torch
import torch.nn as nn
import torch.functional as F
import pandas as pd
import torch.optim as opt
import numpy
from torchtext.vocab import GloVe
import random

torch.manual_seed(1)
random.seed(1)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [4]:
class onePercent():
    def within_one_percent(self, num1, num2):
        one_percent = abs(num1) * 0.01  # one percent of the first number
        return abs(num1 - num2) <= one_percent

In [5]:
class toGlove():
    def __init__(self):
        pass

    def toGloVe(self, batch_sentence):
        """
            Convert the tweets into GloVe embedding
            pre - batch of sentences
            post - a tensor representing the sentence, can directly input to 
                  rnn, with batch_first=False
        """

        batch_embedding=[]

        for sentence in batch_sentence:
            old_sentence=sentence
            sentence=sentence.replace(",", "")
            sentence=sentence.replace("-", " ")
            sentence=sentence.replace("--", " ")
            sentence=sentence.replace(":", "")
            sentence=sentence.replace(";", "")
            sentence=sentence.replace("!", ".")
            sentence=sentence.replace("?", ".")
            sentence=sentence.replace("\n", " ")
            sentence=sentence.replace("(", " ")
            sentence=sentence.replace(")", " ")
            sentence=sentence.replace("  ", " ")
            sentence=sentence.lower()

            list_of_embedding=[]
            sentence=sentence.split()

            if(len(sentence)>10):
                for i in range(10):
                    list_of_embedding.append(glove[sentence[i]])
            else:
                for word in sentence:
                    list_of_embedding.append(glove[word])
            
            list_of_embedding=torch.stack(list_of_embedding)

            batch_embedding.append(list_of_embedding)

        batch_embedding = nn.utils.rnn.pad_sequence(batch_embedding)

        return batch_embedding

In [32]:
class RNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.feature_extractor=nn.GRU(50, 10, 1)
        self.classifier1=nn.Linear(10, 2)
        self.classifier2=nn.Linear(2, 1)
        self.sigmoid=nn.Sigmoid()
    
    def forward(self, batch_embedding):
        #RNN
        output, _=self.feature_extractor(batch_embedding)
        #Get the output embedding
        output=output[-1,:]

        #Input into the classifier
        output=self.classifier1(output)
        output=self.sigmoid(output)
        output=self.classifier2(output)
        output=self.sigmoid(output)
        return output

In [43]:
my_rnn=RNN()
my_rnn= torch.load("Trump_Tweet.pth", map_location=torch.device(device))
my_rnn=my_rnn.to(device)

In [44]:
#Final testing stage
glove = GloVe(name='6B', dim=50)
to_glove=toGlove()
my_tweet=["I am going to go fishing today"]
my_tweet=to_glove.toGloVe(my_tweet)
my_tweet=my_tweet.to(device)
trump_tweet=["I’ve done more in less than 4 years than Biden’s done in more than 40 years, including for Black America. Biden has been a part of every failed decision for decades. Bad Trade Deals, Endless Wars, you name it, he has shown a complete lack of leadership. He’s weak & shot!!!"]
trump_tweet=to_glove.toGloVe(trump_tweet)
trump_tweet=trump_tweet.to(device)

print("This is my tweet, the chance of this being a real Trump tweet is: ", my_rnn(my_tweet).item()*100, "%")
print("This is Trump tweet, the chance of this being a real Trump tweet is: ", my_rnn(trump_tweet).item()*100, "%")

This is my tweet, the chance of this being a real Trump tweet is:  8.218467980623245 %
This is Trump tweet, the chance of this being a real Trump tweet is:  91.62430167198181 %


In [40]:
#This is training the model
df=pd.read_csv("realdonaldtrump.csv")

my_rnn=RNN()
one_percent=onePercent()
my_rnn=my_rnn.to(device)

big_list=[]

criterion=nn.BCELoss()
optimizer=opt.Adam(my_rnn.parameters(), lr=0.001, weight_decay=0.00001)

read=pd.read_csv("personality.csv")

my_batch=[0, 0, 0, 0, 0, 0, 0, 0]

trump_loss=0
generated_loss=0

#Training the network
size=len(df["content"])
random_size=len(read["Persona"])-1
epoch=1

counter=0

for j in range(epoch):
    for i in range(size):
        if(i%8==0 and i!=0):
            #Switch 8 sentences of Trump tweet into embedding
            my_batch_glove=to_glove.toGloVe(my_batch)
            my_batch_glove=my_batch_glove.to(device)

            #Get the output of the tweet
            output=my_rnn(my_batch_glove)

            #Get the correct answer tensor
            correct_answer=torch.ones(8, 1)
            correct_answer=correct_answer.to(device)

            #Gradient descent
            loss=criterion(output, correct_answer)
            loss.backward()
            trump_loss=loss.item()

            if(i%10240==0):
                print("Real Trump Loss", loss.item())

            torch.nn.utils.clip_grad_norm_(my_rnn.parameters(), max_norm=1)

            optimizer.step()
            optimizer.zero_grad()

            #Create a list with 8 elements, each of which is a sentence
            list_of_random_sentences=[]
            k=0
            while(k<8):
                #Get a random index of our sentence
                random_index=random.randint(0, random_size)

                sentence=read["Persona"][random_index]
                list_of_random_sentences.append(sentence)
                k+=1

            #Change the list of sentences into tensor embedding
            list_of_random_sentences=to_glove.toGloVe(list_of_random_sentences)
            list_of_random_sentences=list_of_random_sentences.to(device)

            #Get the model predicted output
            output=my_rnn(list_of_random_sentences)

            #This sentence is a fake Trump tweet
            correct_answer=torch.zeros(8, 1)
            correct_answer=correct_answer.to(device)

            #Gradient descent
            loss=criterion(output, correct_answer)
            if(i%10240==0):
                print("Generated Loss", loss.item())
            loss.backward()

            generated_loss=loss.item()
            torch.nn.utils.clip_grad_norm_(my_rnn.parameters(), max_norm=1)

            optimizer.step()
            optimizer.zero_grad()

            if(generated_loss<0.1 and trump_loss<0.1):
                counter+=1
                if(counter>6):
                    if(one_percent.within_one_percent(generated_loss, trump_loss)):
                        print(generated_loss, trump_loss)
                        break;
            else:
                counter=0
        else:
            my_batch[i%8]=df["content"][i]

Real Trump Loss 0.23312360048294067
Generated Loss 0.18683865666389465
0.0832766592502594 0.08311685174703598


In [39]:
torch.save(my_rnn, 'Trump_Tweet.pth')