# Models on Combined Features
## Data Preparation

In [1]:
from Classifier import get_KNN_Model, get_accuracy_matric, get_lin_SVM_Model, get_NaiveBayes_Model
from Hawkes_Process import get_topic_vector
from Topic_Modelling import LDA_main_driver
from filters import train_test_splitter
import numpy as np
import warnings

warnings.simplefilter(action='ignore')

In [2]:
df, dict_genuine, dict_fake, lda_genuine, lda_fake = LDA_main_driver()
num_topics = 10

-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-* LDA Training Started -*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*


-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-* LDA Training Ended -*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*




In [3]:
user_topic_vectors, labels = get_topic_vector(df, dict_genuine, dict_fake, lda_genuine, lda_fake, 0)

-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-* Hawkes Process Started -*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*


-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-* Hawkes Process Ended -*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*



In [4]:
total_len = len(user_topic_vectors)
X = np.array(user_topic_vectors)
Y = np.array(labels)

In [5]:
x_train, x_test, y_train, y_test = train_test_splitter(X, Y)

In [6]:
x_train.shape, y_train.shape, x_test.shape, y_test.shape

((1221, 22), (1221,), (306, 22), (306,))

In [7]:
def confusionMatrixPrint(P,Y,dataType):
    TF = 0
    TT = 0
    FF = 0
    FT = 0
    for p,y in zip(P,Y):
        if (p,y) == (0,0):
            FF += 1
        elif (p,y) == (1,1):
            TT += 1
        elif (p,y) == (0,1):
            TF += 1
        else:
            FT += 1

    print('------------------------------------------------------------------------------------\n')
    if dataType == 0:
        print("  Confusion Matrix for Train Data : ")
    else:
        print("  Confusion Matrix for Validation Data \n")
    print("         True Positive = ",TT,"           True Negetive = ",TF)
    print("        False Positive = ",FT,"          False Negetive = ",FF)
    print('\n------------------------------------------------------------------------------------\n')
    total = TT+FF+TF+FT
    print(f"  Total Cases : {total}\n")
    print("Accuracy  : ",(TT+FF)/total)
    try:
        prec = (TT)/(TT+FT)
        recall = (TT)/(TT+TF)
        f = (2*recall*prec)/(recall+prec)
        print("Precision : ",prec)
        print("Recall    : ",recall)
        print("F1 Score  : ",f)
    except:
        pass
    
    
    print('\n------------------------------------------------------------------------------------')
    
    
    

##  1. KNN Classifier on Combined Topic Vectors Generated by LDA and Hawkes process


In [8]:
knn = get_KNN_Model(x_train, y_train)

In [9]:
pred = knn.predict(x_train)
confusionMatrixPrint(pred,y_train,0)

------------------------------------------------------------------------------------

  Confusion Matrix for Train Data : 
         True Positive =  148            True Negetive =  173
        False Positive =  46           False Negetive =  854

------------------------------------------------------------------------------------

  Total Cases : 1221

Accuracy  :  0.8206388206388207
Precision :  0.7628865979381443
Recall    :  0.46105919003115264
F1 Score  :  0.574757281553398

------------------------------------------------------------------------------------


In [10]:
pred = knn.predict(x_test)
confusionMatrixPrint(pred,y_test,1)

------------------------------------------------------------------------------------

  Confusion Matrix for Validation Data 

         True Positive =  20            True Negetive =  45
        False Positive =  27           False Negetive =  214

------------------------------------------------------------------------------------

  Total Cases : 306

Accuracy  :  0.7647058823529411
Precision :  0.425531914893617
Recall    :  0.3076923076923077
F1 Score  :  0.3571428571428572

------------------------------------------------------------------------------------


## 2. SVM Classifier on Combined Topic Vectors Generated by LDA and Hawkes process

In [11]:
svm = get_lin_SVM_Model(x_train, y_train)

In [12]:
pred = svm.predict(x_train)
confusionMatrixPrint(pred,y_train,0)

------------------------------------------------------------------------------------

  Confusion Matrix for Train Data : 
         True Positive =  51            True Negetive =  270
        False Positive =  27           False Negetive =  873

------------------------------------------------------------------------------------

  Total Cases : 1221

Accuracy  :  0.7567567567567568
Precision :  0.6538461538461539
Recall    :  0.1588785046728972
F1 Score  :  0.2556390977443609

------------------------------------------------------------------------------------


In [13]:
pred = svm.predict(x_test)
confusionMatrixPrint(pred,y_test,1)

------------------------------------------------------------------------------------

  Confusion Matrix for Validation Data 

         True Positive =  9            True Negetive =  56
        False Positive =  7           False Negetive =  234

------------------------------------------------------------------------------------

  Total Cases : 306

Accuracy  :  0.7941176470588235
Precision :  0.5625
Recall    :  0.13846153846153847
F1 Score  :  0.22222222222222224

------------------------------------------------------------------------------------


## 3. Naive Bayes Classifier on Combined Topic Vectors Generated by LDA and Hawkes process

In [14]:
nb = get_NaiveBayes_Model(x_train, y_train)

In [15]:
pred = nb.predict(x_train)
confusionMatrixPrint(pred,y_train,0)

------------------------------------------------------------------------------------

  Confusion Matrix for Train Data : 
         True Positive =  137            True Negetive =  184
        False Positive =  171           False Negetive =  729

------------------------------------------------------------------------------------

  Total Cases : 1221

Accuracy  :  0.7092547092547092
Precision :  0.4448051948051948
Recall    :  0.42679127725856697
F1 Score  :  0.4356120826709062

------------------------------------------------------------------------------------


In [16]:
pred = nb.predict(x_test)
confusionMatrixPrint(pred,y_test,1)

------------------------------------------------------------------------------------

  Confusion Matrix for Validation Data 

         True Positive =  27            True Negetive =  38
        False Positive =  55           False Negetive =  186

------------------------------------------------------------------------------------

  Total Cases : 306

Accuracy  :  0.696078431372549
Precision :  0.32926829268292684
Recall    :  0.4153846153846154
F1 Score  :  0.3673469387755102

------------------------------------------------------------------------------------


## 4. Nueral Network Classifier on Combined Topic Vectors Generated by LDA and Hawkes process
### Fully Connected Network of :
     - Input Layer : 22, 64
     - Hidden Layer 1 : 64 , 128
     - Hidden Layer 2 : 128 , 256
     - Hidden Layer 3 : 256 , 512
     - Hidden Layer 4 : 512 , 256
     - Hidden Layer 5 : 256 , 128
     - Hidden Layer 6 : 128 , 64
     - Hidden Layer 7 : 64 , 32
     - Hidden Layer 8 : 32 , 16
     - Output Layer : 16 , 2

In [17]:
import torch.nn as nn
import warnings
import torch

In [18]:
def get_Device():
    return torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [19]:
device = get_Device()
print(device)

cuda


In [20]:
x_train = torch.Tensor(x_train).to(device)
y_train = torch.Tensor(y_train).to(device)
x_test = torch.Tensor(x_test).to(device)
y_test = torch.Tensor(y_test).to(device)
y_train = y_train.to(torch.long)

In [21]:
y_train = y_train.to(torch.long)

In [22]:
class CombinedClassifierModel(nn.Module):
    def __init__(self, input_size, output_size):
        super(CombinedClassifierModel, self).__init__()
        self.layer1 = nn.Linear(input_size, 64)
        self.layer2 = nn.Linear(64, 128)
        self.layer3 = nn.Linear(128, 256)
        self.layer4 = nn.Linear(256, 512)
        self.layer5 = nn.Linear(512, 256)
        
        self.layer6 = nn.Linear(256, 128)
        self.layer7 = nn.Linear(128, 64)
        self.layer8 = nn.Linear(64, 32)
        self.layer9 = nn.Linear(32, 16)
        self.layer10 = nn.Linear(16, output_size)
        self.relu = nn.ReLU()
        

    def forward(self, inputs):
        out = self.layer1(inputs)
        out = self.relu(out)
        
        out = self.layer2(out)
        out = self.relu(out)
        
        out = self.layer3(out)
        out = self.relu(out)
        
        out = self.layer4(out)
        out = self.relu(out)
        
        out = self.layer5(out)
        out = self.relu(out)
        
        out = self.layer6(out)
        out = self.relu(out)
        
        out = self.layer7(out)
        out = self.relu(out)
        
        out = self.layer8(out)
        out = self.relu(out)
        
        out = self.layer9(out)
        out = self.relu(out)
        
        out = self.layer10(out)
        
        return out

In [23]:
input_size = 22
output_size = 2
hidden_size = 80
learning_rate = 0.0001
n_epochs = 500

In [24]:
model = CombinedClassifierModel(input_size=input_size,
                                     output_size=output_size)
model.to(device)

CombinedClassifierModel(
  (layer1): Linear(in_features=22, out_features=64, bias=True)
  (layer2): Linear(in_features=64, out_features=128, bias=True)
  (layer3): Linear(in_features=128, out_features=256, bias=True)
  (layer4): Linear(in_features=256, out_features=512, bias=True)
  (layer5): Linear(in_features=512, out_features=256, bias=True)
  (layer6): Linear(in_features=256, out_features=128, bias=True)
  (layer7): Linear(in_features=128, out_features=64, bias=True)
  (layer8): Linear(in_features=64, out_features=32, bias=True)
  (layer9): Linear(in_features=32, out_features=16, bias=True)
  (layer10): Linear(in_features=16, out_features=2, bias=True)
  (relu): ReLU()
)

In [25]:
lossfn = torch.nn.CrossEntropyLoss()
lossfn.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [26]:
def Validator(x_test, y_test):
    predicted = model(x_test).to(device)
    pred = torch.max(predicted.data,1)[1]
    total_test = len(y_test)
    correct_pred = 0

    for i in range(total_test):
        if y_test[i] == pred[i]:
            correct_pred += 1

    return correct_pred/total_test

In [27]:
val_acc_list = []
training_acc_list = []
model_list = []

for epoch in range(n_epochs):

    predicted = model(x_train).to(device)    

    loss = lossfn(predicted,y_train)
        
    optimizer.zero_grad()
    loss.backward()
    
    optimizer.step()
    
    val_acc = Validator(x_test, y_test.to(torch.int))
    
    print(f'Epoch [ {epoch+1} / {n_epochs} ] Training-Loss = {loss.item():.4f} Training-Accuracy = {1- loss.item()} Validation-Accuracy = {val_acc}')
    
    training_acc_list.append(1-loss.item())
    val_acc_list.append(val_acc)
    model_list.append(model)

Epoch [ 1 / 500 ] Training-Loss = 0.6485 Training-Accuracy = 0.3515099883079529 Validation-Accuracy = 0.7875816993464052
Epoch [ 2 / 500 ] Training-Loss = 0.6483 Training-Accuracy = 0.3516991138458252 Validation-Accuracy = 0.7875816993464052
Epoch [ 3 / 500 ] Training-Loss = 0.6481 Training-Accuracy = 0.35188794136047363 Validation-Accuracy = 0.7875816993464052
Epoch [ 4 / 500 ] Training-Loss = 0.6479 Training-Accuracy = 0.35207635164260864 Validation-Accuracy = 0.7875816993464052
Epoch [ 5 / 500 ] Training-Loss = 0.6477 Training-Accuracy = 0.3522634506225586 Validation-Accuracy = 0.7875816993464052
Epoch [ 6 / 500 ] Training-Loss = 0.6475 Training-Accuracy = 0.3524501919746399 Validation-Accuracy = 0.7875816993464052
Epoch [ 7 / 500 ] Training-Loss = 0.6474 Training-Accuracy = 0.3526347279548645 Validation-Accuracy = 0.7875816993464052
Epoch [ 8 / 500 ] Training-Loss = 0.6472 Training-Accuracy = 0.3528137803077698 Validation-Accuracy = 0.7875816993464052
Epoch [ 9 / 500 ] Training-Los

Epoch [ 70 / 500 ] Training-Loss = 0.5976 Training-Accuracy = 0.4023905396461487 Validation-Accuracy = 0.7875816993464052
Epoch [ 71 / 500 ] Training-Loss = 0.5942 Training-Accuracy = 0.4057573676109314 Validation-Accuracy = 0.7875816993464052
Epoch [ 72 / 500 ] Training-Loss = 0.5909 Training-Accuracy = 0.4091079831123352 Validation-Accuracy = 0.7875816993464052
Epoch [ 73 / 500 ] Training-Loss = 0.5876 Training-Accuracy = 0.41241705417633057 Validation-Accuracy = 0.7875816993464052
Epoch [ 74 / 500 ] Training-Loss = 0.5844 Training-Accuracy = 0.41563379764556885 Validation-Accuracy = 0.7875816993464052
Epoch [ 75 / 500 ] Training-Loss = 0.5813 Training-Accuracy = 0.418698251247406 Validation-Accuracy = 0.7875816993464052
Epoch [ 76 / 500 ] Training-Loss = 0.5785 Training-Accuracy = 0.42152875661849976 Validation-Accuracy = 0.7875816993464052
Epoch [ 77 / 500 ] Training-Loss = 0.5760 Training-Accuracy = 0.42399871349334717 Validation-Accuracy = 0.7875816993464052
Epoch [ 78 / 500 ] Tr

Epoch [ 137 / 500 ] Training-Loss = 0.5682 Training-Accuracy = 0.43179041147232056 Validation-Accuracy = 0.7875816993464052
Epoch [ 138 / 500 ] Training-Loss = 0.5681 Training-Accuracy = 0.4318660497665405 Validation-Accuracy = 0.7875816993464052
Epoch [ 139 / 500 ] Training-Loss = 0.5681 Training-Accuracy = 0.4319421648979187 Validation-Accuracy = 0.7875816993464052
Epoch [ 140 / 500 ] Training-Loss = 0.5680 Training-Accuracy = 0.4320187568664551 Validation-Accuracy = 0.7875816993464052
Epoch [ 141 / 500 ] Training-Loss = 0.5679 Training-Accuracy = 0.43209749460220337 Validation-Accuracy = 0.7875816993464052
Epoch [ 142 / 500 ] Training-Loss = 0.5678 Training-Accuracy = 0.43217766284942627 Validation-Accuracy = 0.7875816993464052
Epoch [ 143 / 500 ] Training-Loss = 0.5677 Training-Accuracy = 0.4322584867477417 Validation-Accuracy = 0.7875816993464052
Epoch [ 144 / 500 ] Training-Loss = 0.5677 Training-Accuracy = 0.43234026432037354 Validation-Accuracy = 0.7875816993464052
Epoch [ 145 

Epoch [ 206 / 500 ] Training-Loss = 0.5608 Training-Accuracy = 0.4391738176345825 Validation-Accuracy = 0.7875816993464052
Epoch [ 207 / 500 ] Training-Loss = 0.5607 Training-Accuracy = 0.43933969736099243 Validation-Accuracy = 0.7875816993464052
Epoch [ 208 / 500 ] Training-Loss = 0.5605 Training-Accuracy = 0.4395086169242859 Validation-Accuracy = 0.7875816993464052
Epoch [ 209 / 500 ] Training-Loss = 0.5603 Training-Accuracy = 0.4396820664405823 Validation-Accuracy = 0.7875816993464052
Epoch [ 210 / 500 ] Training-Loss = 0.5601 Training-Accuracy = 0.4398593306541443 Validation-Accuracy = 0.7875816993464052
Epoch [ 211 / 500 ] Training-Loss = 0.5600 Training-Accuracy = 0.4400401711463928 Validation-Accuracy = 0.7875816993464052
Epoch [ 212 / 500 ] Training-Loss = 0.5598 Training-Accuracy = 0.4402252435684204 Validation-Accuracy = 0.7875816993464052
Epoch [ 213 / 500 ] Training-Loss = 0.5596 Training-Accuracy = 0.44041359424591064 Validation-Accuracy = 0.7875816993464052
Epoch [ 214 / 

Epoch [ 274 / 500 ] Training-Loss = 0.5309 Training-Accuracy = 0.46912485361099243 Validation-Accuracy = 0.7875816993464052
Epoch [ 275 / 500 ] Training-Loss = 0.5302 Training-Accuracy = 0.46981996297836304 Validation-Accuracy = 0.7875816993464052
Epoch [ 276 / 500 ] Training-Loss = 0.5295 Training-Accuracy = 0.4704861044883728 Validation-Accuracy = 0.7875816993464052
Epoch [ 277 / 500 ] Training-Loss = 0.5289 Training-Accuracy = 0.4711225628852844 Validation-Accuracy = 0.7875816993464052
Epoch [ 278 / 500 ] Training-Loss = 0.5283 Training-Accuracy = 0.47173088788986206 Validation-Accuracy = 0.7875816993464052
Epoch [ 279 / 500 ] Training-Loss = 0.5277 Training-Accuracy = 0.47231942415237427 Validation-Accuracy = 0.7875816993464052
Epoch [ 280 / 500 ] Training-Loss = 0.5271 Training-Accuracy = 0.4728785753250122 Validation-Accuracy = 0.7875816993464052
Epoch [ 281 / 500 ] Training-Loss = 0.5266 Training-Accuracy = 0.47340822219848633 Validation-Accuracy = 0.7875816993464052
Epoch [ 282

Epoch [ 343 / 500 ] Training-Loss = 0.5051 Training-Accuracy = 0.4949473738670349 Validation-Accuracy = 0.7875816993464052
Epoch [ 344 / 500 ] Training-Loss = 0.5046 Training-Accuracy = 0.4953871965408325 Validation-Accuracy = 0.7875816993464052
Epoch [ 345 / 500 ] Training-Loss = 0.5042 Training-Accuracy = 0.49584293365478516 Validation-Accuracy = 0.7875816993464052
Epoch [ 346 / 500 ] Training-Loss = 0.5037 Training-Accuracy = 0.49632728099823 Validation-Accuracy = 0.7875816993464052
Epoch [ 347 / 500 ] Training-Loss = 0.5032 Training-Accuracy = 0.49681270122528076 Validation-Accuracy = 0.7875816993464052
Epoch [ 348 / 500 ] Training-Loss = 0.5027 Training-Accuracy = 0.4973018765449524 Validation-Accuracy = 0.7875816993464052
Epoch [ 349 / 500 ] Training-Loss = 0.5022 Training-Accuracy = 0.49779587984085083 Validation-Accuracy = 0.7875816993464052
Epoch [ 350 / 500 ] Training-Loss = 0.5017 Training-Accuracy = 0.4982806444168091 Validation-Accuracy = 0.7875816993464052
Epoch [ 351 / 5

Epoch [ 411 / 500 ] Training-Loss = 0.4681 Training-Accuracy = 0.531912624835968 Validation-Accuracy = 0.7549019607843137
Epoch [ 412 / 500 ] Training-Loss = 0.4689 Training-Accuracy = 0.531075656414032 Validation-Accuracy = 0.7679738562091504
Epoch [ 413 / 500 ] Training-Loss = 0.4664 Training-Accuracy = 0.5335899889469147 Validation-Accuracy = 0.7745098039215687
Epoch [ 414 / 500 ] Training-Loss = 0.4668 Training-Accuracy = 0.5331855416297913 Validation-Accuracy = 0.738562091503268
Epoch [ 415 / 500 ] Training-Loss = 0.4674 Training-Accuracy = 0.5326302647590637 Validation-Accuracy = 0.7581699346405228
Epoch [ 416 / 500 ] Training-Loss = 0.4647 Training-Accuracy = 0.5352595150470734 Validation-Accuracy = 0.7777777777777778
Epoch [ 417 / 500 ] Training-Loss = 0.4664 Training-Accuracy = 0.5336346328258514 Validation-Accuracy = 0.738562091503268
Epoch [ 418 / 500 ] Training-Loss = 0.4656 Training-Accuracy = 0.534433513879776 Validation-Accuracy = 0.7450980392156863
Epoch [ 419 / 500 ] T

Epoch [ 480 / 500 ] Training-Loss = 0.4316 Training-Accuracy = 0.5684272050857544 Validation-Accuracy = 0.7483660130718954
Epoch [ 481 / 500 ] Training-Loss = 0.4311 Training-Accuracy = 0.5689340233802795 Validation-Accuracy = 0.7450980392156863
Epoch [ 482 / 500 ] Training-Loss = 0.4308 Training-Accuracy = 0.5692094266414642 Validation-Accuracy = 0.7549019607843137
Epoch [ 483 / 500 ] Training-Loss = 0.4305 Training-Accuracy = 0.5695181787014008 Validation-Accuracy = 0.7450980392156863
Epoch [ 484 / 500 ] Training-Loss = 0.4302 Training-Accuracy = 0.5697978138923645 Validation-Accuracy = 0.7549019607843137
Epoch [ 485 / 500 ] Training-Loss = 0.4299 Training-Accuracy = 0.5701189041137695 Validation-Accuracy = 0.7483660130718954
Epoch [ 486 / 500 ] Training-Loss = 0.4295 Training-Accuracy = 0.5705134570598602 Validation-Accuracy = 0.7581699346405228
Epoch [ 487 / 500 ] Training-Loss = 0.4291 Training-Accuracy = 0.5709050595760345 Validation-Accuracy = 0.7483660130718954
Epoch [ 488 / 50

In [28]:
print("Maximum Training Accuracy = ", max(training_acc_list))
print("Maximum Validation Accuracy = ", max(val_acc_list))
model = model_list[training_acc_list.index(max(training_acc_list))]

Maximum Training Accuracy =  0.575679749250412
Maximum Validation Accuracy =  0.8006535947712419


In [29]:
predicted = model(x_train).to(device)
predicted = torch.max(predicted.data,1)[1]
confusionMatrixPrint(predicted.to('cpu').numpy(),y_train.to('cpu').numpy(),0)

------------------------------------------------------------------------------------

  Confusion Matrix for Train Data : 
         True Positive =  192            True Negetive =  129
        False Positive =  84           False Negetive =  816

------------------------------------------------------------------------------------

  Total Cases : 1221

Accuracy  :  0.8255528255528255
Precision :  0.6956521739130435
Recall    :  0.5981308411214953
F1 Score  :  0.64321608040201

------------------------------------------------------------------------------------


In [30]:
predicted = model(x_test).to(device)
predicted = torch.max(predicted.data,1)[1]
confusionMatrixPrint(predicted.to('cpu').numpy(),y_test.to('cpu').numpy(),1)

------------------------------------------------------------------------------------

  Confusion Matrix for Validation Data 

         True Positive =  23            True Negetive =  42
        False Positive =  34           False Negetive =  207

------------------------------------------------------------------------------------

  Total Cases : 306

Accuracy  :  0.7516339869281046
Precision :  0.40350877192982454
Recall    :  0.35384615384615387
F1 Score  :  0.3770491803278689

------------------------------------------------------------------------------------


In [31]:
torch.save(model,"saved_model/combined_nn.pt")

In [None]:
def get_trainable_data_2():
    global x_train, x_test, y_train, y_test, is_updated
    dataset_path = "C:/Users/Sampad/Desktop/Projects/Capstone/Implimentation/Code/0_DataSet/"

    df = pd.read_csv(dataset_path + "CompleteAnnotated.csv")

    df.tweet_text = df.tweet_text.apply(text_clean)
    complete_docs = []

    for i in range(df.shape[0]):
        complete_docs.append(df.iloc[i]['tweet_text'].split())

    complete_dict = Dictionary(complete_docs)
    complete_corpus = [complete_dict.doc2bow(text) for text in complete_docs]
    lda = LdaModel(complete_corpus, num_topics=10)

    num_topics = 10
    labels = []

    # LDA outputs
    user_topic_vectors = []
    print(
        '-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-* Hawkes Process Started -*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*\n\n')
    for i in range(get_number_of_users()):
        # from 0_Datset folder get each user tweets timeline and filter retweets from them
        i_user_df = get_retweet_df(i)
        if i_user_df.shape[0] < 2:
            continue

        # String Date Time to python Datetime library
        i_user_df.created_at = i_user_df.created_at.apply(dateTimeCreator)

        # time at which first retweet was made
        min_date = min(i_user_df['created_at'])

        # time to minutes passed from the first retweet
        timestamps = i_user_df.created_at.apply(get_timestamp, origin_date=min_date).to_numpy()

        min_time = np.min(timestamps)
        max_time = np.max(timestamps)

        # Min-Max Scaler (0, 1)
        sorted_time = (np.sort(np.unique((timestamps - min_time) / max_time)))

        # fit the model and get the Hawkes Expression kernal model output
        BaseLine, adj_mat = get_hawkes_model(timestamps=[sorted_time])

        # append the results

        # baselines.append(BaseLine)
        if df[df.user_id == i_user_df['user_id'][0]]['Annotation'].item() == 1 or \
                df[df.user_id == i_user_df['user_id'][0]]['Tag'].item() == 1:
            labels.append(1)
        else:
            labels.append(0)
        # hawkes_user_ids.append(i_user_df['user_id'][0])
        # adjs.append(adj_mat)

        # get topic vector for tweet in users.csv file
        tweet_text_list = df[df.user_id == i_user_df['user_id'][0]]['tweet_text'].item().split()
        topic_vector = get_topic_vector(tweet_text_list, num_topics, complete_dict, lda)
        topic_vector[-1] = BaseLine
        topic_vector[-2] = adj_mat

        user_topic_vectors.append(topic_vector)

    print(
        '-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-* Hawkes Process Ended -*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*\n')

    labels = np.array(labels)
    user_topic_vectors = np.array(user_topic_vectors)

    x_train, x_test, y_train, y_test = train_test_splitter(user_topic_vectors, labels)
    is_updated = True
    return user_topic_vectors, labels