In [1]:
import sys
sys.path.insert(1, '../..')

import torch
import torch.nn as nn
import random
import pandas as pd
import numpy as np
import time

from library.evaluation import ConfusionMatrix

# Seed every random number generator the notebook touches, not only Python's `random`:
# NumPy and PyTorch keep their own state, and PyTorch's drives the weight initialisation.
# NOTE(review): GPU runs may still differ slightly between executions because
# `cudnn.benchmark` is switched on when a model is placed on CUDA.
SEED = 33
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Prefixes used to build the checkpoint / report names of every model in this notebook.
dataset_name = "Phemernr2_CrossVal"
unique_name = "T5"

In [2]:
# Embedding matrix stored as comma-separated text; its rows are addressed by the
# row positions of the dataset frame in the cells below.
vectors = np.loadtxt(
    "../../data/processed/vectors/Phemernr2_T5_vectors.txt",
    delimiter=",",
)
first = vectors[0]
vectors.shape

(6425, 768)

In [3]:
# Tweet-level metadata (text, labels, topic and the split / fold assignment columns).
data = pd.read_csv(
    "../../data/processed/phemernr2_dataset_with_tvt.csv",
    lineterminator="\n",
)
data.head()

Unnamed: 0,tweet_id,tweet_text,label,label2,topic,tvt,cv_fold,tt
0,552833795142209536,The East London Mosque would like to offer its...,non-rumours,non-rumours,charliehebdo-all-rnr-threads,test,2,test
1,580318210609696769,BREAKING - A Germanwings Airbus A320 plane rep...,rumours,true,germanwings-crash-all-rnr-threads,training,3,training
2,552798891994009601,Reports that two of the dead in the #CharlieHe...,rumours,true,charliehebdo-all-rnr-threads,test,2,test
3,576790814942236672,After #Putin disappeared Russian TV no longer ...,non-rumours,non-rumours,putinmissing-all-rnr-threads,test,2,test
4,499678822598340608,Saw #Ferguson for myself. #justiceformichaelbr...,non-rumours,non-rumours,ferguson-all-rnr-threads,training,3,training


In [4]:
# Class names in order of first appearance; a name's position is its integer class id.
labels_str = data['label2'].drop_duplicates().tolist()
labels_str

['non-rumours', 'true', 'unverified', 'false']

In [5]:
# Integer class id of every row: the position of its `label2` value in `labels_str`.
labels = [labels_str.index(name) for name in data['label2']]
labels[:10]

[0, 1, 1, 0, 0, 0, 0, 2, 0, 0]

In [6]:
# Fold 0 is the held-out test split. Resolve its row positions once with a vectorised
# comparison instead of walking the frame row by row for each array.
test_rows = np.flatnonzero(data['cv_fold'].to_numpy() == 0)
test_vectors = vectors[test_rows]
test_labels = np.array(labels)[test_rows]

In [7]:
# Both arrays should report the same number of rows.
for held_out in (test_vectors, test_labels):
    print(held_out.shape)

(630, 768)
(630,)


In [8]:
import torch
import torch.nn as nn
import torch.nn.functional as F


class BasicBlock(nn.Module):
    """Two-layer fully connected residual block.

    The main path is Linear -> BatchNorm1d -> LeakyReLU(0.1) -> Linear -> BatchNorm1d.
    The skip path is the identity when the width is unchanged, otherwise a
    Linear + BatchNorm1d projection to the new width.

    Args:
        in_planes: Number of input features.
        planes: Number of output features.
    """

    expansion = 1

    def __init__(self, in_planes, planes):
        super().__init__()
        self.lin1 = nn.Linear(in_planes, planes)
        self.bn1 = nn.BatchNorm1d(planes)
        self.lin2 = nn.Linear(planes, planes)
        self.bn2 = nn.BatchNorm1d(planes)

        # An empty Sequential acts as the identity; a projection is only needed
        # when the residual has to change width.
        if in_planes == planes:
            projection = []
        else:
            projection = [nn.Linear(in_planes, planes), nn.BatchNorm1d(planes)]
        self.shortcut = nn.Sequential(*projection)

    def forward(self, x):
        """Return ``leaky_relu(main_path(x) + shortcut(x))`` for a ``(batch, in_planes)`` input."""
        hidden = self.lin1(x)
        hidden = F.leaky_relu(self.bn1(hidden), negative_slope=0.1)
        hidden = self.bn2(self.lin2(hidden))
        hidden = hidden + self.shortcut(x)
        return F.leaky_relu(hidden, negative_slope=0.1)


class Bottleneck(nn.Module):
    """Three-layer fully connected residual block.

    The main path is three Linear + BatchNorm1d layers with LeakyReLU(0.1) after the
    first two. All three layers have width ``planes``; the ``expansion`` attribute is
    declared but not applied to any layer width here.

    Args:
        in_planes: Number of input features.
        planes: Number of output features.
    """

    expansion = 4

    def __init__(self, in_planes, planes):
        super().__init__()
        self.lin1 = nn.Linear(in_planes, planes)
        self.bn1 = nn.BatchNorm1d(planes)
        self.lin2 = nn.Linear(planes, planes)
        self.bn2 = nn.BatchNorm1d(planes)
        self.lin3 = nn.Linear(planes, planes)
        self.bn3 = nn.BatchNorm1d(planes)

        # Identity skip when the width is unchanged, projection otherwise.
        if in_planes == planes:
            projection = []
        else:
            projection = [nn.Linear(in_planes, planes), nn.BatchNorm1d(planes)]
        self.shortcut = nn.Sequential(*projection)

    def forward(self, x):
        """Return ``leaky_relu(main_path(x) + shortcut(x))`` for a ``(batch, in_planes)`` input."""
        hidden = x
        for linear, norm in ((self.lin1, self.bn1), (self.lin2, self.bn2)):
            hidden = F.leaky_relu(norm(linear(hidden)), negative_slope=0.1)
        hidden = self.bn3(self.lin3(hidden))
        hidden = hidden + self.shortcut(x)
        return F.leaky_relu(hidden, negative_slope=0.1)


class ResNet(nn.Module):
    """Fully connected ResNet: a 512-wide stem followed by four residual stages.

    The stages have widths 512, 256, 128 and 64, and a final linear layer maps the
    64 features to ``num_classes`` outputs.

    Args:
        block: Residual block class, constructed as ``block(in_features, out_features)``.
        num_blocks: Number of blocks in each of the four stages (indexed 0..3).
        n_input: Width of the input vectors.
        num_classes: Number of outputs of the final linear layer.
    """

    def __init__(self, block, num_blocks, n_input=768, num_classes=1):
        super().__init__()
        self.in_planes = 512

        self.lin1 = nn.Linear(n_input, self.in_planes)
        self.bn1 = nn.BatchNorm1d(self.in_planes)
        # Stages are registered as layer1..layer4, in that order.
        for position, width in enumerate((512, 256, 128, 64)):
            stage = self._make_layer(block, width, num_blocks[position])
            setattr(self, f"layer{position + 1}", stage)
        self.linear = nn.Linear(64, num_classes)

    def _make_layer(self, block, planes, num_blocks):
        """Stack ``num_blocks`` blocks of width ``planes`` and advance ``self.in_planes``."""
        stage = []
        for _ in range(num_blocks):
            stage.append(block(self.in_planes, planes))
            # Every block after the first in a stage keeps the stage width.
            self.in_planes = planes
        return nn.Sequential(*stage)

    def forward(self, x):
        """Map a ``(batch, n_input)`` tensor to ``(batch, num_classes)`` outputs."""
        features = F.leaky_relu(self.bn1(self.lin1(x)), negative_slope=0.1)
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            features = stage(features)
        flat = features.view(features.size(0), -1)
        return self.linear(flat)


def ResNet10(n_input=768, n_output=1, block=BasicBlock):
    """Build the fully connected ResNet with one block in each of the four stages."""
    stage_depths = [1, 1, 1, 1]
    return ResNet(block, stage_depths, n_input=n_input, num_classes=n_output)

    
def ResNet18(n_input=768, n_output=1, block=BasicBlock):
    """Build the fully connected ResNet with two blocks in each of the four stages."""
    stage_depths = [2, 2, 2, 2]
    return ResNet(block, stage_depths, n_input=n_input, num_classes=n_output)


def ResNet34(n_input=768, n_output=1, block=BasicBlock):
    """Build the fully connected ResNet with stage depths 3, 4, 6 and 3."""
    stage_depths = [3, 4, 6, 3]
    return ResNet(block, stage_depths, n_input=n_input, num_classes=n_output)


def ResNet50(n_input=768, n_output=1, block=Bottleneck):
    """Build the fully connected ResNet with stage depths 3, 4, 6 and 3 (three-layer blocks by default)."""
    stage_depths = [3, 4, 6, 3]
    return ResNet(block, stage_depths, n_input=n_input, num_classes=n_output)


def ResNet101(n_input=768, n_output=1, block=Bottleneck):
    """Build the fully connected ResNet with stage depths 3, 4, 23 and 3 (three-layer blocks by default)."""
    stage_depths = [3, 4, 23, 3]
    return ResNet(block, stage_depths, n_input=n_input, num_classes=n_output)


def ResNet152(n_input=768, n_output=1, block=Bottleneck):
    """Build the fully connected ResNet with stage depths 3, 8, 36 and 3 (three-layer blocks by default)."""
    stage_depths = [3, 8, 36, 3]
    return ResNet(block, stage_depths, n_input=n_input, num_classes=n_output)

In [9]:
import torch
import torch.nn as nn
import torch.nn.functional as F


class CNNBasicBlock(nn.Module):
    """Two-convolution residual block for 2-D feature maps.

    The main path is Conv3x3(stride) -> BatchNorm2d -> LeakyReLU(0.1) -> Conv3x3 ->
    BatchNorm2d. The skip path is the identity when the block changes neither the
    channel count nor the spatial size; otherwise it is a strided 1x1 convolution
    followed by BatchNorm2d so that both paths have the same shape when added.

    Args:
        in_planes: Number of input channels.
        planes: Number of output channels.
        stride: Stride of the first convolution (and of the projection shortcut).
    """

    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(CNNBasicBlock, self).__init__()
        out_planes = self.expansion * planes
        self.conv1 = nn.Conv2d(
            in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        # A strided main path shrinks the feature map, so the skip path must be
        # projected as well even when the channel count stays the same.
        if stride != 1 or in_planes != out_planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes)
            )

    def forward(self, x):
        """Return ``leaky_relu(main_path(x) + shortcut(x))`` for a ``(N, in_planes, H, W)`` input."""
        out = F.leaky_relu(self.bn1(self.conv1(x)), 0.1)
        out = self.bn2(self.conv2(out))
        out = out + self.shortcut(x)
        out = F.leaky_relu(out, 0.1)
        return out


class CNNBottleneck(nn.Module):
    """Three-convolution bottleneck residual block for 2-D feature maps.

    The main path is Conv1x1 -> Conv3x3(stride) -> Conv1x1, each followed by
    BatchNorm2d, with LeakyReLU(0.1) after the first two. The last convolution widens
    the output to ``expansion * planes`` channels. The skip path is the identity when
    the input already has that shape, otherwise a strided 1x1 convolution followed by
    BatchNorm2d.

    Args:
        in_planes: Number of input channels.
        planes: Width of the bottleneck; the block emits ``expansion * planes`` channels.
        stride: Stride of the 3x3 convolution (and of the projection shortcut).
    """

    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(CNNBottleneck, self).__init__()
        out_planes = self.expansion * planes
        # Convolutions (not Linear layers) are required here: the block receives
        # (N, C, H, W) feature maps and normalises them with BatchNorm2d.
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != out_planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes)
            )

    def forward(self, x):
        """Return ``leaky_relu(main_path(x) + shortcut(x))`` for a ``(N, in_planes, H, W)`` input."""
        out = F.leaky_relu(self.bn1(self.conv1(x)), 0.1)
        out = F.leaky_relu(self.bn2(self.conv2(out)), 0.1)
        out = self.bn3(self.conv3(out))
        out = out + self.shortcut(x)
        out = F.leaky_relu(out, 0.1)
        return out


class CNNResNet(nn.Module):
    """Convolutional ResNet over single-channel 2-D inputs with a linear classifier head.

    A 3x3 stem with 24 channels is followed by four residual stages of base widths
    24, 32, 48 and 64. The flattened output of the last stage feeds a linear layer
    sized for ``64 * block.expansion`` channels on a 24x32 feature map.

    NOTE(review): the per-stage ``stride`` values are accepted by ``_make_layer`` but
    are not forwarded to the blocks, so every block runs with its default stride and
    the 24x32 input resolution reaches the head unchanged. The head size relies on
    this, so the behaviour is kept as is.

    Args:
        block: Residual block class, constructed as ``block(in_channels, planes)`` and
            exposing an integer ``expansion`` attribute.
        num_blocks: Number of blocks in each of the four stages (indexed 0..3).
        num_classes: Number of outputs of the final linear layer.
    """

    def __init__(self, block, num_blocks, num_classes=1):
        super(CNNResNet, self).__init__()
        self.in_planes = 24

        self.conv1 = nn.Conv2d(1, 24, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(24)
        self.layer1 = self._make_layer(block, 24, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 32, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 48, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 64, num_blocks[3], stride=2)
        # The last stage emits 64 * block.expansion channels, so the head must be
        # sized from the expansion too (identical to 64 * 24 * 32 when expansion is 1).
        self.linear = nn.Linear(64 * block.expansion * 24 * 32, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Stack ``num_blocks`` blocks of base width ``planes`` and advance ``self.in_planes``.

        ``stride`` is part of the signature but is intentionally not passed to the
        blocks (see the class docstring).
        """
        layers = []
        for _ in range(num_blocks):
            layers.append(block(self.in_planes, planes))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        """Map a ``(N, 1, 24, 32)`` tensor to ``(N, num_classes)`` outputs."""
        out = F.leaky_relu(self.bn1(self.conv1(x)), 0.1)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def CNNResNet10(n_output: int):
    """Build the convolutional ResNet with one CNNBasicBlock in each of the four stages."""
    stage_depths = [1, 1, 1, 1]
    return CNNResNet(CNNBasicBlock, stage_depths, num_classes=n_output)

    
def CNNResNet18(n_output: int):
    """Build the convolutional ResNet with two CNNBasicBlocks in each of the four stages."""
    stage_depths = [2, 2, 2, 2]
    return CNNResNet(CNNBasicBlock, stage_depths, num_classes=n_output)


def CNNResNet34(n_output: int):
    """Build the convolutional ResNet with CNNBasicBlock stage depths 3, 4, 6 and 3."""
    stage_depths = [3, 4, 6, 3]
    return CNNResNet(CNNBasicBlock, stage_depths, num_classes=n_output)


def CNNResNet50(n_output: int):
    """Build the convolutional ResNet with CNNBottleneck stage depths 3, 4, 6 and 3."""
    stage_depths = [3, 4, 6, 3]
    return CNNResNet(CNNBottleneck, stage_depths, num_classes=n_output)


def CNNResNet101(n_output: int):
    """Build the convolutional ResNet with CNNBottleneck stage depths 3, 4, 23 and 3."""
    stage_depths = [3, 4, 23, 3]
    return CNNResNet(CNNBottleneck, stage_depths, num_classes=n_output)


def CNNResNet152(n_output: int):
    """Build the convolutional ResNet with CNNBottleneck stage depths 3, 8, 36 and 3."""
    stage_depths = [3, 8, 36, 3]
    return CNNResNet(CNNBottleneck, stage_depths, num_classes=n_output)

In [10]:
import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torch.optim as optim
import matplotlib.pyplot as plt
import time
import os
from typing import Callable


class ResNetClassifier(nn.Module):
    """Training / evaluation wrapper around a classifier network.

    Bundles the network with a loss, an Adam optimizer and device placement, and
    offers checkpoint loading/saving, a train-and-validate loop and argmax prediction.

    Args:
        model: Network to wrap; called on a float batch and expected to return one
            score per class.
        n_input: Not used by this class; kept so existing call sites keep working.
        n_output: Not used by this class; kept so existing call sites keep working.
        criterion: Loss *class* (not an instance); it is instantiated without arguments.
            ``train_eval`` passes integer (``long``) targets to it.
        n_features: Not used by this class; kept so existing call sites keep working.
        lr: Adam learning rate.
        beta1: First Adam beta; the second is fixed at 0.999.
        device: ``'cpu'`` or ``'cuda'``. Any other value selects CUDA when available
            and CPU otherwise.
        model_type: ``"mlp"`` feeds batches to the network unchanged during
            ``train_eval``; any other value (e.g. ``"cnn"``) inserts a channel axis at
            dimension 1 first.
    """

    # Directory every checkpoint is written to (relative to the working directory).
    MODEL_DIR = "../../data/models"

    def __init__(self,
        model,
        n_input: int,
        n_output: int = 1,
        criterion: Callable = nn.BCELoss,
        n_features: int = 4,
        lr: float = 0.0002,
        beta1: float = 0.5,
        device: str = None,
        model_type: str = "mlp"
    ):
        super(ResNetClassifier, self).__init__()
        self.model = model
        self.model_type = model_type
        self.criterion = criterion()
        self.optimizer = optim.Adam(self.model.parameters(), lr=lr, betas=(beta1, 0.999))

        if not device or device not in ['cpu', 'cuda']:
            self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        else:
            self.device = device

        self.model = self.model.to(self.device)
        if self.device == 'cuda':
            # Wrapping prefixes every state-dict key with "module."; load_pretrained
            # compensates for that when reading checkpoints.
            self.model = torch.nn.DataParallel(self.model)
            cudnn.benchmark = True

    def forward(self, input):
        """Run the wrapped network on ``input`` and return its raw output."""
        x = self.model(input)
        return x

    def load_pretrained(self, filepath: str, key: str = "net", is_parallel: bool = False):
        """Load network weights from a checkpoint written by this class.

        The checkpoint is mapped onto this instance's device, so a file saved on GPU
        can be read on a CPU-only machine. If none of the stored parameter names match
        the network because of a ``DataParallel`` ``"module."`` prefix on one side
        only, the prefix is added or stripped before loading.

        Args:
            filepath: Path of the ``.pth`` file.
            key: Entry of the checkpoint dict that holds the state dict.
            is_parallel: Wrap the network in ``DataParallel`` first (no-op when it
                already is wrapped).

        Raises:
            RuntimeError: If the checkpoint is non-empty but shares no parameter name
                with the network, which would otherwise leave the weights untouched
                without any warning.
        """
        checkpoint = torch.load(filepath, map_location=self.device)
        if is_parallel and not isinstance(self.model, torch.nn.DataParallel):
            self.model = torch.nn.DataParallel(self.model)

        state = checkpoint[key]
        own_names = set(self.model.state_dict())
        if state and own_names.isdisjoint(state):
            prefix = "module."
            if all(name.startswith(prefix) for name in state):
                renamed = {name[len(prefix):]: value for name, value in state.items()}
            else:
                renamed = {prefix + name: value for name, value in state.items()}
            if own_names.isdisjoint(renamed):
                raise RuntimeError(
                    f"Checkpoint '{filepath}' shares no parameter names with the model"
                )
            state = renamed
        # strict=False is kept so partially matching checkpoints still load.
        self.model.load_state_dict(state, strict=False)

    def _checkpoint_path(self, saves: str) -> str:
        """Return the checkpoint path for ``saves``, creating ``MODEL_DIR`` if needed."""
        os.makedirs(self.MODEL_DIR, exist_ok=True)
        return f"{self.MODEL_DIR}/{saves}.pth"

    def save_model(self, saves: str):
        """Write the network's state dict to ``MODEL_DIR/<saves>.pth`` under the key ``'net'``."""
        print(f"Saving model...")
        state = {
            'net': self.model.state_dict()
        }
        torch.save(state, self._checkpoint_path(saves))

    def _run_network(self, inputs):
        """Feed a batch to the network, adding the channel axis for non-MLP models."""
        if self.model_type == "mlp":
            return self.model(inputs)
        return self.model(inputs.unsqueeze(1))

    def train_eval(self,
        train_x, train_y,
        test_x, test_y,
        n_iter: int = 100,
        batch_size: int = 128,
        saves: str = None
    ):
        """Train for ``n_iter`` epochs, validating after each one.

        After every epoch the whole validation set is scored in a single forward pass
        and its accuracy is taken from ``ConfusionMatrix``. When ``saves`` is given,
        the weights are checkpointed each time that accuracy improves on the best one
        seen so far. At least one epoch is always run.

        Args:
            train_x, train_y: Training inputs and class ids (tensors).
            test_x, test_y: Validation inputs and class ids (tensors).
            n_iter: Number of epochs.
            batch_size: Training mini-batch size.
            saves: Checkpoint name (without extension); ``None`` disables saving.
        """
        trainset = torch.utils.data.TensorDataset(train_x, train_y)
        # NOTE(review): batches are drawn in dataset order (no shuffling); kept as is
        # so existing results stay comparable.
        trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size)

        print(f"Using {self.device}")
        best_test_acc = 0
        epoch = 0
        while True:
            epoch += 1
            self.model.train()
            for inputs, targets in trainloader:
                self.model.zero_grad()
                inputs, targets = inputs.to(self.device), targets.to(self.device)

                outputs = self._run_network(inputs)
                loss = self.criterion(outputs, targets.long())
                loss.backward()
                self.optimizer.step()

            self.model.eval()
            with torch.no_grad():
                outputs = self._run_network(test_x.to(self.device))
                predicted_ids = outputs.argmax(dim=1).cpu().numpy()
            true_ids = test_y.long().cpu().numpy()

            # One-hot rows sized by the number of scores the network emits, so the
            # method does not depend on a notebook-level list of class names.
            one_hot = np.eye(outputs.shape[1], dtype=int)
            conf_mat = ConfusionMatrix(
                labels=one_hot[true_ids],
                predictions=one_hot[predicted_ids],
                binary=False
            )
            conf_mat.evaluate(logs=False)
            test_acc = conf_mat.accuracy

            # Keep only the best-validating weights on disk.
            if saves and test_acc > best_test_acc:
                best_test_acc = test_acc
                state = {
                    'net': self.model.state_dict(),
                }
                torch.save(state, self._checkpoint_path(saves))

            if epoch >= n_iter:
                break

    def predict(self, input_x):
        """Return the predicted class id for every row of ``input_x``.

        ``input_x`` (array-like or tensor) is converted to ``float32``, moved to this
        instance's device and passed to the network exactly as shaped; the caller is
        responsible for any channel axis. The result is a 1-D tensor of class ids on
        that device.
        """
        self.model.eval()
        with torch.no_grad():
            batch = torch.as_tensor(input_x, dtype=torch.float32).to(self.device)
            scores = self.model(batch)
            # The arg-max of the raw scores equals the arg-max of their softmax.
            return scores.argmax(dim=1)

In [11]:
print("3-Fold Cross Validation with ResNet10 CNN")

# Resolve the fold assignment once with vectorised lookups rather than walking the
# frame row by row for every array of every fold. Row positions in `data` double as
# row positions in `vectors` and `labels`.
fold_ids = data['cv_fold'].to_numpy()
label_ids = np.array(labels)
one_hot = np.eye(len(labels_str), dtype=int)

folds = [1, 2, 3]
for fold in folds:
    start = time.time()
    val_folds = [fold]
    train_folds = folds.copy()
    train_folds.remove(fold)

    train_rows = np.flatnonzero(np.isin(fold_ids, train_folds))
    val_rows = np.flatnonzero(np.isin(fold_ids, val_folds))

    train_vectors, val_vectors = vectors[train_rows], vectors[val_rows]
    train_labels, val_labels = label_ids[train_rows], label_ids[val_rows]

    print(f"Fold-{fold} Cross Validation")
    model_name = f"{dataset_name}_ResNet10_CNN_{unique_name}_fold-{fold}"
    print(f"Multiclass Classification {model_name}")
    model = ResNetClassifier(CNNResNet10(n_output=4), train_vectors.shape[1], n_output=4, criterion=nn.CrossEntropyLoss, n_features=16, model_type="cnn") #, device="cpu")
    # Each 768-dimensional vector is viewed as a 24x32 single-channel image.
    model.train_eval(torch.Tensor(train_vectors.reshape(train_vectors.shape[0], 24, 32)),
                    torch.Tensor(train_labels),
                    torch.Tensor(val_vectors.reshape(val_vectors.shape[0], 24, 32)),
                    torch.Tensor(val_labels),
                    saves=model_name,
                    n_iter=1000,
                    batch_size=1024)

    # Score the fold with the checkpoint train_eval saved under `model_name`.
    model.load_pretrained(f"../../data/models/{model_name}.pth")

    print(f"\n-------- Fold-{fold} Results --------")
    preds = model.predict(val_vectors.reshape(val_vectors.shape[0], 1, 24, 32))

    preds = preds.cpu().numpy()

    conf_mat = ConfusionMatrix(
        labels=one_hot[val_labels],
        predictions=one_hot[preds],
        binary=False,
        model_name=model_name
    )
    conf_mat.evaluate(classes=labels_str)
    
    print(f"-- Execution Time : {round(time.time() - start, 2)} seconds")
    print(f"-------- Fold-{fold} End --------\n")

3-Fold Cross Validation with ResNet10 CNN
Fold-1 Cross Validation
Multiclass Classification Phemernr2_CrossVal_ResNet10_CNN_T5_fold-1
Using cuda

-------- Fold-1 Results --------
1914 vs 1914
Multi Class Evaluation

Class non-rumours Evaluation
- Precision : 84.345 %
- Recall : 91.843 %
- F1 : 0.87934

Class true Evaluation
- Precision : 63.975 %
- Recall : 63.19 %
- F1 : 0.6358

Class unverified Evaluation
- Precision : 63.846 %
- Recall : 44.385 %
- F1 : 0.52366

Class false Evaluation
- Precision : 76.378 %
- Recall : 55.429 %
- F1 : 0.64238

Combined Evaluation
- Accuracy : 78.997 %
- Precision : 72.136 %
- Recall : 63.712 %
- F1 : 0.67663

- Average Confidence : 100.0 %
Model, Combined,,,,non-rumours,,,true,,,unverified,,,false,,,
Phemernr2_CrossVal_ResNet10_CNN_T5_fold-1, 78.997, 72.136, 63.712, 0.67663, 84.345, 91.843, 0.87934, 63.975, 63.19, 0.6358, 63.846, 44.385, 0.52366, 76.378, 55.429, 0.64238, 
-- Execution Time : 641.75 seconds
-------- Fold-1 End --------

Fold-2 Cross V

In [12]:
fold_n = 1
print(f"Testing Cross Validation Fold-{fold_n}")

model_name = f"{dataset_name}_ResNet10_CNN_{unique_name}_fold-{fold_n}"
# The input width is read from the test matrix so this cell also runs when the
# training loop has not been executed in the current kernel.
model = ResNetClassifier(CNNResNet10(n_output=4), test_vectors.shape[1], n_output=4, criterion=nn.CrossEntropyLoss, n_features=16, model_type="cnn") #, device="cpu")

model.load_pretrained(f"../../data/models/{model_name}.pth")

print(f"\n-------- Testing Results --------")
# Each 768-dimensional vector is viewed as a 24x32 single-channel image.
preds = model.predict(test_vectors.reshape(test_vectors.shape[0], 1, 24, 32))

preds = preds.cpu().numpy()

one_hot = np.eye(len(labels_str), dtype=int)
conf_mat = ConfusionMatrix(
    labels=one_hot[test_labels],
    predictions=one_hot[preds],
    binary=False,
    model_name=model_name
)
conf_mat.evaluate(classes=labels_str)
print(f"-------- Testing End --------\n")

Testing Cross Validation Fold-1

-------- Testing Results --------
630 vs 630
Multi Class Evaluation

Class non-rumours Evaluation
- Precision : 82.009 %
- Recall : 92.612 %
- F1 : 0.86989

Class true Evaluation
- Precision : 67.619 %
- Recall : 65.138 %
- F1 : 0.66355

Class unverified Evaluation
- Precision : 71.739 %
- Recall : 44.0 %
- F1 : 0.54545

Class false Evaluation
- Precision : 72.549 %
- Recall : 55.224 %
- F1 : 0.62712

Combined Evaluation
- Accuracy : 78.095 %
- Precision : 73.479 %
- Recall : 64.243 %
- F1 : 0.68551

- Average Confidence : 100.0 %
Model, Combined,,,,non-rumours,,,true,,,unverified,,,false,,,
Phemernr2_CrossVal_ResNet10_CNN_T5_fold-1, 78.095, 73.479, 64.243, 0.68551, 82.009, 92.612, 0.86989, 67.619, 65.138, 0.66355, 71.739, 44.0, 0.54545, 72.549, 55.224, 0.62712, 
-------- Testing End --------



In [13]:
fold_n = 2
print(f"Testing Cross Validation Fold-{fold_n}")

model_name = f"{dataset_name}_ResNet10_CNN_{unique_name}_fold-{fold_n}"
# The input width is read from the test matrix so this cell also runs when the
# training loop has not been executed in the current kernel.
model = ResNetClassifier(CNNResNet10(n_output=4), test_vectors.shape[1], n_output=4, criterion=nn.CrossEntropyLoss, n_features=16, model_type="cnn") #, device="cpu")

model.load_pretrained(f"../../data/models/{model_name}.pth")

print(f"\n-------- Testing Results --------")
# Each 768-dimensional vector is viewed as a 24x32 single-channel image.
preds = model.predict(test_vectors.reshape(test_vectors.shape[0], 1, 24, 32))

preds = preds.cpu().numpy()

one_hot = np.eye(len(labels_str), dtype=int)
conf_mat = ConfusionMatrix(
    labels=one_hot[test_labels],
    predictions=one_hot[preds],
    binary=False,
    model_name=model_name
)
conf_mat.evaluate(classes=labels_str)
print(f"-------- Testing End --------\n")

Testing Cross Validation Fold-2

-------- Testing Results --------
630 vs 630
Multi Class Evaluation

Class non-rumours Evaluation
- Precision : 78.728 %
- Recall : 94.723 %
- F1 : 0.85988

Class true Evaluation
- Precision : 74.359 %
- Recall : 53.211 %
- F1 : 0.62032

Class unverified Evaluation
- Precision : 62.745 %
- Recall : 42.667 %
- F1 : 0.50794

Class false Evaluation
- Precision : 80.0 %
- Recall : 53.731 %
- F1 : 0.64286

Combined Evaluation
- Accuracy : 76.984 %
- Precision : 73.958 %
- Recall : 61.083 %
- F1 : 0.66907

- Average Confidence : 100.0 %
Model, Combined,,,,non-rumours,,,true,,,unverified,,,false,,,
Phemernr2_CrossVal_ResNet10_CNN_T5_fold-2, 76.984, 73.958, 61.083, 0.66907, 78.728, 94.723, 0.85988, 74.359, 53.211, 0.62032, 62.745, 42.667, 0.50794, 80.0, 53.731, 0.64286, 
-------- Testing End --------



In [14]:
fold_n = 3
print(f"Testing Cross Validation Fold-{fold_n}")

model_name = f"{dataset_name}_ResNet10_CNN_{unique_name}_fold-{fold_n}"
# The input width is read from the test matrix so this cell also runs when the
# training loop has not been executed in the current kernel.
model = ResNetClassifier(CNNResNet10(n_output=4), test_vectors.shape[1], n_output=4, criterion=nn.CrossEntropyLoss, n_features=16, model_type="cnn") #, device="cpu")

model.load_pretrained(f"../../data/models/{model_name}.pth")

print(f"\n-------- Testing Results --------")
# Each 768-dimensional vector is viewed as a 24x32 single-channel image.
preds = model.predict(test_vectors.reshape(test_vectors.shape[0], 1, 24, 32))

preds = preds.cpu().numpy()

one_hot = np.eye(len(labels_str), dtype=int)
conf_mat = ConfusionMatrix(
    labels=one_hot[test_labels],
    predictions=one_hot[preds],
    binary=False,
    model_name=model_name
)
conf_mat.evaluate(classes=labels_str)
print(f"-------- Testing End --------\n")

Testing Cross Validation Fold-3

-------- Testing Results --------
630 vs 630
Multi Class Evaluation

Class non-rumours Evaluation
- Precision : 81.925 %
- Recall : 92.084 %
- F1 : 0.86708

Class true Evaluation
- Precision : 62.832 %
- Recall : 65.138 %
- F1 : 0.63964

Class unverified Evaluation
- Precision : 63.043 %
- Recall : 38.667 %
- F1 : 0.47934

Class false Evaluation
- Precision : 82.222 %
- Recall : 55.224 %
- F1 : 0.66071

Combined Evaluation
- Accuracy : 77.143 %
- Precision : 72.506 %
- Recall : 62.778 %
- F1 : 0.67292

- Average Confidence : 100.0 %
Model, Combined,,,,non-rumours,,,true,,,unverified,,,false,,,
Phemernr2_CrossVal_ResNet10_CNN_T5_fold-3, 77.143, 72.506, 62.778, 0.67292, 81.925, 92.084, 0.86708, 62.832, 65.138, 0.63964, 63.043, 38.667, 0.47934, 82.222, 55.224, 0.66071, 
-------- Testing End --------



In [15]:
print("3-Fold Cross Validation with ResNet18 CNN")

# Resolve the fold assignment once with vectorised lookups rather than walking the
# frame row by row for every array of every fold. Row positions in `data` double as
# row positions in `vectors` and `labels`.
fold_ids = data['cv_fold'].to_numpy()
label_ids = np.array(labels)
one_hot = np.eye(len(labels_str), dtype=int)

folds = [1, 2, 3]
for fold in folds:
    start = time.time()
    val_folds = [fold]
    train_folds = folds.copy()
    train_folds.remove(fold)

    train_rows = np.flatnonzero(np.isin(fold_ids, train_folds))
    val_rows = np.flatnonzero(np.isin(fold_ids, val_folds))

    train_vectors, val_vectors = vectors[train_rows], vectors[val_rows]
    train_labels, val_labels = label_ids[train_rows], label_ids[val_rows]

    print(f"Fold-{fold} Cross Validation")
    model_name = f"{dataset_name}_ResNet18_CNN_{unique_name}_fold-{fold}"
    print(f"Multiclass Classification {model_name}")
    model = ResNetClassifier(CNNResNet18(n_output=4), train_vectors.shape[1], n_output=4, criterion=nn.CrossEntropyLoss, n_features=16, model_type="cnn") #, device="cpu")
    # Each 768-dimensional vector is viewed as a 24x32 single-channel image.
    model.train_eval(torch.Tensor(train_vectors.reshape(train_vectors.shape[0], 24, 32)),
                    torch.Tensor(train_labels),
                    torch.Tensor(val_vectors.reshape(val_vectors.shape[0], 24, 32)),
                    torch.Tensor(val_labels),
                    saves=model_name,
                    n_iter=1000,
                    batch_size=1024)

    # Score the fold with the checkpoint train_eval saved under `model_name`.
    model.load_pretrained(f"../../data/models/{model_name}.pth")

    print(f"\n-------- Fold-{fold} Results --------")
    preds = model.predict(val_vectors.reshape(val_vectors.shape[0], 1, 24, 32))

    preds = preds.cpu().numpy()

    conf_mat = ConfusionMatrix(
        labels=one_hot[val_labels],
        predictions=one_hot[preds],
        binary=False,
        model_name=model_name
    )
    conf_mat.evaluate(classes=labels_str)
    
    print(f"-- Execution Time : {round(time.time() - start, 2)} seconds")
    print(f"-------- Fold-{fold} End --------\n")

3-Fold Cross Validation with ResNet18 CNN
Fold-1 Cross Validation
Multiclass Classification Phemernr2_CrossVal_ResNet18_CNN_T5_fold-1
Using cuda

-------- Fold-1 Results --------
1914 vs 1914
Multi Class Evaluation

Class non-rumours Evaluation
- Precision : 81.656 %
- Recall : 93.312 %
- F1 : 0.87096

Class true Evaluation
- Precision : 64.527 %
- Recall : 58.589 %
- F1 : 0.61415

Class unverified Evaluation
- Precision : 60.577 %
- Recall : 33.69 %
- F1 : 0.43299

Class false Evaluation
- Precision : 75.221 %
- Recall : 48.571 %
- F1 : 0.59028

Combined Evaluation
- Accuracy : 77.482 %
- Precision : 70.495 %
- Recall : 58.54 %
- F1 : 0.63964

- Average Confidence : 100.0 %
Model, Combined,,,,non-rumours,,,true,,,unverified,,,false,,,
Phemernr2_CrossVal_ResNet18_CNN_T5_fold-1, 77.482, 70.495, 58.54, 0.63964, 81.656, 93.312, 0.87096, 64.527, 58.589, 0.61415, 60.577, 33.69, 0.43299, 75.221, 48.571, 0.59028, 
-- Execution Time : 1128.32 seconds
-------- Fold-1 End --------

Fold-2 Cross 

In [16]:
fold_n = 1
print(f"Testing Cross Validation Fold-{fold_n}")

model_name = f"{dataset_name}_ResNet18_CNN_{unique_name}_fold-{fold_n}"
# The input width is read from the test matrix so this cell also runs when the
# training loop has not been executed in the current kernel.
model = ResNetClassifier(CNNResNet18(n_output=4), test_vectors.shape[1], n_output=4, criterion=nn.CrossEntropyLoss, n_features=16, model_type="cnn") #, device="cpu")

model.load_pretrained(f"../../data/models/{model_name}.pth")

print(f"\n-------- Testing Results --------")
# Each 768-dimensional vector is viewed as a 24x32 single-channel image.
preds = model.predict(test_vectors.reshape(test_vectors.shape[0], 1, 24, 32))

preds = preds.cpu().numpy()

one_hot = np.eye(len(labels_str), dtype=int)
conf_mat = ConfusionMatrix(
    labels=one_hot[test_labels],
    predictions=one_hot[preds],
    binary=False,
    model_name=model_name
)
conf_mat.evaluate(classes=labels_str)
print(f"-------- Testing End --------\n")

Testing Cross Validation Fold-1

-------- Testing Results --------
630 vs 630
Multi Class Evaluation

Class non-rumours Evaluation
- Precision : 78.924 %
- Recall : 92.876 %
- F1 : 0.85333

Class true Evaluation
- Precision : 61.765 %
- Recall : 57.798 %
- F1 : 0.59716

Class unverified Evaluation
- Precision : 68.293 %
- Recall : 37.333 %
- F1 : 0.48276

Class false Evaluation
- Precision : 73.171 %
- Recall : 44.776 %
- F1 : 0.55556

Combined Evaluation
- Accuracy : 75.079 %
- Precision : 70.538 %
- Recall : 58.196 %
- F1 : 0.63775

- Average Confidence : 100.0 %
Model, Combined,,,,non-rumours,,,true,,,unverified,,,false,,,
Phemernr2_CrossVal_ResNet18_CNN_T5_fold-1, 75.079, 70.538, 58.196, 0.63775, 78.924, 92.876, 0.85333, 61.765, 57.798, 0.59716, 68.293, 37.333, 0.48276, 73.171, 44.776, 0.55556, 
-------- Testing End --------



In [17]:
fold_n = 2
print(f"Testing Cross Validation Fold-{fold_n}")

model_name = f"{dataset_name}_ResNet18_CNN_{unique_name}_fold-{fold_n}"
# The input width is read from the test matrix so this cell also runs when the
# training loop has not been executed in the current kernel.
model = ResNetClassifier(CNNResNet18(n_output=4), test_vectors.shape[1], n_output=4, criterion=nn.CrossEntropyLoss, n_features=16, model_type="cnn") #, device="cpu")

model.load_pretrained(f"../../data/models/{model_name}.pth")

print(f"\n-------- Testing Results --------")
# Each 768-dimensional vector is viewed as a 24x32 single-channel image.
preds = model.predict(test_vectors.reshape(test_vectors.shape[0], 1, 24, 32))

preds = preds.cpu().numpy()

one_hot = np.eye(len(labels_str), dtype=int)
conf_mat = ConfusionMatrix(
    labels=one_hot[test_labels],
    predictions=one_hot[preds],
    binary=False,
    model_name=model_name
)
conf_mat.evaluate(classes=labels_str)
print(f"-------- Testing End --------\n")

Testing Cross Validation Fold-2

-------- Testing Results --------
630 vs 630
Multi Class Evaluation

Class non-rumours Evaluation
- Precision : 79.075 %
- Recall : 94.723 %
- F1 : 0.86194

Class true Evaluation
- Precision : 68.0 %
- Recall : 62.385 %
- F1 : 0.65072

Class unverified Evaluation
- Precision : 69.231 %
- Recall : 36.0 %
- F1 : 0.47368

Class false Evaluation
- Precision : 75.676 %
- Recall : 41.791 %
- F1 : 0.53846

Combined Evaluation
- Accuracy : 76.508 %
- Precision : 72.995 %
- Recall : 58.725 %
- F1 : 0.65087

- Average Confidence : 100.0 %
Model, Combined,,,,non-rumours,,,true,,,unverified,,,false,,,
Phemernr2_CrossVal_ResNet18_CNN_T5_fold-2, 76.508, 72.995, 58.725, 0.65087, 79.075, 94.723, 0.86194, 68.0, 62.385, 0.65072, 69.231, 36.0, 0.47368, 75.676, 41.791, 0.53846, 
-------- Testing End --------



In [18]:
fold_n = 3
print(f"Testing Cross Validation Fold-{fold_n}")

model_name = f"{dataset_name}_ResNet18_CNN_{unique_name}_fold-{fold_n}"
# The input width is read from the test matrix so this cell also runs when the
# training loop has not been executed in the current kernel.
model = ResNetClassifier(CNNResNet18(n_output=4), test_vectors.shape[1], n_output=4, criterion=nn.CrossEntropyLoss, n_features=16, model_type="cnn") #, device="cpu")

model.load_pretrained(f"../../data/models/{model_name}.pth")

print(f"\n-------- Testing Results --------")
# Each 768-dimensional vector is viewed as a 24x32 single-channel image.
preds = model.predict(test_vectors.reshape(test_vectors.shape[0], 1, 24, 32))

preds = preds.cpu().numpy()

one_hot = np.eye(len(labels_str), dtype=int)
conf_mat = ConfusionMatrix(
    labels=one_hot[test_labels],
    predictions=one_hot[preds],
    binary=False,
    model_name=model_name
)
conf_mat.evaluate(classes=labels_str)
print(f"-------- Testing End --------\n")

Testing Cross Validation Fold-3

-------- Testing Results --------
630 vs 630
Multi Class Evaluation

Class non-rumours Evaluation
- Precision : 82.619 %
- Recall : 91.557 %
- F1 : 0.86859

Class true Evaluation
- Precision : 60.0 %
- Recall : 63.303 %
- F1 : 0.61607

Class unverified Evaluation
- Precision : 61.538 %
- Recall : 42.667 %
- F1 : 0.50394

Class false Evaluation
- Precision : 81.395 %
- Recall : 52.239 %
- F1 : 0.63636

Combined Evaluation
- Accuracy : 76.667 %
- Precision : 71.388 %
- Recall : 62.441 %
- F1 : 0.66615

- Average Confidence : 100.0 %
Model, Combined,,,,non-rumours,,,true,,,unverified,,,false,,,
Phemernr2_CrossVal_ResNet18_CNN_T5_fold-3, 76.667, 71.388, 62.441, 0.66615, 82.619, 91.557, 0.86859, 60.0, 63.303, 0.61607, 61.538, 42.667, 0.50394, 81.395, 52.239, 0.63636, 
-------- Testing End --------

