In [1]:
import os
import torch
from torch import nn
import numpy

class MultiTaskLoss(nn.Module):
  """Combine per-task losses using learned log-variance weights.

  Every task ``i`` owns a learnable ``log_vars[i]`` (initialised to 0). The
  weighted loss is ``exp(-log_vars[i]) * losses[i] + 0.5 * log_vars[i]``,
  which is algebraically identical to ``losses[i] / std**2 + log(std)`` with
  ``std = exp(log_vars[i]) ** 0.5``.

  Args:
    n_tasks: number of task losses, i.e. the length of ``log_vars``.
    reduction: ``'sum'`` or ``'mean'`` collapse the weighted losses to a
      scalar; any other value (default ``'none'``) returns them unreduced.
  """

  def __init__(self, n_tasks, reduction='none'):
    super(MultiTaskLoss, self).__init__()
    self.n_tasks = n_tasks
    self.log_vars = nn.Parameter(torch.zeros(self.n_tasks))
    self.reduction = reduction

  def forward(self, losses):
    """Weight `losses` (one entry per task) and apply the configured reduction.

    Args:
      losses: tensor that broadcasts against ``log_vars``.

    Returns:
      The weighted losses, reduced according to ``self.reduction``.
    """
    dtype = losses.dtype
    device = losses.device
    # Work directly in log space: exponentiating to a std-dev, squaring it and
    # taking its log again overflows to inf (and produces NaN gradients) once
    # log_vars grows large, while exp(-log_var) and 0.5 * log_var stay finite.
    precision = torch.exp(-self.log_vars).to(device).to(dtype)
    log_stds = (0.5 * self.log_vars).to(device).to(dtype)
    multi_task_losses = precision * losses + log_stds

    if self.reduction == 'sum':
      multi_task_losses = multi_task_losses.sum()
    if self.reduction == 'mean':
      multi_task_losses = multi_task_losses.mean()

    return multi_task_losses

In [2]:
class AverageMeter(object):

    """Keeps the latest value together with a running weighted mean."""
    def __init__(self):
        self.reset()

    def reset(self):
        """Zero the latest value, the mean, the weighted total and the weight count."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record `val` as the latest value, counting it `n` times in the mean."""
        total = self.sum + val * n
        seen = self.count + n
        self.val, self.sum, self.count = val, total, seen
        self.avg = total / seen

In [3]:
# Size handed to the Resize transform for every sampled video frame.
RESIZE = (128, 171)
# Per-channel mean / std handed to the Normalize transform.
# NOTE(review): these look like the normalisation statistics torchvision
# documents for its pretrained video models — confirm.
MEAN = [0.43216, 0.394666, 0.37645]
STD = [0.22803, 0.22145, 0.216989]
# Side length handed to CenterCrop; also the height/width of the all-zero
# placeholder frame returned when video is disabled.
CROP_SIZE = 112
# Not referenced anywhere in the visible part of this notebook.
NUM_FRAMES = 10
# Channel count of the all-zero placeholder frame tensor.
NCHANNELS = 3
# Multiplied by the video's FPS to get the frame stride, i.e. one frame is
# sampled every NTH_SECOND seconds of video.
NTH_SECOND = 30

In [4]:
import os

import torch
import torch.utils.data as data
import pandas as pd
import numpy as np

from opencv_transforms.transforms import (
    Compose, Normalize, Resize, CenterCrop, ToTensor
)

import cv2


class HateSpeechData(data.Dataset):
    """Comment-level dataset with optional video metadata, related comments and frames.

    Each item is the tuple ``(comment, title, desc, transcript, other_comment,
    frame_data, target_binary, target_multilabel)``. Text fields that are
    switched off in ``args`` are returned as ``''``.

    Args:
        args: dict of run options. Keys read by this class:
            ``train_question_file`` / ``validation_question_file`` /
            ``test_question_file``, ``add_comment``, ``add_title``,
            ``add_description``, ``add_transcription``, ``add_other_comments``,
            ``other_comments_path``, ``metadata_path``, ``model``,
            ``desc_keyphrase_extract``, ``transcript_keyphrase_extract``,
            ``add_video``, ``video_path`` and ``remove_none``.
        phase: ``'train'`` or ``'validation'``; any other value loads the test split.
    """

    def __init__(self, args, phase):
        # Column index of each target class inside the multi-label vector.
        self.mapping = {"Organisation": 0, "Location": 1, "Individual": 2, "Community": 3, "None": 4}
        self.args = args
        if phase == 'train':
            self.comments = self.load_comments(self.args["train_question_file"])
        elif phase == 'validation':
            self.comments = self.load_comments(self.args["validation_question_file"])
        else:
            self.comments = self.load_comments(self.args["test_question_file"])

        # Derive the video id and hosting site from each row's URL.
        self.comments[['videoID', 'source']] = self.comments['url'].apply(lambda x: self.parse_urls(x))

        if args["add_video"]:
            self.transform = Compose(
                [
                    Resize(RESIZE),
                    CenterCrop(CROP_SIZE),
                    ToTensor(),
                    Normalize(MEAN, STD)
                ]
            )

        if args["add_other_comments"]:
            self.other_comments_data = self.load_metadata(self.args["other_comments_path"])
            self.comments = pd.merge(self.comments, self.other_comments_data, how='left', on=['url', 'comment'])
        if args["add_title"] or args["add_description"] or args["add_transcription"]:
            self.metadata = self.load_metadata(args["metadata_path"])
            self.metadata = self.metadata.replace(np.nan, '', regex=True)
            # Optionally swap the raw description / transcript for the
            # key-phrase column that matches the encoder family in use.
            if "bert" in self.args["model"]:
                if self.args["desc_keyphrase_extract"]:
                    self.metadata['desc'] = self.metadata['key_phrases_desc_bert']
                if self.args["transcript_keyphrase_extract"]:
                    self.metadata['transcript'] = self.metadata['key_phrases_transcript_bert']
            elif "longformer" in self.args["model"]:
                if self.args["desc_keyphrase_extract"]:
                    self.metadata['desc'] = self.metadata['key_phrases_desc_long']
                if self.args["transcript_keyphrase_extract"]:
                    self.metadata['transcript'] = self.metadata['key_phrases_transcript_long']
            self.comments = pd.merge(self.comments, self.metadata, how='left', on='url')
        # Left merges leave NaN where no metadata matched; use '' instead.
        self.comments = self.comments.replace(np.nan, '', regex=True)
        self.comments.drop_duplicates(inplace=True)
        if self.args["remove_none"]:
            # Keep only rows whose binary label is positive (label is stored as 0/1).
            self.comments = self.comments[self.comments['label'] == True]
        # Positional lookups in __getitem__ need a contiguous 0..N-1 index.
        self.comments = self.comments.reset_index(drop=True)

    def load_metadata(self, filename):
        """Read a CSV file into a DataFrame without further processing."""
        df = pd.read_csv(filename)
        return df

    def parse_urls(self, url):
        """Extract ``videoID`` and ``source`` from a video URL.

        Returns a Series with both fields; they are empty strings when the URL
        contains neither "youtube" nor "bitchute".

        Raises:
            AssertionError: if a YouTube id is not exactly 11 characters long.
        """
        videoID = ""
        source = ""
        if "youtube" in url:
            source = "youtube"
            # Drop every known query-string suffix that may follow the id.
            videoID = url.split("watch?v=")[1].split("_channel=")[0].split("&t=")[0].split("&lc=")[0].split("&ab")[0].split("&")[0]
            if len(videoID) != 11:
                print(videoID)
            assert len(videoID) == 11

        elif "bitchute" in url:
            source = "bitchute"
            # The id is the path segment before the trailing slash.
            videoID = url.split("/")[-2]

        return  pd.Series({'videoID': videoID, 'source': source})

    def load_comments(self, filename):
        """Read the comment CSV and turn the 'yes'/other ``label`` column into 1/0."""
        df = pd.read_csv(filename)
        df['label'] = df['label'].apply(lambda x: int(x == 'yes'))
        return df

    def __len__(self):
        return len(self.comments)

    def __getitem__(self, index):
        """Return the text fields, frame tensor and both targets for row `index`.

        Raises:
            RuntimeError: if ``add_video`` is set and no frame can be read.
            KeyError: if a target name is missing from ``self.mapping``.
        """
        comment = self.comments['comment'][index] if self.args["add_comment"] else ''
        title = self.comments['title'][index] if self.args["add_title"] else ''
        desc = self.comments['desc'][index] if self.args["add_description"] else ''
        transcript = self.comments['transcript'][index] if self.args["add_transcription"] else ''
        other_comment = self.comments['key_phrases_other_comments'][index] if self.args["add_other_comments"] else ''

        # Placeholder returned when video is disabled: a single all-zero frame.
        frame_data = torch.zeros(NCHANNELS, 1, CROP_SIZE, CROP_SIZE)
        if self.args["add_video"]:
            frame_data = []
            # The path parts must be separate arguments. The original wrapped
            # all of them in one string literal used as a dict key, which
            # raised KeyError for every item.
            filename = os.path.join(
                self.args["video_path"],
                self.comments['source'][index],
                self.comments['videoID'][index],
            )
            vidcap = cv2.VideoCapture(f"{filename}.mp4")
            try:
                frame_rate = vidcap.get(cv2.CAP_PROP_FPS)
                # Number of frames between two samples (one every NTH_SECOND seconds).
                frames_step = frame_rate * NTH_SECOND

                num_frames = 0
                while vidcap.isOpened():
                    ret, image = vidcap.read()
                    if not ret:
                        break
                    image = self.transform(image)
                    frame_data.append(image[None, ...])
                    num_frames += 1
                    if frames_step <= 0:
                        # FPS unknown: seeking would go back to frame 0 on
                        # every pass and never end, so keep the first frame only.
                        break
                    vidcap.set(cv2.CAP_PROP_POS_FRAMES, num_frames * frames_step)
            finally:
                # Release the capture even when the transform raises.
                vidcap.release()

            if not frame_data:
                raise RuntimeError(f"No frames could be read from {filename}.mp4")

            frame_data = torch.cat(frame_data) # Number of frames, channels, image width, image height
            frame_data = torch.movedim(frame_data, 1, 0) # channels, Number of frames, image width, image height

        target_multilabel = np.zeros(5, dtype=float)
        if self.args["remove_none"]:
            target_multilabel = np.zeros(4, dtype=float)
        # 'hate_towards_whom' holds a comma-separated list of target names.
        labels = self.comments['hate_towards_whom'][index].split(',')
        for label in labels:
            label = label.strip()
            if label == '' or len(label) == 0:
              # Dump the offending row before the mapping lookup below raises.
              print(comment, index, labels, title)
            target_multilabel[self.mapping[label]] = 1
        target_multilabel = torch.FloatTensor(target_multilabel)
        target_binary = torch.FloatTensor([self.comments['label'][index]])

        return comment, title, desc, transcript, other_comment, frame_data, target_binary, target_multilabel


In [5]:
import torch
from torch import nn
import torchvision
from transformers import LongformerModel, LongformerTokenizer
from transformers import BertTokenizer, BertModel

class LFEmbeddingModule(nn.Module):
    """Pretrained Longformer/BERT encoder plus tokenizer with partially frozen weights.

    ``args["model"]`` names the checkpoint. A name containing "longformer"
    selects the Longformer classes, anything else the BERT classes. The
    embedding layer and the first ``args["freeze_lf_layers"]`` encoder layers
    are excluded from training.
    """

    def __init__(self, args, device):
        super(LFEmbeddingModule, self).__init__()
        self.args = args
        checkpoint = self.args["model"]
        if 'longformer' in checkpoint:
            model_cls, tokenizer_cls = LongformerModel, LongformerTokenizer
        else:
            model_cls, tokenizer_cls = BertModel, BertTokenizer
        self.lf_model = model_cls.from_pretrained(checkpoint, output_hidden_states=True).to(device)
        self.lf_tokenizer = tokenizer_cls.from_pretrained(checkpoint)
        self.device = device

        # Freeze the embeddings and the leading encoder layers.
        frozen_blocks = [self.lf_model.embeddings]
        frozen_blocks.extend(self.lf_model.encoder.layer[:self.args["freeze_lf_layers"]])
        for block in frozen_blocks:
            for weight in block.parameters():
                weight.requires_grad = False


    def get_embeddings(self, comments, titles, descriptions, transcripts, other_comments):
        """Tokenize one batch, concatenate the enabled text fields and run the encoder.

        Each row becomes exactly ``args["max_len"]`` token ids: the comment
        first (if enabled), then title, description, transcript and other
        comments in that order, truncated and then padded with the tokenizer's
        pad id. Returns whatever the wrapped encoder returns for that batch.
        """
        cfg = self.args
        total_budget = cfg["max_len"]
        # Every per-field budget is read up front, whether or not the field is on.
        extra_fields = (
            ("add_title", cfg["title_token_count"]),
            ("add_description", cfg["desc_token_count"]),
            ("add_transcription", cfg["transcript_token_count"]),
            ("add_other_comments", cfg["other_comments_token_count"]),
        )
        meta_padding = 'max_length' if cfg["pad_metadata"] else False

        batch_ids = []
        for comment, *extras in zip(comments, titles, descriptions, transcripts, other_comments):
            ids = []
            if cfg["add_comment"]:
                ids = self.lf_tokenizer.encode_plus(comment, max_length=total_budget, padding=False, truncation=True)['input_ids']
            for (flag, budget), text in zip(extra_fields, extras):
                if not cfg[flag]:
                    continue
                piece = self.lf_tokenizer.encode_plus(text, max_length=budget, padding=meta_padding, truncation=True)['input_ids']
                # Only the first segment keeps its leading special token.
                ids.extend(piece[1:] if ids else piece)
            ids = ids[:total_budget]
            ids.extend([self.lf_tokenizer.pad_token_id] * (total_budget - len(ids)))
            batch_ids.append(ids)

        token_batch = torch.tensor(batch_ids).to(self.device)
        # NOTE(review): no attention mask is passed, so pad positions are
        # presumably attended to like real tokens — confirm this is intended.
        return self.lf_model(token_batch)

class VisionModule(nn.Module):
    """Frozen video feature extractor built from torchvision's pretrained r3d_18.

    All children of the pretrained network except the last are chained into
    ``self.model``, and every parameter has gradients disabled.
    """

    def __init__(self, args, device):
        super(VisionModule, self).__init__()
        self.args = args
        self.device = device
        backbone = torchvision.models.video.r3d_18(pretrained=True)
        # Drop the final child of the network and keep the rest.
        feature_layers = list(backbone.children())[:-1]
        self.model = torch.nn.Sequential(*feature_layers).to(device)
        for weight in self.model.parameters():
            weight.requires_grad = False

    def get_embeddings(self, frames):
        """Embed every clip of the batch separately and concatenate the results.

        A clip is indexed as (channels, frames, H, W). Frames whose values sum
        to exactly zero are counted, and that many frames are cut from the END
        of the clip before it is passed to the model.
        """
        frames = frames.to(self.device)
        per_clip = []
        for clip in frames:
            frame_totals = clip.sum(dim=(0, 2, 3))
            n_blank = (frame_totals == 0).nonzero().flatten().shape[0]
            stop = -n_blank if n_blank else None
            trimmed = clip[:, :stop, :, :]
            features = self.model(trimmed[None, ...])
            per_clip.append(torch.flatten(features, start_dim=1))

        return torch.cat(per_clip)

        
class CommentModel(nn.Module):
    """Two linear heads (multi-label logits and binary probability) over one embedding.

    The input width is 768 when ``args["model"]`` contains "base" and 1024
    otherwise, plus 512 when ``args["add_video"]`` is set. The multi-label
    head has 4 outputs when ``args["remove_none"]`` is set and 5 otherwise.
    """

    def __init__(self, args):
        super(CommentModel, self).__init__()
        self.args = args
        width = 768 if 'base' in self.args["model"] else 1024
        if self.args["add_video"]:
            width += 512
        self.fc_size = width
        n_targets = 4 if self.args["remove_none"] else 5
        # Both heads stay wrapped in Sequential so parameter names keep their ".0." part.
        self.fc_multilabel = nn.Sequential(nn.Linear(self.fc_size, n_targets))
        self.fc_binary = nn.Sequential(nn.Linear(self.fc_size, 1), nn.Sigmoid())

    def forward(self, text_emb, vision_emb):
        """Return ``[multilabel_logits, binary_probability]`` for the batch.

        ``vision_emb`` is concatenated to ``text_emb`` along dim 1 only when
        ``args["add_video"]`` is set; otherwise it is ignored.
        """
        if self.args["add_video"]:
            features = torch.cat([text_emb, vision_emb], dim = 1)
        else:
            features = text_emb
        return [self.fc_multilabel(features), self.fc_binary(features)]

In [6]:
import argparse
import os
import numpy as np

import torch
from torch.utils.data import DataLoader
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
import ast

In [7]:
def accuracy(pred, labels):
    """Number of elementwise matches between `pred` and `labels`, divided by `pred.shape[0]`."""
    matches = pred == labels
    n_rows = pred.shape[0]
    return np.sum(matches) / n_rows

def save_checkpoint(state, args, name, filename='checkpoint.pth.tar', is_best=False):
    """Write `state` into ``args["work_dir"]``, but only when `is_best` is true.

    The target file is ``lf_model_<name>.pth.tar`` when `filename` contains
    'lf_model', otherwise ``comment_model_<name>.pth.tar``. `filename` is used
    only for that choice; nothing is written to it.
    """
    if not is_best:
        return
    prefix = 'lf_model_' if 'lf_model' in filename else 'comment_model_'
    target = os.path.join(args["work_dir"], prefix + str(name) + '.pth.tar')
    torch.save(state, target)


def load_weights(name, lf_model, comment_model, args, map_location="cpu"):
    """Restore the encoder and the head from the checkpoints saved under `name`.

    Reads ``lf_model_<name>.pth.tar`` and ``comment_model_<name>.pth.tar`` from
    ``args["work_dir"]`` and loads the ``'state_dict'`` entry of each into
    ``lf_model.lf_model`` and ``comment_model`` respectively.

    Args:
        name: run name embedded in the checkpoint file names.
        lf_model: object whose ``lf_model`` attribute receives the encoder weights.
        comment_model: module that receives the head weights.
        args: dict providing ``work_dir``.
        map_location: device the checkpoint tensors are deserialised onto.
            Defaults to ``"cpu"``, which exists on every machine. The original
            hard-coded ``mps`` and so failed wherever MPS is unavailable.
            ``load_state_dict`` copies the values into the modules' existing
            parameters, so the modules stay on their current device.
    """
    lf_checkpoint = os.path.join(args["work_dir"], 'lf_model_' + str(name)+'.pth.tar')
    comment_checkpoint = os.path.join(args["work_dir"], 'comment_model_' + str(name)+'.pth.tar')

    # NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
    lf_state = torch.load(lf_checkpoint, map_location=map_location)['state_dict']
    lf_model.lf_model.load_state_dict(lf_state)
    comment_state = torch.load(comment_checkpoint, map_location=map_location)['state_dict']
    comment_model.load_state_dict(comment_state)
    return

def collate_fn(batch):
    """Merge dataset items into one batch, zero-padding clips to a common frame count.

    Text fields are returned as tuples. Clips are padded at the end of dim 1
    up to the longest clip in the batch and stacked. Binary labels become a
    (batch, 1) tensor and multi-label targets are stacked along dim 0.
    """
    comments, titles, descriptions, transcriptions, other_comments, frames, label_binary, label_multilabel = zip(*batch)

    longest = max(clip.size(1) for clip in frames)
    # F.pad takes (left, right) pairs starting from the LAST dim, so the third
    # pair pads the frame axis, and only at its end.
    padded_clips = [F.pad(clip, [0, 0, 0, 0, 0, longest - clip.size(1)]) for clip in frames]
    frames = torch.stack(padded_clips)
    label_binary = torch.tensor(label_binary).reshape(-1, 1)
    label_multilabel = torch.stack(list(label_multilabel), dim=0)
    return [comments, titles, descriptions, transcriptions, other_comments, frames, label_binary, label_multilabel]


def get_data_loaders(args, phase):
    """Build the DataLoader for `phase`; only the "train" phase is shuffled.

    Batch size and worker count come from ``args["batch_size"]`` and
    ``args["num_workers"]``.
    """
    dataset = HateSpeechData(args, phase)
    return DataLoader(
        dataset,
        collate_fn=collate_fn,
        batch_size=args["batch_size"],
        shuffle=(phase == "train"),
        num_workers=args["num_workers"],
    )

In [8]:
def train_one_epoch(train_loader, epoch, phase, device, criterions, optimizer, lf_model, vision_model, comment_model, multitaskloss_instance, args):
    """Run one optimisation pass over `train_loader`.

    Args:
        train_loader: iterable yielding the 8-field batches built by the collate function.
        epoch: epoch number, used only in the progress line.
        phase: text prefix for the progress line.
        device: device the label tensors are moved to.
        criterions: ``criterions[0]`` scores the multi-label output and
            ``criterions[1]`` the binary output.
        optimizer: optimizer stepped once per batch.
        lf_model, vision_model, comment_model: text encoder wrapper, video
            encoder wrapper and prediction head.
        multitaskloss_instance: module that combines the two stacked losses.
        args: run options; ``add_video`` is read here.

    Returns:
        ``(mean combined loss, mean binary accuracy)``, both weighted by the
        number of samples in each batch.
    """
    # The video backbone stays in eval mode even while the rest trains.
    lf_model.lf_model.train()
    vision_model.model.eval()
    comment_model.train()
    multitaskloss_instance.train()

    losses = AverageMeter()
    acces = AverageMeter()
    for itr, (comment, title, description, transcription, other_comments, frames, label_binary, label_multilabel) in enumerate(train_loader):
        label_binary = label_binary.to(device)
        label_multilabel = label_multilabel.to(device)
        # Weight the meters by the real batch size: the last batch of an epoch
        # can hold fewer than args["batch_size"] samples, and weighting it as
        # a full batch skews the epoch averages.
        n_samples = label_binary.shape[0]

        vis_emb = None
        if args["add_video"]:
            vis_emb = vision_model.get_embeddings(frames)

        # Element [1] of the encoder output feeds the head
        # (presumably the pooled sequence representation — confirm).
        text_emb = lf_model.get_embeddings(comment, title, description, transcription, other_comments)[1]
        output = comment_model(text_emb, vis_emb)

        loss_multilabel = criterions[0](output[0], label_multilabel)
        loss_binary = criterions[1](output[1], label_binary)

        losses_stack = torch.stack([loss_multilabel, loss_binary])
        multitaskloss = multitaskloss_instance(losses_stack)

        optimizer.zero_grad()
        multitaskloss.backward()
        optimizer.step()

        # Round the binary output to 0/1 before comparing it with the labels.
        output_binary = np.round(output[1].detach().cpu().numpy())
        label_binary = np.round(label_binary.detach().cpu().numpy())

        acc = accuracy(output_binary, label_binary)
        acces.update(acc, n_samples)

        losses.update(multitaskloss.item(), n_samples)

        if itr % 25 == 0:
            print(phase + ' Epoch-{:<3d} Iter-{:<3d}/{:<3d}\t'
                'loss {loss.val:.4f} ({loss.avg:.4f})\t'
                'accu {acc.val:.3f} ({acc.avg:.3f})\t'.format(
                epoch, itr, len(train_loader), loss=losses, acc=acces))

    return losses.avg, acces.avg
        
def eval_one_epoch(test_loader, epoch, phase, device, criterions, lf_model, vision_model, comment_model, multitaskloss_instance, args):
    """Evaluate on `test_loader` without gradient tracking.

    Args:
        test_loader: iterable yielding the 8-field batches built by the collate function.
        epoch: epoch number, used only in the progress line.
        phase: text prefix for the progress line.
        device: device the label tensors are moved to.
        criterions: ``criterions[0]`` scores the multi-label output and
            ``criterions[1]`` the binary output.
        lf_model, vision_model, comment_model: text encoder wrapper, video
            encoder wrapper and prediction head.
        multitaskloss_instance: module that combines the two stacked losses.
        args: run options; ``add_video`` is read here.

    Returns:
        ``(mean loss, mean binary accuracy, preds, labels)``. The means are
        weighted by the number of samples in each batch. ``preds`` and
        ``labels`` hold ONE nested list per batch; each row is the multi-label
        columns followed by the binary column.
    """
    lf_model.lf_model.eval()
    vision_model.model.eval()
    comment_model.eval()
    multitaskloss_instance.eval()

    losses = AverageMeter()
    acces = AverageMeter()

    preds = []
    labels = []
    with torch.no_grad():
        for itr, (comment, title, description, transcription, other_comments, frames, label_binary, label_multilabel) in enumerate(test_loader):
            label_binary = label_binary.to(device)
            label_multilabel = label_multilabel.to(device)
            # Weight the meters by the real batch size: the last batch can
            # hold fewer than args["batch_size"] samples, and weighting it as
            # a full batch skews the reported averages.
            n_samples = label_binary.shape[0]

            vis_emb = None
            if args["add_video"]:
                vis_emb = vision_model.get_embeddings(frames)

            # Element [1] of the encoder output feeds the head
            # (presumably the pooled sequence representation — confirm).
            text_emb = lf_model.get_embeddings(comment, title, description, transcription, other_comments)[1]
            output = comment_model(text_emb, vis_emb)

            loss_multilabel = criterions[0](output[0], label_multilabel)
            loss_binary = criterions[1](output[1], label_binary)

            losses_stack = torch.stack([loss_multilabel, loss_binary])
            multitaskloss = multitaskloss_instance(losses_stack)

            output_binary = output[1].detach().cpu().numpy()
            output_multilabel = output[0].detach().cpu().numpy()
            # print(np.sum(np.exp(output_multilabel), axis=-1))
            # Row-wise softmax over the multi-label outputs.
            # NOTE(review): the visible run cell trains this head with
            # BCEWithLogitsLoss (a per-class sigmoid), so a softmax here looks
            # inconsistent — confirm. Left unchanged so saved predictions
            # keep their format.
            output_multilabel = np.exp(output_multilabel) / np.sum(np.exp(output_multilabel), axis=-1)[:, np.newaxis]

            label_binary = np.round(label_binary.detach().cpu().numpy())
            label_multilabel = np.round(label_multilabel.detach().cpu().numpy())

            acc = accuracy(np.round(output_binary), label_binary)
            acces.update(acc, n_samples)

            losses.update(multitaskloss.item(), n_samples)

            final_outputs = np.c_[output_multilabel, output_binary]
            final_labels = np.c_[label_multilabel, label_binary]
            # print(final_outputs.shape)

            # NOTE(review): one nested list per batch. A shorter final batch
            # makes the result ragged, which a later np.array(...) may reject.
            preds.extend([final_outputs.tolist()])
            labels.extend([final_labels.tolist()])

            if itr % 25 == 0:
                print(phase + ' Epoch-{:<3d} Iter-{:<3d}/{:<3d}\t'
                    'loss {loss.val:.4f} ({loss.avg:.4f})\t'
                    'accu {acc.val:.3f} ({acc.avg:.3f})\t'.format(
                    epoch, itr, len(test_loader), loss=losses, acc=acces))

    return losses.avg, acces.avg, preds, labels

In [10]:
def get_params():
  """Return a fresh dict holding the default options shared by every run."""
  return {
    # Paths of the checkpoint directory and the three data splits.
    "work_dir": '../..//models/27May2023/',
    "train_question_file": '../../data/with_aug_ttv/train.csv',
    "validation_question_file": '../../data/with_aug_ttv/eval.csv',
    "test_question_file": '../../data/with_aug_ttv/test.csv',
    # Optimisation and encoder settings.
    "batch_size": 8,
    "model": 'bert-large-cased',
    "lr": 0.0003,
    "num_workers": 0,
    "max_epochs": 0,
    "max_len": 512,
    "gpu": '0',
    "freeze_lf_layers": 23,
    # Which text fields are fed to the encoder, and their token budgets.
    "metadata_path": '../../data/extra_data_trans.csv',
    "pad_metadata": True,
    "add_comment": True,
    "add_title": False,
    "title_token_count": 50,
    "add_description": False,
    "desc_keyphrase_extract": False,
    "desc_token_count": 100,
    "add_transcription": False,
    "transcript_keyphrase_extract": False,
    "transcript_token_count": 300,
    "other_comments_path": '../../data/extra_data_other_comments.csv',
    "add_other_comments": False,
    "other_comments_token_count": 4096,
    # Video input and task switches.
    "add_video": False,
    "video_path": '../../data/videos/',
    "multitask": True,
    "remove_none": False,
  }

In [11]:
# Default options shared by every run.
common_args = get_params()
# Maps a run name to the argument dict used for that run.
model_args = {}
# Shallow copy, so per-run overrides on `new_args` leave `common_args` untouched.
new_args = common_args.copy()
# new_args.update({"model": "allenai/longformer-base-4096", "max_len": 4096, "freeze_lf_layers": 11, "add_title": True, "title_token_count": 80, "add_description": True, "desc_keyphrase_extract": True, "desc_token_count": 400, "add_transcription": True, "transcript_keyphrase_extract": True, "transcript_token_count": 2200, "add_other_comments": True})
# model_args["revived-dream-83"] = new_args
# new_args = common_args.copy()
# new_args.update({"model": "allenai/longformer-base-4096", "max_len": 4096, "freeze_lf_layers": 11, "add_title": True, "title_token_count": 100, "add_description": True, "desc_keyphrase_extract": True, "desc_token_count": 500, "add_transcription": True, "transcript_keyphrase_extract": True, "transcript_token_count": 4096})
# model_args["sandy-plasma-84"] = new_args
# new_args = common_args.copy()
# new_args.update({"model": "allenai/longformer-large-4096", "max_len": 4096, "freeze_lf_layers": 23, "add_title": True, "title_token_count": 80, "add_description": True, "desc_keyphrase_extract": True, "desc_token_count": 400, "add_transcription": True, "transcript_keyphrase_extract": True, "transcript_token_count": 2200, "add_other_comments": True})
# model_args["kind-sunset-85"] = new_args
# new_args = common_args.copy()
# Active run: overrides the defaults with a Longformer-large setup that adds
# title, description and transcript (key-phrase versions) to the input.
new_args.update({"model": "allenai/longformer-large-4096", "max_len": 4096, "freeze_lf_layers": 23, "add_title": True, "title_token_count": 100, "add_description": True, "desc_keyphrase_extract": True, "desc_token_count": 500, "add_transcription": True, "transcript_keyphrase_extract": True, "transcript_token_count": 4096})
# The key is the run name; the run loop uses it to find the checkpoints and to
# name the saved prediction files.
model_args["fiery-brook-86"] = new_args

In [12]:
# For every configured run: build the loaders and models, restore the saved
# checkpoint, evaluate on the test split and store predictions and labels.
for model_name, args in model_args.items():
  # Prefer Apple MPS, then CUDA, then CPU. `torch.has_mps` only says that the
  # build supports MPS, so the backend is asked whether it is really usable.
  # The CUDA device string is "cuda"; "gpu" is not a valid torch device.
  mps_backend = getattr(torch.backends, "mps", None)
  if mps_backend is not None and mps_backend.is_available():
    device = "mps"
  elif torch.cuda.is_available():
    device = "cuda"
  else:
    device = "cpu"
  print('number of available devices:', torch.cuda.device_count())

  train_loader = get_data_loaders(args, 'train')
  validation_loader = get_data_loaders(args, 'validation')
  test_loader = get_data_loaders(args, 'test')
  print('obtained dataloaders')

  lf_model = LFEmbeddingModule(args, device)
  vision_model = VisionModule(args, device)
  comment_model = CommentModel(args).to(device)
  multitaskloss_instance = MultiTaskLoss(n_tasks=2, reduction="sum")

  # criterions[0] scores the multi-label logits, criterions[1] the binary
  # head, whose output already went through a sigmoid.
  criterions = [nn.BCEWithLogitsLoss().to(device), nn.BCELoss().to(device)]

  # The video backbone is not optimised; only these three modules are.
  params = []
  for model in [lf_model.lf_model, comment_model, multitaskloss_instance]:
    params += list(model.parameters())

  optimizer = optim.Adam(params, lr = args["lr"])
  scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5, verbose=True)
  print('loaded models')

  # makedirs also creates missing parent directories and accepts an existing
  # one. The prediction files go into a sub-directory that must exist before
  # np.save is called.
  npy_dir = os.path.join(args["work_dir"], 'npy_files')
  os.makedirs(args["work_dir"], exist_ok=True)
  os.makedirs(npy_dir, exist_ok=True)

  best_eval_loss = np.inf
  train_acc = 0
  eval_acc = 0
  train_loss = 0
  eval_loss = 0
  # Training is currently disabled: this cell only evaluates a saved checkpoint.
  # for epoch in range(args["max_epochs"]):
  #     train_loss, train_acc = train_one_epoch(train_loader, epoch, 'Train', device, criterions, optimizer, lf_model, vision_model, comment_model, multitaskloss_instance, args)
  #     eval_loss, eval_acc, _, _ = eval_one_epoch(validation_loader, epoch, 'Eval', device, criterions, lf_model, vision_model, comment_model, multitaskloss_instance, args)
  #     print('Epoch-{:<3d} Train: loss {:.4f}\taccu {:.4f}\tEval: loss {:.4f}\taccu {:.4f}'
  #             .format(epoch, train_loss, train_acc, eval_loss, eval_acc))
  #     scheduler.step(eval_loss)
  #     is_better = False
  #     if eval_loss <= best_eval_loss:
  #         best_eval_loss = eval_loss
  #         is_better = True

  #     save_checkpoint({ 'epoch': epoch,
  #         'state_dict': lf_model.lf_model.state_dict(),
  #         'best_loss': eval_loss,
  #         'best_acc' : eval_acc,
  #         'monitor': 'eval_acc',
  #         'optimizer': optimizer.state_dict()
  #     }, args, 'shrey', os.path.join(args['work_dir'], 'lf_model_' + '.pth.tar'), is_better)
  #     save_checkpoint({ 'epoch': epoch ,
  #         'state_dict': comment_model.state_dict(),
  #         'best_loss': eval_loss,
  #         'best_acc' : eval_acc,
  #         'monitor': 'eval_acc',
  #         'vpm_optimizer': optimizer.state_dict()
  #     }, args, 'shrey', os.path.join(args['work_dir'], 'comment_model_' + '.pth.tar'), is_better)

  print("Training complete")

  # Checkpoints are looked up by the run name (the key in model_args).
  load_weights(model_name, lf_model, comment_model, args)

  print("Best Model loaded")

  test_loss, test_acc, test_pred, test_label = eval_one_epoch(test_loader, 0, 'Test', device, criterions, lf_model, vision_model, comment_model, multitaskloss_instance, args)
  print('Test: loss {:.4f}\taccu {:.4f}'.format(test_loss, test_acc))
  np.save(os.path.join(npy_dir, f'test_preds_{model_name}.npy'), np.array(test_pred))
  np.save(os.path.join(npy_dir, f'test_labels_{model_name}.npy'), np.array(test_label))

number of available devices: 0
obtained dataloaders


Some weights of the model checkpoint at allenai/longformer-large-4096 were not used when initializing LongformerModel: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight']
- This IS expected if you are initializing LongformerModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


loaded models
Training complete
Best Model loaded


KeyboardInterrupt: 

In [None]:
# What version of Python do you have?
import sys
import platform
import torch
import pandas as pd
import sklearn as sk

# Report the interpreter, library versions and the accelerator that would be used.
has_gpu = torch.cuda.is_available()
# `torch.has_mps` only reports build support; ask the backend whether MPS is
# really usable, and tolerate torch builds that lack the backend module.
_mps_backend = getattr(torch.backends, "mps", None)
has_mps = bool(_mps_backend is not None and _mps_backend.is_available())
# The CUDA device string is "cuda"; "gpu" is not a valid torch device name.
device = "mps" if has_mps else "cuda" if has_gpu else "cpu"

print(f"Python Platform: {platform.platform()}")
print(f"PyTorch Version: {torch.__version__}")
print()
print(f"Python {sys.version}")
print(f"Pandas {pd.__version__}")
print(f"Scikit-Learn {sk.__version__}")
print("GPU is", "available" if has_gpu else "NOT AVAILABLE")
print("MPS (Apple Metal) is", "AVAILABLE" if has_mps else "NOT AVAILABLE")
print(f"Target device is {device}")

Python Platform: macOS-13.3.1-arm64-arm-64bit
PyTorch Version: 2.1.0.dev20230528

Python 3.9.16 | packaged by conda-forge | (main, Feb  1 2023, 21:38:11) 
[Clang 14.0.6 ]
Pandas 2.0.2
Scikit-Learn 1.2.2
GPU is NOT AVAILABLE
MPS (Apple Metal) is AVAILABLE
Target device is mps


In [None]:
# Builds a device handle for the MPS backend; the cell output is its repr.
torch.device('mps')

device(type='mps')