In [1]:
import numpy as np
import pandas as pd

In [2]:
# Ground-truth test labels plus the saved test-set predictions of the two runs
# being compared (floating-snake-10 -> model1, comic-star-62 -> model2).
labels, model1, model2 = (
    np.load(npy_path)
    for npy_path in (
        '../npy_files/28Jan/test_labels_floating-snake-10.npy',
        '../npy_files/28Jan/test_preds_floating-snake-10.npy',
        '../npy_files/27Feb/test_preds_comic-star-62.npy',
    )
)

In [3]:
# First-axis positions at which each model's prediction differs from the label.
mistakes_model1, mistakes_model2 = (
    list(np.nonzero(np.not_equal(labels, preds))[0])
    for preds in (model1, model2)
)

In [4]:
# Positions where exactly one of the two models is wrong:
#   corrected_by_model2 -> model1 is wrong, model2 is right
#   corrected_by_model1 -> model2 is wrong, model1 is right
# A set has no defined iteration order, so the indices are sorted to keep the
# selected rows in test-set order.
mistakes_corrected_by_model2 = sorted(set(mistakes_model1) - set(mistakes_model2))
mistakes_corrected_by_model1 = sorted(set(mistakes_model2) - set(mistakes_model1))

In [5]:
# Per-video metadata; the BERT key-phrase columns are exposed under the names
# used in the final frame.
meta_df = pd.read_csv('../data/extra_data_trans.csv').assign(
    desc=lambda frame: frame['key_phrases_desc_bert'],
    transcript_size_increase_to_copy_stuff_easily=lambda frame: frame['key_phrases_transcript_bert'],
)
other_comments_data = pd.read_csv('../data/extra_data_other_comments.csv')

# Test split enriched with the video metadata (joined on url) and with the
# other-comments data (joined on url + comment). Left joins keep the test rows.
test_df = (
    pd.read_csv('../data/with_aug_ttv/test.csv')
    .merge(meta_df, how='left', on='url')
    .drop(columns=[
        'transcript',
        'key_phrases_desc_long',
        'key_phrases_transcript_long',
        'key_phrases_desc_bert',
        'key_phrases_transcript_bert',
    ])
    .merge(other_comments_data, how='left', on=['url', 'comment'])
)

In [6]:
# Let long text columns render (up to 5000 characters) instead of being elided.
pd.set_option('display.max_colwidth', 5000)

In [7]:
# Rows (selected by position) that model2 got wrong and model1 got right.
# NOTE(review): assumes test_df rows line up one-to-one with the prediction arrays — confirm.
mistakes_df = test_df.iloc[mistakes_corrected_by_model1]

In [None]:
# Export the selected rows for manual inspection; the frame index is written as the first column.
mistakes_df.to_csv('./mistakes_not_corrected_by_video.csv')

In [48]:
import torch
from torch import nn
from transformers import LongformerModel, LongformerTokenizer
from transformers import BertTokenizer, BertModel

class LFEmbeddingModule(nn.Module):
    """Pretrained Longformer/BERT encoder bundled with its tokenizer.

    ``args`` is a dict. Keys read by this class:

    * ``model`` – checkpoint name; a Longformer is loaded when the name
      contains ``'longformer'``, otherwise a BERT model.
    * ``freeze_lf_layers`` – number of leading encoder layers to freeze (the
      embedding layer is always frozen).
    * ``max_len`` – length every token sequence is truncated/padded to.
    * ``pad_metadata`` – pad each metadata segment to its own token budget.
    * ``add_comment``, ``add_title``, ``add_description``,
      ``add_transcription``, ``add_other_comments`` – which texts to encode.
    * ``title_token_count``, ``desc_token_count``, ``transcript_token_count``,
      ``other_comments_token_count`` – per-segment token budgets (only read
      for enabled segments).
    * ``use_attention_mask`` (optional, default ``False``) – when true, an
      attention mask that hides padding positions is passed to the encoder.
    """

    # (enable flag, token-budget key) per metadata segment, in concatenation order.
    _METADATA_SEGMENTS = (
        ('add_title', 'title_token_count'),
        ('add_description', 'desc_token_count'),
        ('add_transcription', 'transcript_token_count'),
        ('add_other_comments', 'other_comments_token_count'),
    )

    def __init__(self, args, device):
        """Load encoder and tokenizer for ``args['model']`` and freeze the lower layers."""
        super(LFEmbeddingModule, self).__init__()
        self.args = args
        if 'longformer' in self.args['model']:
            self.lf_model = LongformerModel.from_pretrained(self.args['model'], output_hidden_states=True).to(device)
            self.lf_tokenizer = LongformerTokenizer.from_pretrained(self.args['model'])
        else:
            self.lf_model = BertModel.from_pretrained(self.args['model'], output_hidden_states=True).to(device)
            self.lf_tokenizer = BertTokenizer.from_pretrained(self.args['model'])

        self.device = device
        # Freeze the embeddings and the first `freeze_lf_layers` encoder layers.
        frozen = [self.lf_model.embeddings, *self.lf_model.encoder.layer[:self.args['freeze_lf_layers']]]
        for module in frozen:
            for param in module.parameters():
                param.requires_grad = False

    def get_embeddings(self, comments, titles, descriptions, transcripts, other_comments):
        """Tokenize one batch and run it through the encoder.

        The five arguments are parallel iterables (one entry per example).
        For each example the enabled texts are encoded separately and
        concatenated in the order comment, title, description, transcript,
        other comments. Every segment after the first drops its leading token
        (the tokenizer's start-of-sequence token, presumably — confirm for the
        tokenizer in use). The result is cut to ``max_len`` tokens and
        right-padded with the tokenizer's pad id.

        Returns whatever ``self.lf_model`` returns for the ``(batch, max_len)``
        id tensor.
        """
        max_len_total = self.args['max_len']
        padding = 'max_length' if self.args['pad_metadata'] else False
        pad_id = self.lf_tokenizer.pad_token_id

        batch_ids = []
        for comment, *metadata_texts in zip(comments, titles, descriptions, transcripts, other_comments):
            token_ids = []
            if self.args['add_comment']:
                # The comment alone may use the full length budget.
                token_ids = list(self.lf_tokenizer.encode_plus(
                    comment, max_length=max_len_total, padding=False, truncation=True)['input_ids'])

            for (flag, budget_key), text in zip(self._METADATA_SEGMENTS, metadata_texts):
                if not self.args[flag]:
                    continue
                segment = self.lf_tokenizer.encode_plus(
                    text, max_length=self.args[budget_key], padding=padding, truncation=True)['input_ids']
                # Keep the leading token only when this segment opens the sequence.
                token_ids.extend(segment if len(token_ids) == 0 else segment[1:])

            # Hard truncation: anything past max_len is dropped, then right-pad.
            token_ids = token_ids[:max_len_total]
            token_ids.extend((max_len_total - len(token_ids)) * [pad_id])
            batch_ids.append(token_ids)

        indexed_cs = torch.tensor(batch_ids, dtype=torch.long).to(self.device)

        if self.args.get('use_attention_mask', False):
            # Without a mask the encoder attends to padding positions as well.
            # Opt-in because it changes the embeddings.
            # NOTE(review): existing checkpoints were presumably trained
            # without a mask — confirm before enabling.
            attention_mask = (indexed_cs != pad_id).long()
            return self.lf_model(indexed_cs, attention_mask=attention_mask)
        return self.lf_model(indexed_cs)


In [49]:
# Configuration shared by the encoder wrappers built in this notebook.
args = dict(
    model='bert-large-cased',
    add_comment=True,
    max_len=512,
    add_title=True,
    title_token_count=40,
    add_description=False,
    desc_token_count=80,
    add_transcription=False,
    transcript_token_count=200,
    add_other_comments=False,
    other_comments_token_count=512,
    pad_metadata=False,
    freeze_lf_layers=23,
    multilabel=False,
    add_video=False,
)
device = torch.device('cpu')

# Encoder wrapper for the first run (add_video is False in this configuration).
lf_model1 = LFEmbeddingModule(args, device)
# comment_model1 = CommentModel(args).to(device)
criterion = nn.BCELoss().to(device)

Some weights of the model checkpoint at bert-large-cased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [50]:
# Configuration for the second run: same settings with add_video switched on.
# LFEmbeddingModule keeps a reference to the dict it is given, so `args` is
# rebound to a copy rather than mutated in place; the dict already held by
# lf_model1 stays unchanged.
args = {**args, 'add_video': True}
device = torch.device('cpu')
lf_model2 = LFEmbeddingModule(args, device)
# comment_model2 = CommentModel(args).to(device)
criterion = nn.BCELoss().to(device)

Some weights of the model checkpoint at bert-large-cased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [51]:
import os
def load_weights(lf_model, device, run_name, models_dir='../models'):
    """Load a saved encoder checkpoint into ``lf_model.lf_model``.

    The checkpoint is read from ``<models_dir>/lf_model_<run_name>.pth.tar``
    and must contain a ``'state_dict'`` entry. The companion comment-model
    checkpoint is not loaded here.

    Args:
        lf_model: wrapper object whose ``lf_model`` attribute receives the weights.
        device: passed to ``torch.load`` as ``map_location``.
        run_name: run identifier embedded in the checkpoint file name.
        models_dir: directory holding the checkpoints (defaults to the
            location that was previously hard-coded).

    Returns:
        The same ``lf_model`` object, with the weights loaded in place.
    """
    lf_path = os.path.join(models_dir, f'lf_model_{run_name}.pth.tar')
    lf_checkpoint = torch.load(lf_path, map_location=device)
    lf_model.lf_model.load_state_dict(lf_checkpoint['state_dict'])
    return lf_model

In [52]:
# Restore each run's saved encoder weights into its wrapper
# (only the encoder is loaded; the comment models are not used here).
lf_model1, lf_model2 = (
    load_weights(wrapper, device, run_name)
    for wrapper, run_name in (
        (lf_model1, 'floating-snake-10'),
        (lf_model2, 'comic-star-62'),
    )
)

In [53]:
import nltk
from nltk.corpus import stopwords
# Make sure the NLTK stop-word corpus is available locally.
nltk.download('stopwords')
# English stop words.
# NOTE(review): only referenced from a commented-out filter in the visible cells — confirm it is still needed.
STOPWORDS = stopwords.words('english')

[nltk_data] Downloading package stopwords to /Users/chief-
[nltk_data]     blackhood/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [54]:
import numpy as np
# NOTE(review): not referenced by the functions in this cell; kept because
# code outside this view may read it.
latex_special_token = ["!@#$%^&*()"]

def generate(text_list, attention_list, latex_file, color='red', rescale_value = False):
	r"""Write a standalone LaTeX file in which each word is shaded by its score.

	Steps:
	  1. optionally rescale the scores with ``rescale`` and zero out values
	     that are not above 0.0001;
	  2. LaTeX-escape the tokens with ``clean_word``;
	  3. merge word-piece continuations (tokens that start with ``\#\#`` after
	     escaping) into the preceding token, keeping the maximum score;
	  4. drop ``[CLS]`` and ``[SEP]``;
	  5. emit one ``\colorbox{<color>!<score>}`` per remaining word.

	Args:
		text_list: tokens, same length as ``attention_list``.
		attention_list: one score per token; used verbatim as the colour
			mix percentage (NOTE(review): presumably expected in 0-100 —
			confirm, or pass ``rescale_value=True``).
		latex_file: path of the ``.tex`` file to write (overwritten).
		color: base colour name placed before the ``!``.
		rescale_value: rescale the scores before rendering.

	Raises:
		AssertionError: if the two lists differ in length.
	"""
	assert(len(text_list) == len(attention_list))
	if rescale_value:
		attention_list = rescale(attention_list)
		attention_list = [x if x > 0.0001 else 0 for x in attention_list]
	text_list = clean_word(text_list)

	# A word-piece continuation "##xyz" reads "\#\#xyz" once escaped. Raw
	# string: "\#" is not a valid escape sequence in a normal literal.
	subword_prefix = r'\#\#'
	merged_text = []
	merged_attention = []
	ind = 0
	while ind < len(attention_list):
		pieces = [text_list[ind]]
		score = attention_list[ind]
		# Absorb every continuation piece that follows this token.
		while ind + 1 < len(attention_list) and text_list[ind + 1].startswith(subword_prefix):
			pieces.append(text_list[ind + 1][len(subword_prefix):])
			score = max(score, attention_list[ind + 1])
			ind += 1
		ind += 1
		merged_text.append("".join(pieces))
		merged_attention.append(score)

	# Marker tokens are not rendered.
	# (An alternative filter on score / STOPWORDS was tried here previously.)
	rendered = [
		(word, score)
		for word, score in zip(merged_text, merged_attention)
		if word not in ['[CLS]', '[SEP]']
	]

	# The document declares UTF8 input, so write the file as UTF-8 regardless
	# of the platform's default encoding.
	with open(latex_file, 'w', encoding='utf-8') as f:
		f.write(r'''\documentclass[varwidth]{standalone}
\special{papersize=210mm,297mm}
\usepackage{color}
\usepackage{tcolorbox}
\usepackage{CJK}
\usepackage{adjustbox}
\tcbset{width=0.9\textwidth,boxrule=0pt,colback=red,arc=0pt,auto outer arc,left=0pt,right=0pt,boxsep=5pt}
\begin{document}
\begin{CJK*}{UTF8}{gbsn}'''+'\n')
		parts = [r'''{\setlength{\fboxsep}{0pt}\colorbox{white!0}{\parbox{0.9\textwidth}{'''+"\n"]
		for word, score in rendered:
			parts.append("\\colorbox{%s!%s}{"%(color, score)+"\\strut " + word+"}\n")
		parts.append("\n}}}")
		f.write("".join(parts)+'\n')
		f.write(r'''\end{CJK*}
\end{document}''')

def rescale(input_list):
	"""Min-max rescale a sequence of numbers to the range 0-100.

	Args:
		input_list: numeric sequence (anything ``np.asarray`` accepts).

	Returns:
		A list of floats where the minimum maps to 0 and the maximum to 100.
		An empty input yields an empty list. When all values are equal there
		is no spread to scale, so every entry is 0.0 (instead of the NaNs a
		division by zero would produce).
	"""
	the_array = np.asarray(input_list)
	if the_array.size == 0:
		return the_array.tolist()
	the_max = np.max(the_array)
	the_min = np.min(the_array)
	span = the_max - the_min
	if span == 0:
		return np.zeros(the_array.shape, dtype=float).tolist()
	scaled = (the_array - the_min)/span*100
	return scaled.tolist()


def clean_word(word_list):
	"""Escape LaTeX special characters in every word.

	Args:
		word_list: iterable of strings.

	Returns:
		A new list with one escaped string per input word. ``% & # _ { } $`` are
		prefixed with a backslash; ``\\``, ``^`` and ``~`` are replaced by their
		text-mode commands, because a backslash in front of them means
		something else in LaTeX (line break / accent commands).
	"""
	# str.translate works in a single pass, so the backslashes and braces that
	# the replacements introduce are never escaped a second time.
	latex_escapes = str.maketrans({
		"\\": r"\textbackslash{}",
		"^": r"\textasciicircum{}",
		"~": r"\textasciitilde{}",
		"%": r"\%",
		"&": r"\&",
		"#": r"\#",
		"_": r"\_",
		"{": r"\{",
		"}": r"\}",
		"$": r"\$",
	})
	return [word.translate(latex_escapes) for word in word_list]




In [55]:
# comments = ['WHITE PEOPLE 🙎🏻‍♀️🙎🏻‍♂️ IN THEIR FINEST MOMENT 👍🏻👍🏼👍🏽']
# titles =["Australia: Thousands take out protests in Sydney against lockdown | COVID | Coronavirus Restrictions"]
# descriptions = ["lockdown protests sydney http google news delhi bring world world truly channel clean respectful aim empower people reportage stand partisan comes politics potted plants water using racist sexist hour updates zee neutral core headquarters new australians channel wion bottles police issues depth people took anti media handles facebook insults subscribe entering fifth week analysis deliver information biased bussiness demanding end people explore turned thousands personal refrain using examines global issues truly discussions provide india http united protesters threw tired biased antilockdownprotests bring news hour non violent website australia slurs journalists neutral subscribe channel http saturday handles wion news day aim"]
# transcripts = ["lockdown protests sydney provide adequate vaccine end 30th potted plants horses 57 people nations government targeting agree case numbers download app news public health orders coronavirus state appears australia managed pandemic escaping completely removed reality violent looked quickly new south paint water bottles restrictions just travel public gatherings struggle spikes touch ground rate developed maskless people let voices heard mercy delta variant 15 australians fully participated anti day authorities suggested flags chanting infections 24 hours feels like adult population end record 163 new flouted unscathed wales level largely control officers physically appears poised extend growing anger restrictions populous state weeks remain place october supplies demonstration came covid lockdown contain outbreak arrested city waving australian lot demonstrators flouted rules year available country people marched australians happening happening covid restrictions growing state reached failure provide million people lockdown non essential just completely download escaping early pandemic variant australia despite assaulted police horses people arrested protest spikes infections mercy extend lockdown end october protest turned turned outbreak adults 163 new infections physically orders 3000 july despite struggle managed half just speak rules non defying public sydney defying public early appears adequate 19 water australia populous state state weeks lockdown hours australian flags restrictions government failure australians fully vaccinated control 15 new physically assaulted authorities ground lot people health contain largely suggested cities quickly pandemic level largely participated anti lockdown bottles police covid 19 case threw end lot people agree developed quickly protesters threw 57 people arrested lockdown cities growing government targeting vaccination let australia 25 million just completely removed news australians participated anti came 
like travel heard weeks unscathed half australia struggle place people like happening covid country download app protest demonstration came thousands australians defying public health lockdown end 30th population vaccinated rate marched numbers state pandemic unscathed available day app protests sydney defying plants paint nations remain covid violent delta"]
# other_comments = ["protesting fweedumb mate china vaccine power lockdowns end intravenous antibiotics government guidelines just end civilization worried contains graphene oxide getting australia use facemask undermining pfizer moderna distrib flatten curve proving counterproductive watch fought freedom lifetime covid refuse footage telegram news funded faucci wuhan ouida health aren protected attacking path risk mutation reaction kill babies womb thank god haven seawater milligrams liter india burning bodies martial law coming screens just cases science govt blm milking content outcry collar sent minutes suggesting symbiotic relationships deference spread virus want christmas cancelled just heart haha schools learn home cancer patients ahead churches mosques just swab just people just western world irony 10k travel limit labelled anarchists movement speech paris lower oxygen intake free drugs destroying unsubcribe use myob concern problems flu died right ways including current start playing basketball certainly don listen arent stupid hypocrisy barbar avoid mass juga unknown reasons approach just life ah hell nah function carriers state injected drivers license 19 2020 45 restrict affeçt theoretically elected democratically robot humans seeing boggling sane horses closer paternal grandpa blacks compared called developed stand bullshit 99 like hitler passport just stores imagine agroup guys shared cakes work support decided wish waste continue chase bastards tactical vests overalls nah join government said trick gffggjr use live bullets worldwide waiver signed time politician wall pht possible don need 3rd started violence avoid important israel japan americans smiling face bio eagle trophy police calling jews blessup acceptable level dissolved legislation fish kill vaxx cards distrib list just supposedly represent need batons fools poor pandemic control taking reading comments fight lies care riot pillage people pay homage fist medium skin energies considering 
protest cops travel jones anchor sky blowing points intensely second wave poor reporting shit receiving jab passed singapore approach won care ask solution stupidity british jail prisoners facility separate just outcry countries attention trees probably kind bahrain qatar south scared just weeks poison let wait steal portland face understand tyranny vote just concert human rights proving brazil indonesia protesting checking censorship just agency ies bfar australians believe virus outback looked appealing lazy wion wion millions ignoring huge swaths called american patriots better soon just enjoy endless makes total sense nsw police chief firing innocent unarmed prudent cease world gone cracked floor laughing ya orang rates fewer 14 deity allan cashing health justify relaxing implements raised eyebrow community shutting economy limiting dr foulcheeze intensely selfish medical information bred toughness generation kruger affect epic power people raised lost minds majority sent mail government acceptable obey sovereign society aren allowed leave think stuff making deaths total continent tell different way just unjust distance public right just replace gonna drop dead primates trapped rishikesh 16 locked like south angry young braver today gangs fully chat don use hey forcing history just travel states indians behaved don intend fox news style problems country salute did 11 terrorism agressors australia flexed biceps hurt bastards town thank ensured gained function today wearing masks wow really ve pneumonia times know aunty looks like time hope continue oppressors fighting free station bs people know safe markings temporary treatment allow deference rule flu lockdown fascistic ask anzacs willingly cope war need gyms salons spas wish americans balls cose feel living decision makers baby gathering led taking couple rolling mate claiming research better autocracies just just shit obvious yo non essential businesses course mass unrest children chinese freedoms joy news 
remember let know government people sick authorities telling years just stop overwhelming doctors say hard protestors remember aussies whyrus good mass media trust developing 1984 society lowest iq country areas just government ahead loby liter according government lesson minimise appealing fucking world 3rd world channels app just tracing exists ganges weeks kumbha issue lol don fit label funny prolong use stand ppl ve earned lol don want governance sort just metres just months endured 2nd infect family destroying black better millions healthcare extent sea lowering right kill people suriname netherlands truth police started help football brainwashing catering lowest doing help protect victim just save willingly let thinking face martial dunning remember china created survival rate time vid american claiming won stop 958 cowards country rupert risks illnesses human pubs just send unwell receiving deserve consideration like india brazil construction industry 10k useless fweedumb virus want protesting authority theoretically cure worse doses don happen australia battle bio ve contact age 80 chicago free work world lot ways speak compliment sunglasses lost ur just scientific fact 11 teach science huge police state italy protesting hmmm mandatory vaccines shut morons concern delicate australia government health limiting freedom movement reaction convicts good robots bodies september serving certainly distancing goes grinning caught vaccine idiots covid embracing south korea getting implements lockdown china right rights australia imagine policemen garbage people cooperate government solve skin tone australia wants nature happy birthday shared resisting ur injected deadly virus democracy stress self aren ies sick vulnerable flu looks idiot theories australia protest ah hell east instead play just non comments mind boggling behaved major stores wear sense fight death week proud people going including children 50000 sydney quickly face tears law just second trying 
hardest bne fighting going world attention social fascistic unnecessary course granny just lockdown human possibly send kids makers protestors covid 958 covid deaths real better way world sees supports limit black community freedom virus government forcing world people don taking away people politicians lost major baby tossers care homes just ur freedom reason slave lads don know law jail fine laughing tossers virus learn just good leave fate democracy developed nations face lol violent angry restriction gffggjr australia worldwide replace flu covid deadly suggesting basketball protest undermining life times wuhan china scientists just scared oppressive moderna fucking sorry hell gone use order protect kill low bne vaccine 50"]

# comments = ['They featured kim in this. She should be ashamed of herself']
# titles =["What’s Changing About Fashion’s Relationship to the Body? | Vogue"]
# descriptions = ["makeup artists fara size body acceptance company stadium embrace diverse winding imaan hammam assistants nolan eakin runway models shape maximilian pittner production valletta speaks director camille auras 1st kang talent jeneil anura idupuganti rebecca purshouse bbe valdes joffroy jamois director photography designer berlin arabella romen hair peterson production summers hair assistants rei road maya singer hasmik ariel shea hardy manicurist kim kardashian west miranda 1st fashion quante stylist amber executive producer art asst mathias peralta gaffer moore talent ad la brytscha talent mia sarr long loader grau elliot soriano sergey nikitenko 1st valli fashion director hayden sylvia wheeler rabea ac victor braider starsha appling paloma elsesser"]
# transcripts = ["woman fit certain charge new anymore marks spices forward look camera feel like body born body skin warmth softness say really impresses narrative means ageless lot shape size turn love grabbing fat hayek like jennifer imposed trans supposed look exist takes don really opened having heard stretch just sounds stupid big seeing baby journey beginning caricatures means man coarse fits dimpled people commenting instinct really muscles beautiful beautiful prove turn hips salma world strength didn sold alive supposed time moving sacred know lopez eyes ve seen having man cause like represents started women like kind just appreciate fits life till womanly strong don really bits ok like look body way power anymore like just like prove woman didn stretch marks like skin man woman imposed certain person shape camera ve sold say size glad baby just world sounds love seen necessarily don know know feel ageless glad means new stepping supposed exist way felt right body filled stupid time know body journey trans people body way like ve pretty body fat just beautiful forward dimpled bits feel impresses body instinct like jennifer lopez appreciate body lot time say really really like body body sacred eyes just like beautiful beautiful way alive big look way like felt like prove means man woman till takes grabbing body know started felt heard ok body strong muscles moving forward look glad charge turn born didn know womanly age don know women alive supposed look commenting person shape jennifer like age necessarily exist like just sounds represents started camera fit certain way felt like don really feel beginning life softness coarse spices big seeing just really impresses body sold seeing baby just person look camera ve represents started women hips right size people body ageless lot caricatures way like opened cause power body body time fat just appreciate strength didn know supposed look marks moving fit certain narrative dimpled size glad charge"]
# other_comments = ["body vogue subtly time diversity size share accepts responsibility ones drive plastic men red heart kim didn play psychological impact gym relentlessly worry women hibiscus woman years hypocrisy implemented modelling industry young girls eating surgery having video isn beautiful family filters photoshop ect using slim built comments pressure coming ahhhhhhh great short overweight obese women making like exists possible forget read frickin hairline altered standards saying hasn size shape ideologies week honestly american don need cover oppressed mainly types reason doing kardashian consumers make finish change life physiques sample kinda like jlo ruining women self chapters psalms start skull skull weary lovley lol fact disorders body dysmorhpia relationship bodies refreshing talk talk brown positivity body diversity saw fav sydney acceptance trendy believed billionaires ironic casting single amazing way idc growth evidently just creepy asses think promoted people fit today gift called pedaled propaganda version released cashing aspect clown media creates unhealthy lately ok hand feel like gainning claims look like face tears woman dancing medium present tandem apology let bring ages ethnicities represented pressured body issues setting online hate model patriarchal body standards mia kang house hayek heroine chic pushing course old ideal niped tucked esteem cute plus world scenes absolutely absolute source stress complex story forced buy politically correct glad sort defeats entire amt omg vouge great example comfortable faith wishes cover shame struggle attain perfect stigmas stereotypes pressure irresponsible power make love discourse criticism self image changing bit contradictory putting men fashion advertised dark skin tone map say directly bcuz missing selma just bodies looked lol ladies let know norme actually norme models current body literally perpetuated altered speak european countries france yes absolutly beauty industry help ago 
companies like big booty buy magazines pay fat love message women body constantly apply kind say want decades trying fit people aren insecurities trying body considering fact shape person want industry just does saviour smart ummmmm season different shapes society guys literally awwwww jenners belong stats interested sell look like latino good things possible fun thank just rumored evidently purrrr queeens god god moment pass make great job shoutout section conversations important unrealistic body expectations say didn contribute turn pages movement ladies heart suit kim applause creators history tomorrow mocking point changing fashion relationship damaging eurocentric seeing ultra trash shot having capitalism miss milan model problem basically started especially models women won like fit tired doctored strength men slightest body types years rose growing real proverbs till making young impressionable mention black women say oppose hijjabbi publicly change body don product people criticisms appropriates whats popular rectifying lol funny definitely role knife times count millions girl confidence absolute bell aren obligated looking kinds foinneee train important yellow hope aware don aesthetic pressure hypocritical production lol purrrr sample sized model promoting unattainable maybe share wtf oppressor fight 50 instead like woman used body acceptance kardashians acting like pay attention victim wooooooh fantastic beating size women trans just coz body piece clothing online plastic surgery shame biggest companies install think different included expectations kim sisters fitness men step today easier beauty hope just parody dreams come relentlessly trending irresponsible pushing anorexia young honestly insulting know kim kardashian tired glad season insane think innocent athletic positive showcasing held saying looks deeply external force standard finally realizing dude iconic photo stop featuring salma everyday help ownership vogue perpetuated reason young women 
fit nice positivity heart underweight models growth apologize strange changes models let credits house smiling face diversity honestly shocked better peace successful whats make best decision girls rude don does sound death truth unrealistic beauty appearing women dont photoshop subjected standards insane ect social revolving hearts revolving women talking video bravo source skinny entire purpose innocent setting body worry people won isn ok title says exactly abled single handedly responsible middle aspect aggravating let pretend didnt feel good compelled unhealthy body kim video ironic perfect body maybe inflicted everybody don stress comes people refreshing company present trend girl mia answer shouldn change men allowed fat surgeries share struggle ones desirable built glad fashion industry body norme stories money selling insecurities vogue love video like videos like just ridiculous represented fashion ironic money change kim looks kinda like content applies women fashion anybody know joy life right allowing film amazing message standards time product ummmmm sure belong wow awesome jennifer lopez different facets india make feel better rest wanna white don fav politically place start desirable body types hate model obese social media creates plus size profited deeply damaging ve women forced soon guarantee just way hey person preaching calm biggest love video bodies bad vogue oppressor camera used saw missing think beautiful eyes thumbs message great promoting making thank vogue red mystery body title acceptance women big like bit kim women dont beauty standards feel really guys yesterday generations think heart guys say true beautiful yes thumbs vogue acting ppl women faith certainly don look read holy sell surprised didn body way money course standards attainable plastic fashion huge impact fortune making finally impact choices women women make billionaires men just don vogue perpetuator negativity talking unobtainable size larger wish energy clown face big 
comes people relationships wow fashion week fight body confidence im hater sorry vogue credibility create standard unrealistic kardashian saying isn count omg purple don live according body image women love wrong wooooooh women feel good vogue pushes unattainable pushing look different appearing sisters entire people fit beauty booty women map doing don let make especially india vogue know kind millions readers turn younger women muscular athletic skinny models casting advertised using mentioning speak directly doesn feature reasons place self criticisms fun thank preaching accepting body make video 90 shame vogue didn install perfect size says body wish constantly changes pressure coming kim true pray time generations insecurity women female exists media apply differently abled bodies"]

# comments = ['WHO working like Wuhan Health Organization']
# titles =["‘Can’t force China…’: World Health Organization on Covid origin controversy"]
# descriptions = ["origin coronavirus china transparent details updates hindustan times second phase studies countries demanding telegram http ryan director agency health organisation said cov watch video details emergence sars begin soon added said compel press bell icon country divulging data needed understand virus investigating amid origin michael mike facebook times like data covid icon youtube latest emergencies china world instagram http linkedin second investigating origin programme details connect sars cov latest updates understand covid origin china transparent details studies needed needed virus origin begin emergence compel country china world health youtube details connect hindustan hindustan times telegram divulging http twitter video details times press soon countries coronavirus"]
# transcripts = ["organization compel china multiple outbreaks emergencies open transparent agree rules does power investigate express permission origins sars v2 continue work parties 194 understanding knowledge level preventing world health phase propose necessary vast majority extremely polio education clean happened wuhan constitution member state investigate enter countries future terms case covid origins study order year remarkable cooperation basis worked extremely smallpox eradication dealing thank question just perspective task continue second dealing multiple established just remind doesn rules perspective powers easy task v2 important important compel work country persuade order able terms preventing pandemics come determine diseases taken work consensus worked response emergency response world understands origins power compel world regards sense does state organization sense necessary permission cooperation country china open china come clean especially case determine member majority countries engaging states agree propose necessary studies health organization persuade taken years wuhan world occur enter propose able understand country basis organization second education happened eradication dealing multiple question compel regard countries engaging outbreak basics countries express doesn important important future remarkable persuade china cooperation vast majority emergencies occur year parties especially investigate continue health organization compel thank sense does power organization established constitution response terms preventing level open work cooperation compel china come agree dealing organization just engaging sense 194 member powers investigations outbreak 194 member states important world understands covid diseases extremely pandemics easy determine work sense perspective powers compel clean studies understanding diseases taken years powers compel work world vast regard regards china covid origins important response continue work state 
outbreak response majority countries education smallpox polio especially case covid years organization sense 194 power investigate enter permission able wuhan world health important outbreaks power compel regard consensus compel world task organization persuade china agree rules perspective preventing pandemics easy worked extremely polio countries express permission engaging outbreak express years study china open transparent cooperation country basis constitution world health organization"]
# other_comments = ["china health demons totally transparent office wars disputes database offline september religion people died inconveniences giver placing loyal dogs labrador let talk ebola countries paying travel learns origin sars crores single time question existence alternative succeed corona accidents mishappenings guys answers guys does ccp post high voltage virus proof buddhist wuhan news timeline tears joy supporting fairfax county population majority doesn dissolve use literally hear world 19 symptoms people running http http feel dumb useless stop funding corrupt example king disputes buddhist population force nahi good job dont tell bad organization rest world modus operandi dr evidences long atrocities non cooperation mouths face responsible sorry hear 2019 lab workers mike pompeo pandemic failed countries joke collaboration caves asking friend officials undergo life puppet expel incompetent ups false religion corona originator spread summer reestablish bcoz australia just like shut baised scared issue difficult resides people statement interested statement job prevent having na samajh sakte history kashmir globalists lying v3 come years ji information transparency investigation fauci learned folded hands disputes diseases live needs ball people vaccine chins useless scumbags think members openly vastu pull waste health did release cdc test say basically ass pouting tactics pretty spain ryan bribed china transparent china believe style check remember uno ok dear friend 17 2019 absolute payment successful folded patient hour fort learn loyalty wuhan lives unite kick markson sky 1st wi fe precious lives world know origin symbols simp beijing worthless hospital covid ask sharri overcome deadliest bed newz damaad placing things illness power compel developing died trillions need governing non people conclusion disgrace africa world people user lots inconveniences countries live corrupt aligned india countries pull eyes broken vacation fuck people post 
armed believes anymore joke lost joe biden decision trust time disputes lakhs years stop check investigate receive deny accuse basic china cowards wuhan propaganda china freezed approve doctor request knew chinavirus did say vaccine money demand thing till detrick local useless organization india ago new staff giver religion single war religion family staff puppet china wiped real answers fuking 2019 world faced hide force china years sad china sold diseases live religions crucial came country speak body people looking absolute proof patient cooperation cover taken necessary health organisation corrupt request blaimed single death buddhist kar undergo praise held developing countries force step involved does mishappenings population idols approved fake timeline corona virus totally number people know blame wuhan lab force oh hear think ok going force sorry literally late china wiped south truth openly praise china home office lab pulled database china held responsible compel propaganda wuhan health news australia world forced china sorry literally need ask dog stuff large money labs pouting face paying cov force running organisation shut members simp relieved single person mouth force china hain na question existence china history single wars till history dog food china husky german deaths pathetic bloody freezed guards proof buddhist religion china believe rest important world bat country population origin virus wuhan china labs pouting fuck september receive money china pretty religion resides rolling years michael health issue alternative cover ups deadliest pandemic joke virus afraid transparent points doing good organization totally irresponsible said people corona thing county july fake chinese vaccine late bs people got upto completely hopeless taken situation just joke important bad thing religion majority dr death demand workers checked rolling eyes china long ago accidents china atrocities failed incompetent use learn died south africa power compel china 
supporting china joe know food management people responsible millions proof got forced viruses recruited like heart died corona blaimed statement don power cooperating investigation wuhan patient corrupt useless dont live collaboration china regarding oh hear buddhist china pouting face basic symptoms hide deny let origin givers single deaths inconveniences single world problem chinese http virus fairfax test rest test virus doctor single war corona afraid religion sad relieved regarding real china ok going china stuff large wuhan high corrupt aligned china information buddhist people dispute appeared ask china did release freezed approve vaccine chins useless organization india dont joy user crying face trust difficult admit died war german shepherd dissolve transparent face china 2019 world action dissolve sars cov damaad checked world people lost china globalists happens religion post face worthless decision investigation late china continue diseases like prevent pandemic speak truth china local people know undergo life style diseases like corona body come bcoz need new world use live religions timeline virus fact 1st health org needs members people world feel disputes deaths inconveniences scared china action stuff kick ass supporting hear world china happens hour spain africa got joy supporting china good illness happens investigation late china developing idols vastu religions placing overcome decision world let talk unite doesn operandi funding china china real example waste travel majority single death guys force majority doesn wars person asking friend face dogs basic ccp modus millions answers looking rest world know world learns forced china need believe anymore pull countries fuck people religion people single uno management need remember corona dispute hands china relieved face bed organization disgrace world givers single buddhist officials countries pull waste beijing didn transparency world job religion died buddhist cov force china checked hospital 
sorry hear world facing just like mike"]

# Pick one row of the test set to visualise. Every field is wrapped in a
# single-element list because the cells below pass them on as lists.
SAMPLE_INDEX = 126  # positional row index into test_df
sample_row = test_df.iloc[SAMPLE_INDEX]

comments = [sample_row['comment']]
titles = [sample_row['title']]
descriptions = [sample_row['desc']]
transcripts = [sample_row['transcript_size_increase_to_copy_stuff_easily']]
other_comments = [sample_row['key_phrases_other_comments']]

In [56]:
# Render an attention heat map of the selected sample for model 1.
# NOTE(review): the recorded run of this cell ended with
# "ValueError: too many values to unpack (expected 2)". No traceback is shown,
# so it is unclear whether the unpacking below or a later call raised it;
# confirm what get_embeddings returns.
input_ids, token_type_ids = lf_model1.get_embeddings(
    comments, titles, descriptions, transcripts, other_comments
)

# Last element of the model output, entry 23: presumably the attention weights
# of the final layer of a 24-layer model. TODO confirm.
attention = lf_model1.lf_model(input_ids)[-1]
attention = attention[23].squeeze()
# Keep index 0 of the last axis and sum over the first axis. Assumes the
# squeezed tensor is (heads, query, key), i.e. attention paid to the first
# token summed over heads. TODO confirm.
attention = attention[:, :, 0].sum(axis=0).tolist()

input_id_list = input_ids[0].tolist()  # batch index 0
tokens = lf_model1.lf_tokenizer.convert_ids_to_tokens(input_id_list)

words = tokens
word_num = len(tokens)
color = 'red'
generate(
    text_list=words,
    attention_list=attention,
    latex_file="sample1.tex",
    color=color,
    rescale_value=True,
)

ValueError: too many values to unpack (expected 2)

In [123]:
# Render an attention heat map of the selected sample for model 2.
input_ids, token_type_ids = lf_model2.get_embeddings(
    comments, titles, descriptions, transcripts, other_comments
)

# Last element of the model output, entry 23: presumably the attention weights
# of the final layer of a 24-layer model. TODO confirm.
attention = lf_model2.lf_model(input_ids)[-1]
attention = attention[23].squeeze()
# Keep index 0 of the last axis and sum over the first axis. Assumes the
# squeezed tensor is (heads, query, key), i.e. attention paid to the first
# token summed over heads. TODO confirm.
attention = attention[:, :, 0].sum(axis=0).tolist()

input_id_list = input_ids[0].tolist()  # batch index 0
tokens = lf_model2.lf_tokenizer.convert_ids_to_tokens(input_id_list)

words = tokens
word_num = len(tokens)
color = 'red'
generate(
    text_list=words,
    attention_list=attention,
    latex_file="sample2.tex",
    color=color,
    rescale_value=True,
)