In [1]:
%load_ext autoreload

In [48]:
%autoreload
import sys
sys.path.insert(0, '../')

In [49]:
from src.utilities.mluar_utils import *

In [43]:
from transformers import AutoModel, AutoTokenizer
from datasets import load_dataset, Dataset, load_from_disk
import numpy as np
from einops import rearrange, reduce, repeat
import torch
from sklearn.metrics.pairwise import cosine_similarity
from matplotlib import pyplot as plt
import math
import pandas as pd
import pickle as pkl

pd.set_option('display.max_colwidth', None)


In [5]:
# Local checkpoint directories for the two models loaded in the next cell.
# NOTE(review): these are absolute, machine-specific paths; the notebook cannot run on another
# machine without editing them.
MULTI_LUAR_PATH =  "/mnt/swordfish-pool2/milad/multi-luar-reddit-model/"
LUAR_PATH =  "/mnt/swordfish-pool2/nikhil/LUAR/pretrained_weights/LUAR-MUD/"

In [6]:
# Load models
# trust_remote_code=True allows transformers to execute the model code shipped with these
# checkpoints, so the paths must only point at checkpoints you trust.
multiluar_model = AutoModel.from_pretrained(MULTI_LUAR_PATH, trust_remote_code=True)
luar_model = AutoModel.from_pretrained(LUAR_PATH, trust_remote_code=True)
# The tokenizer is taken from the "rrivera1849/LUAR-MUD" checkpoint, not from either local path.
# NOTE(review): luar_model is not referenced by the cells visible in this part of the notebook —
# confirm it is still needed before paying the load cost.
tokenizer = AutoTokenizer.from_pretrained("rrivera1849/LUAR-MUD")

In [52]:
# Load data
# Input documents and the matching ground-truth location for the dataset named in the paths.
# NOTE(review): absolute, machine-specific paths.
data_path = '/mnt/swordfish-pool2/milad/hiatus-data/phase_2/mode_perGenre-HRS2.1/TA2/hrs_06-27-24_english_perGenre-HRS2.1/data/hrs_06-27-24_english_perGenre-HRS2.1_TA2_input'
ground_truth_path = '/mnt/swordfish-pool2/milad/hiatus-data/phase_2/mode_perGenre-HRS2.1/TA2/hrs_06-27-24_english_perGenre-HRS2.1/groundtruth/hrs_06-27-24_english_perGenre-HRS2.1_TA2'
# load_aa_data returns three values; only the first one is kept (it is used below as a
# DataFrame with `authorID` and `fullText` columns).
hiatus_data, _, _ = load_aa_data(data_path, ground_truth_path)

Loading:  /mnt/swordfish-pool2/milad/hiatus-data/phase_2/mode_perGenre-HRS2.1/TA2/hrs_06-27-24_english_perGenre-HRS2.1/data/hrs_06-27-24_english_perGenre-HRS2.1_TA2_input


In [53]:
# Keep only the rows whose author contributed more than one text.
authors_with_multiple_texts = []
for author_id, n_texts in hiatus_data.authorID.value_counts().to_dict().items():
    if n_texts > 1:
        authors_with_multiple_texts.append(author_id)
has_multiple_texts = hiatus_data.authorID.isin(authors_with_multiple_texts)
hiatus_data = hiatus_data[has_multiple_texts]

In [54]:
# Draw a fixed-size random subset of the filtered texts.
# random_state pins the draw so the embeddings, similarity matrices and extracted pairs are
# reproducible across runs. The sample was previously unseeded, which likely contributes to the
# per-layer pair counts differing between the runs recorded in this notebook.
# min(...) avoids the ValueError that sample() raises when fewer rows than requested remain
# after the author filter.
SAMPLE_SIZE = 2000
SAMPLE_SEED = 42
hiatus_data = hiatus_data.sample(n=min(SAMPLE_SIZE, len(hiatus_data)), random_state=SAMPLE_SEED)

In [55]:
# Parallel lists taken from the same frame: labels[i] is the authorID of hiatus_data_texts[i].
hiatus_data_texts = hiatus_data.fullText.tolist()
labels = hiatus_data['authorID'].tolist()

# Compute embeddings
# NOTE(review): 736 is forwarded as `max_length`; presumably a per-text token budget — confirm
# against get_luar_embeddings.
max_seq_length = 736
# Embeds with the Multi-LUAR model, one text per batch (batch_size=1).
# NOTE(review): the meaning of is_multi_luar=True is defined by the helper, which is not shown here.
hiatus_data_embeddings = get_luar_embeddings(hiatus_data_texts, multiluar_model, tokenizer, max_length=max_seq_length, batch_size=1, is_multi_luar=True)

In [10]:
#np.max([len(x.split()) for x in hiatus_data.fullText.tolist()])

### Experiment Design

- For each layer, we find pairs of ground-truth texts written by the same author that the corresponding layer's embeddings scored high compared to the other layers
- For each layer, we take a sample of these pairs of texts and prompt ChatGPT to find at which linguistic level they are similar

#### Step 1:

In [56]:
# Self-similarity of the sampled texts, computed once per Multi-LUAR layer (layers 0..6) and
# stacked along a new leading axis, so muti_luar_layers_sims[k] is the matrix for layer k.
# The extra layer=None matrix (described in the original comment as the average of the layers)
# is not included.
per_layer_sims = []
for layer_idx in range(7):
    per_layer_sims.append(
        compute_similarities(hiatus_data_embeddings, hiatus_data_embeddings, layer=layer_idx)
    )
muti_luar_layers_sims = np.stack(per_layer_sims)

In [57]:
# For every layer index 0..6, collect the text pairs that extract_sig_pairs_for_layer
# returns for that layer.
layer_to_sig_pairs = {}
for layer in range(7):
    layer_to_sig_pairs[layer] = extract_sig_pairs_for_layer(hiatus_data_texts, muti_luar_layers_sims, layer)

In [59]:
# Flatten the per-layer pairs into one record per pair, keeping at most the first 10 pairs of
# each layer (a head slice, not a random draw), then load the records into a DataFrame.
PAIR_FIELDS = ('text-1', 'text-2', 'z-score', 'layer-sim')
layer_x_pairs = []
for layer in range(7):
    sig_pairs = layer_to_sig_pairs[layer]
    print(layer, len(sig_pairs))
    for pair in sig_pairs[:10]:
        record = {field: pair[pos] for pos, field in enumerate(PAIR_FIELDS)}
        record['layer'] = layer
        layer_x_pairs.append(record)
layer_x_pairs_df = pd.DataFrame(layer_x_pairs)

0 9984
1 3
2 20
3 56
4 1
5 5
6 299


In [60]:
# Peek at the pairs recorded for layer 1 (at most the first five rows).
layer_x_pairs_df.loc[layer_x_pairs_df['layer'] == 1].head()

Unnamed: 0,text-1,text-2,z-score,layer-sim,layer
10,"Knots\n\nWatched as part of The Tara Reid Mission\n\nThe Film\nThere's a moment in Knots where <PERSON> character tells his friend that his prolific cheating on his girlfriend is okay because a) Her work takes her away a lot, and b) Sex with other women further solidifies his feeling that his girlfriend is 'the one' and that the sex with her after he's cheated is brilliant. It's this kind of mean spiritedness that defines the film, from the characters to the situations they create for themselves.\n\nSupposedly a comedy from the male perspective, very few of the characters ring true. The men themselves are hapless, bumbling idiots, so sure of their masculinity that they are unable to accept their (many) flaws, whilst the women are either screaming harpies, dumb conquests or manipulative ice maidens.\n\nIt's not all bad though. There are a few genuinely amusing moments, and <PERSON>, who is the one genuinely nice guy, does well with what little the script gives him.\n\nIf this is what modern relationships are like, count me out.\n\nHow's <PERSON>?\nNot appearing until around the 42 minute mark, <PERSON>'s <PERSON> was the only other character I didn't destest. The scenes with her and <PERSON>'s <PERSON> are sweet, and it was nice seeing their relationship blossom, although more time is spent with the other, more hateful characters. <PERSON>'s well documented of screen partying seems to have taken it's toll though, as she sounds like she's smoked a pack of cigarettes every time she opens her mouth.",DNRs\n\nSeriously what is the fucking point of DNRing someone if the other people keep letting them come back. As well as the owner and gm???????\n\nFor instance one guy stayed a few months back had gotten into it with the owner and has an aggressive pitbull (no not all pitties are aggressive but this one is)\n\nThe owner told me to add him to the long list of dnr. 
Because they smoked in the room has the aggressive dog and trashed the room.\n\nThe owner told him not to come back literally to his face.\n\nSo i come in after being off yesterday and lowandbehold he has come back. 🙃\n\nI say something to the owner but they act like they dont give two shits\n\nSo what was the whole big deal last time then?\n\nHe comes to the desk today wanting to basically to the standard room and upgrade cause hes a starrbellysneech memeber.\n\n(You dont get an upgrade on booked out nights and its only to the suite not studio if its avaliable)\n\nAnd of couse they gotta do this at 11 right at check out time. With a fuck ton of shit packed into their room on a (1 night stay)\n\nHousekeeping knocks like 4 times they keeps shewing them away so the owner tells the guy hey its 1130 yall need move to the new room.\n\nOf cousrse he starts to argue saying i didnt let him check back in Which is a lie.\n\nHis phone signal was shit so he had to go outside and book it cause he didnt want to pay the higher rate.\n\nFinally it came through and i check him in within like 2 min. So no dude it wasnt me. You knew what time it was your reservation due to your shitty ass broken phone was the one who was fucking it up not me.\n\nHe had the dog and it still caost him like 180 woth the pet fee. And i charged it out before he left my desk. He is one of those who will turn their card off. Im making sure we got the money.\n\nAnd i blocked out the last standard room on pourpose.\n\nAnyway i jsut had a conversation that with the owner as to why... he said cause its been slow and he needs the money. So he got rid of the list that he himself told me to make.\n\nWe have not been slow this week almost every night we have been booked out. So spare me the bull shit Nile.\n\nI just threw my hands up and said hey om just letting you know. 
Is what it is.\n\nYou wanna argue with the same dumb asses every time go ahead.\n\n( i am quiet quitting anyways)\n\nI cant take the back and fourth double standard anymore. I do my job and its never good enough.\n\nI do one tiny thing wrong and i get reprimanded for it the other 3 dumb shits that work the desk do major fuck up never get anything said to them.\n\nJust done trying.\n\nIdk anymore what does yalls hotels do with dnrs?,"[0.60251147, 1.7408997, 0.42384166, 0.34203064, -0.9317077, -1.2766064, -0.90100944]","[0.65487826, 0.6717696, 0.65222716, 0.65101326, 0.6321136, 0.626996, 0.6325691]",1
11,"Paris, Texas\n\nIn many ways, Paris, Texas is one of the most significant movies I’ve seen in a while. Technical aspect wise it has some neat camerawork and I can understand why it has garnered so much praise from the audience. The repetitive guitar tune is great as it sets the melancholic and emotive feel in this road trip movie. Some standouts are personally the peep talk/mirror scenes and the hallway scene in green and red lighting to name a few, the dialogue between <PERSON> and <PERSON> was genuinely intense and heartbreaking. One of the things I didn’t like was character wise, how much of an insufferable father <PERSON> is and the way he treats everyone especially to <PERSON> and <PERSON>. At the start, his brother <PERSON> made huge effort to travel to Texas and helped him get back on his feet and to get rid of his “muteness” and “amnesia” and all <PERSON> did was walk away and make things difficult between them. From my understanding, the message of this movie is about reconnection and redemption, when <PERSON> decided to make amendments with <PERSON> and reconnect with <PERSON> in the second half only to drive away and leave them in the hotel because he has come to terms that they will not be happy if they’re back together again. 
So in conclusion <PERSON> is a miserable dick and <PERSON> and <PERSON> are just better off without him.\n\nAlso I feel like I could use a rewatch at some point, it was a bit of a struggle to watch as there was no subtitles and some mf’s watch was beeping on and off at the third half of the movie and during the climatic scenes which took me out of the movie a little eek\n\nPriority watchlist","62, <LOCATION>, <LOCATION>\n\n<PERSON> was walking in her favorite place, ""<LOCATION>, listening to the river, birds singing and looking in the trees and grass for birds nests and little animals when she was called upon by the Lord to end her earthly journey and continue on alone through the valley of the shadow of death, a journey we all must make alone.\n\nWe know <PERSON> is now at rest with thee in one of the any mansions promised us.\n\n<PERSON> graduated in <DATE_TIME> as key punch operator on the honour roll for Adult Vocational Training. She was also a member and Elder of Farquharson Presbyterian Church and a <DATE_TIME> employee of Alderwood Rest Home, <LOCATION>.\n\n<PERSON> was predeceased by her parents, <PERSON> and <PERSON>; sisters, <PERSON>, <PERSON>, <PERSON> and <PERSON>. Also parent-in-law, <PERSON> and <PERSON>; brothers-in-law, <PERSON>, <PERSON> and <PERSON> and special all time person, Uncle-in-law, <PERSON>.\n\n<PERSON> is survived husband, <PERSON>; son, <PERSON>, grandson, <PERSON>; granddaughter, <PERSON> Special Angel"". She is also survived by sisters, <PERSON>; brother, <PERSON><PERSON>); brothers-in-law, <PERSON> (<PERSON>) and their family; <PERSON> (<PERSON>) and their family.\n\nFuneral service to celebrate the life of <PERSON> will be held on <DATE_TIME> at <DATE_TIME> at Farquharson Presbyterian Church with Rev. <PERSON> officiating. 
Burial will take place in Middle River Cemetery, <LOCATION>.\n\nMemorial donations in memory of <PERSON> may be made to Farquharson Presbyterian Church, Heart and Stroke Association, any animal shelter or charity of choice.\n\nOnline condolences may be sent to www.pierfuneralhome.com.\n\nLove you for eternity\nWinston\n\nOnline Condolences\n\nI am very sorry for your loss and would like to express my deepest sympathy.\nFrom: <PERSON>\n\nMy deepest sympthy to <PERSON>, <PERSON> and family. My thoughts and prayers are with you all.\nFrom: <PERSON>\n\n<PERSON>, We are shocked and saddened by <PERSON>’s passing. Our thoughts and prayers are with you and your family.\nAll our love, <PERSON>, <PERSON>, <PERSON>, and <PERSON>","[1.414709, 1.554176, -0.59778863, 0.07732854, -1.0408752, -0.97374153, -0.43383268]","[0.8403662, 0.84239817, 0.8110449, 0.82088107, 0.8045893, 0.8055674, 0.81343365]",1
12,"Paris, Texas\n\nIn many ways, Paris, Texas is one of the most significant movies I’ve seen in a while. Technical aspect wise it has some neat camerawork and I can understand why it has garnered so much praise from the audience. The repetitive guitar tune is great as it sets the melancholic and emotive feel in this road trip movie. Some standouts are personally the peep talk/mirror scenes and the hallway scene in green and red lighting to name a few, the dialogue between <PERSON> and <PERSON> was genuinely intense and heartbreaking. One of the things I didn’t like was character wise, how much of an insufferable father <PERSON> is and the way he treats everyone especially to <PERSON> and <PERSON>. At the start, his brother <PERSON> made huge effort to travel to Texas and helped him get back on his feet and to get rid of his “muteness” and “amnesia” and all <PERSON> did was walk away and make things difficult between them. From my understanding, the message of this movie is about reconnection and redemption, when <PERSON> decided to make amendments with <PERSON> and reconnect with <PERSON> in the second half only to drive away and leave them in the hotel because he has come to terms that they will not be happy if they’re back together again. So in conclusion <PERSON> is a miserable dick and <PERSON> and <PERSON> are just better off without him.\n\nAlso I feel like I could use a rewatch at some point, it was a bit of a struggle to watch as there was no subtitles and some mf’s watch was beeping on and off at the third half of the movie and during the climatic scenes which took me out of the movie a little eek\n\nPriority watchlist","<PERSON>\n\n""I can tell you're lying. I feel things *really* deeply.""\n\nI quite enjoyed X. While it seemed like just another slasher, the aesthetic, acting, and character development were enough for it to stand out as a delight.\n\nI also knew this wasn't going to be a slasher, or at least not in quite the same blatant way. 
I was also a little worried about this prequel coming out after X, since we know how this character is in that film.\n\nThis is somehow not only a little better, but also makes X even better. This is a visual treat of a psychological character horror. The Technicolor feel pops on the big screen, the color correction and editing doing wonders (at least as much as a 2020s production can).\n\n<PERSON> *is* the star that <PERSON> so desperately believes she is. From the expressions and whiplash in behaviors, to that one scene, this is her showcase. Also, in a role that is being extremely overlooked in my opinion, <PERSON> as <PERSON>'s father emotes so much while being able to do very little.\n\nThe only thing that I'm a little soft (heh) on is the mother-daughter dynamic that just felt a little flat to me. Nothing on the acting, the path was just predictable.","[-0.55436575, 1.6453257, 0.34281668, 1.0835814, -0.60306823, -1.4561306, -0.4581516]","[0.8712282, 0.88862455, 0.8783236, 0.884182, 0.87084305, 0.8640966, 0.87198913]",1


In [28]:
# Persist the selected pairs; Step 2 reloads this file with pd.read_json.
# NOTE: the file name is misspelled ("signficance"). It is left as is because the read_json
# cell further down uses the same spelling.
layer_x_pairs_df.to_json('../data/layer_to_pairs_signficance.json')

#### Step 2:

In [8]:
from datadreamer import DataDreamer
from datadreamer.llms import HFTransformers, ParallelLLM, OpenAI
from datadreamer.steps import DataFromPrompt, ProcessWithPrompt,  HFHubDataSource, DataSource, zipped, concat
from functools import partial
from transformers import QuantoConfig
from datasets import concatenate_datasets, load_dataset
import json


import os

# SECURITY: never hard-code API keys in a notebook. The key that used to be written on this
# line must be treated as leaked and revoked. Read the key from the environment instead.
openai_api_key = os.environ.get("OPENAI_API_KEY")
if not openai_api_key:
    # Fail fast with an actionable message rather than a late authentication error.
    raise RuntimeError("Set the OPENAI_API_KEY environment variable before running this cell.")
model = OpenAI(model_name="gpt-4", api_key=openai_api_key)
# model = HFTransformers(
#                 "meta-llama/Meta-Llama-3-8B-Instruct",
#                 quantization_config=QuantoConfig(weights="int8"),
#                 device=0,
#                 device_map="cuda",
#             )
# model.config.pad_token_id = model.config.eos_token_id

def gen_from_iterable_dataset(iterable_ds):
    """Yield every item of ``iterable_ds`` in its original order.

    Wraps an existing iterable in a generator function so it can be handed
    (via ``functools.partial``) to APIs that expect a generator callable,
    as done with ``Dataset.from_generator`` in this notebook.
    """
    for example in iterable_ds:
        yield example

def evaluate_text_similarities(data_path, document_pairs, linguistic_lvl_name, linguistic_lvl_desc):
    """Ask the LLM to rate one kind of linguistic similarity for every document pair.

    Args:
        data_path: Folder passed to ``DataDreamer`` as its working directory.
        document_pairs: List of dicts; each must contain ``'text-1'`` and ``'text-2'``.
        linguistic_lvl_name: Short identifier (e.g. ``'syntax'``). It is used in the
            step name and as the name of the output column holding the LLM response.
        linguistic_lvl_desc: Phrase substituted for ``<linguistic_lvl>`` in the prompt
            (e.g. ``'syntactic similarity'``).

    Returns:
        A ``datasets.Dataset`` built from the zipped step output: the mapped input
        step together with one column named ``linguistic_lvl_name`` that holds the
        raw LLM generation (expected to be a JSON object with ``reasons`` and ``score``).

    Note:
        The LLM is the module-level ``model``; it must be defined before calling.
    """
    # The score placeholder is deliberately NOT wrapped in quotes. When the format example
    # showed the integer inside quotes, the model returned a mix of string and integer scores.
    instruction = (
        "Given the two Documents below, rate their <linguistic_lvl> on a scale from 1 to 5: "
        "Score 1 equals very low similarity and score 5 equals high similarity. "
        "First, give reasons for your score and then output the score. "
        "The output should be in the following format: "
        "{\"reasons\": \"explain your rating\",  \"score\": <json integer>}"
    )
    instruction = instruction.replace("<linguistic_lvl>", linguistic_lvl_desc)

    with DataDreamer(data_path):
        datasource = DataSource('documents', Dataset.from_list(document_pairs))
        # Build the single prompt input for each pair.
        datasource = datasource.map(
            lambda row: {'inputs': 'Document 1:\n {} \n Document 2:\n {}'.format(row['text-1'], row['text-2'])},
            auto_progress=False,
        )
        ds_focus_questions = ProcessWithPrompt(
            "{} describe text similarity".format(linguistic_lvl_name),
            inputs={"inputs": datasource.output["inputs"]},
            args={
                "llm": model,
                "n": 1,
                "instruction": instruction,
            },
            outputs={"generations": linguistic_lvl_name},
        ).select_columns([linguistic_lvl_name])

        # Put the LLM response next to the inputs it was generated from.
        zipped_step = zipped(datasource, ds_focus_questions)

        # Materialise the step output as a regular Dataset via a generator.
        results_iter = zipped_step.output.dataset
        results_ds = Dataset.from_generator(partial(gen_from_iterable_dataset, results_iter))

        return results_ds

In [10]:
# Reload the pairs written by the to_json cell in Step 1 (same, misspelled, file name).
# NOTE(review): the per-layer counts displayed right after this cell differ from the counts
# printed when the pairs were extracted above, so the file on disk appears to come from a
# different run — confirm which sample the LLM ratings are based on.
layer_x_pairs_df = pd.read_json('../data/layer_to_pairs_signficance.json')

In [11]:
# Number of stored pairs per layer.
layer_x_pairs_df['layer'].value_counts()

0    10
2    10
3    10
6    10
4     4
1     3
5     3
Name: layer, dtype: int64

In [12]:
# Linguistic levels to rate. Each key becomes an output column name and each value is the
# phrase inserted into the rating prompt in place of <linguistic_lvl>.
ling_phenomena = {
    level: '{} similarity'.format(adjective)
    for level, adjective in (
        ('syntax', 'syntactic'),
        ('semantic', 'semantic'),
        ('lexical', 'lexical'),
        ('discourse', 'discourse'),
    )
}

In [20]:
# Turn every DataFrame row into a plain dict (one record per text pair).
layer_x_pairs = []
for row_label, pair_row in layer_x_pairs_df.iterrows():
    layer_x_pairs.append(pair_row.to_dict())

In [23]:
# Rate all pairs once per linguistic level; the list holds one result dataset per level,
# in the iteration order of ling_phenomena.
results = [
    evaluate_text_similarities('./output', layer_x_pairs, level_name, level_desc)
    for level_name, level_desc in ling_phenomena.items()
]

[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Initialized. 🚀 Dreaming to folder: ./output
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'documents' was previously run and saved, but was outdated. 😞
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'documents' is running. ⏳
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'documents' finished and is saved to disk. 🎉
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'documents (map)' is running. ⏳
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'documents (map)' will run lazily. 🥱
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'syntax describe text similarity' was previously run and saved, but was outdated. 😞
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'syntax describe text similarity' is running. ⏳
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'documents (map)' finished running lazily. 🎉
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'syntax describe text similarity' progress: 40 row(s) 🔄
[ [35m🤖 Data[33mDr[31mea[35mmer

In [24]:
# Stack the per-level result datasets row-wise (concatenate_datasets defaults to axis=0).
# Each row therefore carries the LLM response for only one linguistic level; the other level
# columns are empty for that row, as the table displayed further down shows.
all_results = concatenate_datasets(results)

In [25]:
# Persist the combined ratings so the analysis section can reload them from disk.
all_results.save_to_disk('../data/described_similarities_ds')

Saving the dataset (0/1 shards):   0%|          | 0/200 [00:00<?, ? examples/s]

### Analyze layer to linguistic similarity:

In [26]:
# Reload the ratings written by save_to_disk above; this rebinds all_results to the on-disk copy.
all_results = load_from_disk('../data/described_similarities_ds')

In [27]:
# Convert the dataset to a pandas DataFrame for inspection.
all_results_df = all_results.to_pandas()

In [28]:
# One 'z-score' entry per result row, as a Python list.
# NOTE(review): each entry looks like a vector with one value per layer, judging by the stored
# pair records — confirm.
zscores= all_results_df['z-score'].tolist()

In [32]:
# Show the first 50 result rows: both texts, the source layer and the raw LLM response
# column of each linguistic level.
columns_to_show = ['text-1', 'text-2', 'layer', 'lexical', 'syntax', 'discourse', 'semantic']
all_results_df.loc[:, columns_to_show].head(50)

Unnamed: 0,text-1,text-2,layer,lexical,syntax,discourse,semantic
0,"Nothing Happened This Morning\n\nThe rhythms of life as captured through incredibly playful editing—the way your brain tries to make sense of reality as you’re awakened by an alarm clock and get out of bed, the excitement of morning coffee (and juice!), the way a <PERSON> flute piece can animate your living quarters (and during this the film moves from b&w to color), a ticking clock anchoring the banality of everyday as the edits show how much vibrancy exists in our lives otherwise. Loved when he’s transported between his home and the coffee shop.","Miniature\n\nThe electronic soundtrack, while moderately appropriate in its thalassic melodies, do too much heavy lifting in conveying the particular emotional pitch here. When the actual sounds of water arrive it's just too much, doubling down on the saccharine and heavy-handed. The text sours as a result, as its poeticism (already rote compared to, say, the work of <PERSON> or <PERSON> post-Benning text explorations in Fainting Spells), feels like it's not disruptive enough. This is to say that the convergence of music and poetry and dance is just too one-dimensional. I want to be invited into this film, to explore the feelings and ideas therein, but <PERSON> is spoonfeeding everything to the viewer. There's a patina of elegance but it only serves as a reminder that each element isn't treated as something expansive, but as a shortcut for vibes.",0,,"{""reasons"": ""Both documents are film reviews and share a similar structure in terms of analyzing different aspects of the films. They both discuss the use of sound and music, the emotional impact, and the overall execution of the film. However, the tone and language used in each document are quite different. Document 1 uses more positive and enthusiastic language, while Document 2 is more critical and analytical. 
The sentence structures also differ, with Document 1 using shorter, more straightforward sentences and Document 2 using longer, more complex sentences."", ""score"": 3}",,
1,"Nothing Happened This Morning\n\nThe rhythms of life as captured through incredibly playful editing—the way your brain tries to make sense of reality as you’re awakened by an alarm clock and get out of bed, the excitement of morning coffee (and juice!), the way a <PERSON> flute piece can animate your living quarters (and during this the film moves from b&w to color), a ticking clock anchoring the banality of everyday as the edits show how much vibrancy exists in our lives otherwise. Loved when he’s transported between his home and the coffee shop.","Coffee Break\n\nThis is a silent short film with an unsuccessful structural gambit. We watch women on a coffee break and the camera remains fixed. After a while the reel runs out and we're left with an imageless screen, and then the image appears again, and then we zoom in. This repeats about five times throughout the course of the film, the interest we gain primarily coming from the way a newspaper will block another's face or looking at them flip through a photo album. But there's little consideration for how this framing is advancing the film in a meaningful way, and when the film ends with a close-up of the coffee with the women all leaving, it feels unearned.",0,,"{""reasons"": ""Both documents discuss films, specifically focusing on the themes and cinematography. However, the first document discusses a film with a dynamic and vibrant narrative, while the second document discusses a film with a static and repetitive structure. The first document uses more descriptive and emotive language, while the second document uses more critical and analytical language. Therefore, while they share a common topic, their syntactic structures and tones are quite different."", ""score"": 2}",,
2,"Miniature\n\nThe electronic soundtrack, while moderately appropriate in its thalassic melodies, do too much heavy lifting in conveying the particular emotional pitch here. When the actual sounds of water arrive it's just too much, doubling down on the saccharine and heavy-handed. The text sours as a result, as its poeticism (already rote compared to, say, the work of <PERSON> or <PERSON> post-Benning text explorations in Fainting Spells), feels like it's not disruptive enough. This is to say that the convergence of music and poetry and dance is just too one-dimensional. I want to be invited into this film, to explore the feelings and ideas therein, but <PERSON> is spoonfeeding everything to the viewer. There's a patina of elegance but it only serves as a reminder that each element isn't treated as something expansive, but as a shortcut for vibes.","Coffee Break\n\nThis is a silent short film with an unsuccessful structural gambit. We watch women on a coffee break and the camera remains fixed. After a while the reel runs out and we're left with an imageless screen, and then the image appears again, and then we zoom in. This repeats about five times throughout the course of the film, the interest we gain primarily coming from the way a newspaper will block another's face or looking at them flip through a photo album. But there's little consideration for how this framing is advancing the film in a meaningful way, and when the film ends with a close-up of the coffee with the women all leaving, it feels unearned.",0,,"{""reasons"": ""Both documents are film reviews and share a similar structure in terms of critique and analysis. They both start with a brief introduction of the film, followed by an analysis of the film's elements and end with a conclusion about the film's overall impact. However, the content and the specific elements they discuss are different. Document 1 focuses on the soundtrack, emotional pitch, and the convergence of music, poetry, and dance. 
Document 2, on the other hand, discusses the film's structure, framing, and the use of a silent short film format. Therefore, while they share a similar structure and style, the specific content and focus of their critique are different."", ""score"": 3}",,
3,"The Golden Glove\n\nSought this out after <PERSON> named it one of the best films of 2019 while also being the most horrible. It’s certainly up there on both counts.\n\n<PERSON> called The Golden Glove a “fetid corpse flower” and honestly his description is terrific, encapsulating everything I admired about the film (whereas he took exception to it being “godforsaken”, Americans lol). Personally I’d stamp that review on the back of the Blu Ray.\n\nAll I want to add is that <PERSON> was 22 when this was made. 22. His performance is monstrous, queasy, shit-streaked, and weirdly tragic. It’s the most startling transformation from a young actor that I can think of.","The Gate\n\nWhere the hell was this film in my childhood? It’s the movie equivalent of an unforgettable trick-or-treat haul: the kind you’re rewarded with for risking a visit to that creepy house at the end of the street.\n\nFun all the way through, with some genuinely creepy moments, and a black metal loving nerd straight out of a <PERSON> flick. Yes please. And how could you not love a film in which Lovecraftian forces meet their match in a boy’s TOY ROCKET that also represents how hard (but vital) it is to stay friends with your teenage siblings. That’s PG-13 horror at its least cynical.\n\nIn terms of visual imagination and atmosphere, I can imagine this is the film <PERSON> watched every Halloween. It’s going straight on my annual watchlist too.",0,,"{""reasons"": ""The two documents have different structures and themes. Document 1 is a review of a film called 'The Golden Glove' and focuses on the performance of an actor, while Document 2 is a review of a film called 'The Gate' and discusses the plot and visual elements of the film. The sentence structures and vocabulary used in both documents are also different."", ""score"": ""2""}",,
4,"The Golden Glove\n\nSought this out after <PERSON> named it one of the best films of 2019 while also being the most horrible. It’s certainly up there on both counts.\n\n<PERSON> called The Golden Glove a “fetid corpse flower” and honestly his description is terrific, encapsulating everything I admired about the film (whereas he took exception to it being “godforsaken”, Americans lol). Personally I’d stamp that review on the back of the Blu Ray.\n\nAll I want to add is that <PERSON> was 22 when this was made. 22. His performance is monstrous, queasy, shit-streaked, and weirdly tragic. It’s the most startling transformation from a young actor that I can think of.","<PERSON>\n\nA spaceship which is also a mountain. A bridal chamber which is also a titanic clawed gauntlet. A sensitive cyclops with unparalleled depth perception. Miss <PERSON> guarded by a gargantuan albino spider.\n\nThis movie is bursting with ideas, which on the whole are terribly executed. The concept design, the art direction — these are, at times, astounding. The performances and editing are horrendous throughout.\n\nMostly it’s a painfully dull and knuckleheaded Star Wars ripoff led by <PERSON> in tights. But then, for brief flashes, it becomes a surreal nightmare, a <PERSON> painting brought to life.\n\nSome of these sequences fucked me up as a child but inspire the hell out of me as an adult.\n\n<PERSON>, as he lays dying at the steps of The Beast’s space mountain: “I was wrong. The journey was worth it.”",0,,"{""reasons"": ""The two documents have different syntactic structures and themes. Document 1 is a review of a film called 'The Golden Glove' and the sentences are structured around the critique of the film and the performance of the actors. Document 2, on the other hand, is a critique of a different film, with sentences structured around the description of the film's elements and the reviewer's personal experience. 
The sentence structures, themes, and vocabulary used in both documents are quite different."", ""score"": 2}",,
5,"The Golden Glove\n\nSought this out after <PERSON> named it one of the best films of 2019 while also being the most horrible. It’s certainly up there on both counts.\n\n<PERSON> called The Golden Glove a “fetid corpse flower” and honestly his description is terrific, encapsulating everything I admired about the film (whereas he took exception to it being “godforsaken”, Americans lol). Personally I’d stamp that review on the back of the Blu Ray.\n\nAll I want to add is that <PERSON> was 22 when this was made. 22. His performance is monstrous, queasy, shit-streaked, and weirdly tragic. It’s the most startling transformation from a young actor that I can think of.","The Bathtub\n\nFor years I showed this to my filmmaking students as an example of how to use framing as a character. They always admired it, as do I — even after countless viewings.\n\nThe Bathtub’s restraint and pathos is even more admirable considering it’s a student film. Back when I was in film school, I was taking big swings which almost always missed.\n\nWithin a single unbroken shot, we’re taken through a whole gamut of sibling history: nostalgia, disappointment, tenderness, frustration, acceptance.\n\nThe wrestling match between the brothers’ past and present relationship is so built-in to the film’s construction that, on one level, I almost see this as a time travel movie.\n\nOK, enough. This review is longer than the film.",0,,"{""reasons"": ""Both documents are film reviews and share a similar structure, starting with the title of the film, followed by the reviewer's personal thoughts and experiences related to the film. However, the content, tone, and specific details discussed in each review are quite different. Document 1 focuses on the performance of an actor and the disturbing nature of the film, while Document 2 discusses the technical aspects of filmmaking and the emotional depth of the film. 
Therefore, while they share a similar format, the specific content and themes discussed are quite different."", ""score"": 3}",,
6,"The Gate\n\nWhere the hell was this film in my childhood? It’s the movie equivalent of an unforgettable trick-or-treat haul: the kind you’re rewarded with for risking a visit to that creepy house at the end of the street.\n\nFun all the way through, with some genuinely creepy moments, and a black metal loving nerd straight out of a <PERSON> flick. Yes please. And how could you not love a film in which Lovecraftian forces meet their match in a boy’s TOY ROCKET that also represents how hard (but vital) it is to stay friends with your teenage siblings. That’s PG-13 horror at its least cynical.\n\nIn terms of visual imagination and atmosphere, I can imagine this is the film <PERSON> watched every Halloween. It’s going straight on my annual watchlist too.","The Devil All the Time\n\n“Some people are born just so they can be buried.”\n\nIt’s funny that the thing which brought the novel praise — a relentless, almost mythic, vision of a traumatised post-war America trapped in a destiny of death begetting death — is precisely what bothers cinephiles about the film.\n\nI had to google the word “edgelord” to even navigate half the reviews on here, which I find ironic coming from critics that usually fawn over bleak depictions of working class doom-cycles, just so long as they’re depicted in an austere social realist aesthetic. Perhaps there’s just not enough Otherness to sweeten the pot for these folk.\n\nLook, I don’t think this film is anywhere near as mythic as it sets out to be. The music is mushy and the narration lacks the style and bite required to not turn out hokey (which is especially disappointing since the author himself does the VO).\n\nOn the plus side, I can’t fault a single performance. 
<PERSON> never fails to impress me, <PERSON> was suitably sleazy, and even <PERSON> was convincing.\n\nMost of all, between his role in this and The Ballad of Buster Scruggs, grown-up <PERSON> is emerging as one of Britain’s most exciting character actors.\n\n<PERSON> is fast becoming a favourite cinematographer of mine, filling a sun-burnt south with painterly shadows. It’s nice to see a Netflix production that isn’t criminally over-exposed.\n\nI enjoyed the pacing of the individual scenes yet overall the film felt simultaneously too thin and too long. Can’t help think this would have worked better as a mini series so that the atmosphere could play out more richly and the psychologies of each doomed player could be fleshed out before being buried.\n\nMost of all I wish that <PERSON> had directed this.",0,,"{""reasons"": ""Both documents are film reviews and share a similar structure, starting with a brief introduction, followed by an analysis of the film's elements, and ending with a personal opinion. However, the tone, style, and language used in each review are quite different. Document 1 uses a more casual and enthusiastic tone, while Document 2 is more formal and critical. The sentence structures also vary, with Document 1 using shorter, simpler sentences and Document 2 using longer, more complex sentences. The topics discussed in each review are also different, with Document 1 focusing more on the film's atmosphere and themes, and Document 2 focusing more on the film's performances and cinematography."", ""score"": 3}",,
7,"The Gate\n\nWhere the hell was this film in my childhood? It’s the movie equivalent of an unforgettable trick-or-treat haul: the kind you’re rewarded with for risking a visit to that creepy house at the end of the street.\n\nFun all the way through, with some genuinely creepy moments, and a black metal loving nerd straight out of a <PERSON> flick. Yes please. And how could you not love a film in which Lovecraftian forces meet their match in a boy’s TOY ROCKET that also represents how hard (but vital) it is to stay friends with your teenage siblings. That’s PG-13 horror at its least cynical.\n\nIn terms of visual imagination and atmosphere, I can imagine this is the film <PERSON> watched every Halloween. It’s going straight on my annual watchlist too.","<PERSON>\n\nA spaceship which is also a mountain. A bridal chamber which is also a titanic clawed gauntlet. A sensitive cyclops with unparalleled depth perception. Miss <PERSON> guarded by a gargantuan albino spider.\n\nThis movie is bursting with ideas, which on the whole are terribly executed. The concept design, the art direction — these are, at times, astounding. The performances and editing are horrendous throughout.\n\nMostly it’s a painfully dull and knuckleheaded Star Wars ripoff led by <PERSON> in tights. But then, for brief flashes, it becomes a surreal nightmare, a <PERSON> painting brought to life.\n\nSome of these sequences fucked me up as a child but inspire the hell out of me as an adult.\n\n<PERSON>, as he lays dying at the steps of The Beast’s space mountain: “I was wrong. The journey was worth it.”",0,,"{""reasons"": ""Both documents are film reviews and share a similar structure, starting with a brief introduction, followed by a description of the film's elements, and ending with a personal reflection. However, the sentence structures and vocabulary used in each document are quite different. 
Document 1 uses more casual and colloquial language, while Document 2 uses more formal and descriptive language. The themes and content of the films being reviewed are also different, with Document 1 discussing a horror film and Document 2 discussing a science fiction film."", ""score"": 3}",,
8,"The Gate\n\nWhere the hell was this film in my childhood? It’s the movie equivalent of an unforgettable trick-or-treat haul: the kind you’re rewarded with for risking a visit to that creepy house at the end of the street.\n\nFun all the way through, with some genuinely creepy moments, and a black metal loving nerd straight out of a <PERSON> flick. Yes please. And how could you not love a film in which Lovecraftian forces meet their match in a boy’s TOY ROCKET that also represents how hard (but vital) it is to stay friends with your teenage siblings. That’s PG-13 horror at its least cynical.\n\nIn terms of visual imagination and atmosphere, I can imagine this is the film <PERSON> watched every Halloween. It’s going straight on my annual watchlist too.","The Bathtub\n\nFor years I showed this to my filmmaking students as an example of how to use framing as a character. They always admired it, as do I — even after countless viewings.\n\nThe Bathtub’s restraint and pathos is even more admirable considering it’s a student film. Back when I was in film school, I was taking big swings which almost always missed.\n\nWithin a single unbroken shot, we’re taken through a whole gamut of sibling history: nostalgia, disappointment, tenderness, frustration, acceptance.\n\nThe wrestling match between the brothers’ past and present relationship is so built-in to the film’s construction that, on one level, I almost see this as a time travel movie.\n\nOK, enough. This review is longer than the film.",0,,"{""reasons"": ""Both documents are film reviews and share a similar structure, starting with a title, followed by the reviewer's personal experience and interpretation of the film. They both discuss themes and elements of the films, and express admiration for the films. However, the specific content, tone, and language used in each review are quite different. Document 1 is more casual and enthusiastic, while Document 2 is more analytical and reflective. 
The films being reviewed also seem to be of different genres, with Document 1 discussing a horror film and Document 2 discussing a more dramatic, character-driven film."", ""score"": 3}",,
9,"The Devil All the Time\n\n“Some people are born just so they can be buried.”\n\nIt’s funny that the thing which brought the novel praise — a relentless, almost mythic, vision of a traumatised post-war America trapped in a destiny of death begetting death — is precisely what bothers cinephiles about the film.\n\nI had to google the word “edgelord” to even navigate half the reviews on here, which I find ironic coming from critics that usually fawn over bleak depictions of working class doom-cycles, just so long as they’re depicted in an austere social realist aesthetic. Perhaps there’s just not enough Otherness to sweeten the pot for these folk.\n\nLook, I don’t think this film is anywhere near as mythic as it sets out to be. The music is mushy and the narration lacks the style and bite required to not turn out hokey (which is especially disappointing since the author himself does the VO).\n\nOn the plus side, I can’t fault a single performance. <PERSON> never fails to impress me, <PERSON> was suitably sleazy, and even <PERSON> was convincing.\n\nMost of all, between his role in this and The Ballad of Buster Scruggs, grown-up <PERSON> is emerging as one of Britain’s most exciting character actors.\n\n<PERSON> is fast becoming a favourite cinematographer of mine, filling a sun-burnt south with painterly shadows. It’s nice to see a Netflix production that isn’t criminally over-exposed.\n\nI enjoyed the pacing of the individual scenes yet overall the film felt simultaneously too thin and too long. Can’t help think this would have worked better as a mini series so that the atmosphere could play out more richly and the psychologies of each doomed player could be fleshed out before being buried.\n\nMost of all I wish that <PERSON> had directed this.","The Bathtub\n\nFor years I showed this to my filmmaking students as an example of how to use framing as a character. 
They always admired it, as do I — even after countless viewings.\n\nThe Bathtub’s restraint and pathos is even more admirable considering it’s a student film. Back when I was in film school, I was taking big swings which almost always missed.\n\nWithin a single unbroken shot, we’re taken through a whole gamut of sibling history: nostalgia, disappointment, tenderness, frustration, acceptance.\n\nThe wrestling match between the brothers’ past and present relationship is so built-in to the film’s construction that, on one level, I almost see this as a time travel movie.\n\nOK, enough. This review is longer than the film.",0,,"{""reasons"": ""Both documents are reviews of different films, and they share a similar structure in terms of discussing the elements of the films, such as the performances, direction, and cinematography. However, the language, tone, and specific content of the reviews are quite different. Document 1 is more critical and uses more complex language and sentence structures, while Document 2 is more straightforward and positive. The topics discussed in each review are also different, with Document 1 focusing more on the film's shortcomings and Document 2 focusing on the film's strengths."", ""score"": 3}",,


In [33]:
# How many evaluated pairs were collected for each layer.
all_results_df['layer'].value_counts()

0    40
2    40
3    40
6    40
4    16
1    12
5    12
Name: layer, dtype: int64

In [34]:
def aggregate_score(list_of_scores):
    """Return the median ``score`` parsed from a collection of JSON reply strings.

    Each usable entry is a string that, after clean-up, parses as a JSON
    object with a ``"score"`` key convertible to ``int``. Clean-up consists of:
    keeping only the last line when the reply contains a blank line (``"\\n\\n"``),
    and removing any ``"Output:"`` marker.

    Args:
        list_of_scores: Iterable of raw reply strings. Non-string entries
            (``None``, NaN from empty DataFrame cells, ...) are skipped silently.

    Returns:
        The median of the parsed scores rounded to 2 decimals, or NaN when no
        entry yielded a score.
    """
    scores = []
    for json_str in list_of_scores:
        # Missing cells show up as None or float NaN; they carry no score and
        # are not parse failures, so skip them without reporting.
        if not isinstance(json_str, str):
            continue
        try:
            if "\n\n" in json_str:
                # Keep only the final line of a multi-paragraph reply. Trailing
                # whitespace is dropped first so a closing newline does not
                # leave an empty "last line".
                json_str = json_str.rstrip().split("\n")[-1].strip()

            json_str = json_str.replace("Output:", "").strip()

            json_obj = json.loads(json_str)
            scores.append(int(json_obj['score']))
        except (ValueError, KeyError, TypeError):
            # ValueError covers json.JSONDecodeError and non-numeric scores;
            # KeyError a missing "score"; TypeError a non-object JSON payload.
            # Report the offending reply and keep going (best effort).
            print(json_str)
            print('==============')

    if not scores:
        # np.median([]) would emit a RuntimeWarning; return NaN explicitly.
        return float("nan")
    return round(np.median(scores), 2)

# Collapse the per-pair judgements into one median score per layer and
# linguistic level; `layer` comes back as a regular column.
linguistic_levels = ('lexical', 'syntax', 'discourse', 'semantic')
layer_scores_df = (
    all_results_df
    .groupby('layer')
    .agg({level: aggregate_score for level in linguistic_levels})
    .reset_index()
)

In [35]:
# Show the first seven rows of the per-layer medians.
layer_scores_df.head(7)

Unnamed: 0,layer,lexical,syntax,discourse,semantic
0,0,1.0,3.0,2.0,1.5
1,1,2.0,3.0,3.0,3.0
2,2,2.0,3.0,3.0,2.0
3,3,1.0,2.5,2.0,1.0
4,4,1.5,2.5,2.5,2.0
5,5,2.0,3.0,3.0,2.0
6,6,1.0,2.0,1.5,1.0
