In [1]:
%load_ext autoreload

In [71]:
%autoreload
import sys
sys.path.insert(0, '../')

In [72]:
from src.utilities.mluar_utils import *

In [73]:
from transformers import AutoModel, AutoTokenizer
from datasets import load_dataset, Dataset, load_from_disk
import numpy as np
from einops import rearrange, reduce, repeat
import torch
from sklearn.metrics.pairwise import cosine_similarity
from matplotlib import pyplot as plt
import math
import pandas as pd
import pickle as pkl

pd.set_option('display.max_colwidth', None)


In [67]:
# Locations handed to AutoModel.from_pretrained for the Multi-LUAR and LUAR models.
# Both live on the same storage pool, under different user directories.
_POOL_ROOT = "/mnt/swordfish-pool2"
MULTI_LUAR_PATH = f"{_POOL_ROOT}/milad/multi-luar-reddit-model/"
LUAR_PATH = f"{_POOL_ROOT}/nikhil/LUAR/pretrained_weights/LUAR-MUD/"

In [6]:
# Load both embedding models from their local weight directories (Multi-LUAR first,
# then LUAR); trust_remote_code is needed because the model code ships with the weights.
multiluar_model, luar_model = (
    AutoModel.from_pretrained(weights_dir, trust_remote_code=True)
    for weights_dir in (MULTI_LUAR_PATH, LUAR_PATH)
)
# The tokenizer is fetched by its hub id rather than from a local path.
tokenizer = AutoTokenizer.from_pretrained("rrivera1849/LUAR-MUD")

In [7]:
# Load the HIATUS authorship-attribution data (input texts + ground truth).
# The long paths are split into adjacent literals purely for readability.
data_path = (
    '/mnt/swordfish-pool2/milad/hiatus-data/phase_2/mode_perGenre-HRS2.1/TA2/'
    'hrs_06-27-24_english_perGenre-HRS2.1/data/'
    'hrs_06-27-24_english_perGenre-HRS2.1_TA2_input'
)
ground_truth_path = (
    '/mnt/swordfish-pool2/milad/hiatus-data/phase_2/mode_perGenre-HRS2.1/TA2/'
    'hrs_06-27-24_english_perGenre-HRS2.1/groundtruth/'
    'hrs_06-27-24_english_perGenre-HRS2.1_TA2'
)
# load_aa_data returns three values; only the first one is used in this notebook.
hiatus_data, _, _ = load_aa_data(data_path, ground_truth_path)

Loading:  /mnt/swordfish-pool2/milad/hiatus-data/phase_2/mode_perGenre-HRS2.1/TA2/hrs_06-27-24_english_perGenre-HRS2.1/data/hrs_06-27-24_english_perGenre-HRS2.1_TA2_input


In [8]:
# Keep only the texts of authors who wrote more than one text
texts_per_author = hiatus_data.authorID.value_counts()
authors_with_multiple_texts = texts_per_author[texts_per_author > 1].index.tolist()
hiatus_data = hiatus_data[hiatus_data.authorID.isin(authors_with_multiple_texts)]

In [9]:
# Draw a 2000-text subsample. The seed is fixed so that the subsample -- and every
# embedding, similarity matrix and pair list derived from it below -- is the same
# after a kernel restart.
hiatus_data = hiatus_data.sample(n=2000, random_state=42)

In [10]:
# Token limit passed to the embedding helper as max_length
max_seq_length = 736

# Parallel lists: the i-th text was written by the i-th author id
labels = hiatus_data['authorID'].tolist()
hiatus_data_texts = hiatus_data.fullText.tolist()

# Embed every text with the Multi-LUAR model, one text per batch
hiatus_data_embeddings = get_luar_embeddings(
    hiatus_data_texts,
    multiluar_model,
    tokenizer,
    max_length=max_seq_length,
    batch_size=1,
    is_multi_luar=True,
)

In [11]:
#np.max([len(x.split()) for x in hiatus_data.fullText.tolist()])

### Experiment Design

- For each layer, we find pairs of texts written by the same author (according to the ground truth) to which that layer's embeddings assign a high similarity relative to the other layers.
- For each layer, we take a sample of these pairs and prompt ChatGPT to rate at which linguistic level (syntactic, semantic, lexical, discourse) the two texts are similar.

#### Step 1:

In [74]:
# Multi-LUAR text-to-text similarity matrix for each of the layers 0-6,
# stacked along a new leading (layer) axis.
# An extra entry computed with layer=None (the average of the layers, per the
# original note) was disabled and is not part of the stack.
muti_luar_layers_sims = np.stack([
    compute_similarities(hiatus_data_embeddings, hiatus_data_embeddings, layer=layer_idx)
    for layer_idx in range(7)
])

In [78]:
# Map every layer index (0-6) to the significant text pairs extracted for that layer
layer_to_sig_pairs = dict(
    (layer_idx, extract_sig_pairs_for_layer(hiatus_data_texts, muti_luar_layers_sims, labels, layer_idx))
    for layer_idx in range(7)
)

In [79]:
# (number of sampled texts, number of distinct author ids among them)
len(labels), len(set(labels))

(2000, 963)

In [80]:
# Flatten the first 10 significant pairs of every layer into one list of records
layer_x_pairs = []
for layer in range(7):
    # Report how many significant pairs this layer has in total
    print(layer, len(layer_to_sig_pairs[layer]))
    sample_of_pairs = layer_to_sig_pairs[layer][:10]
    layer_x_pairs.extend([
        {
            'text-1': pair[0],
            'text-2': pair[1],
            'z-score': pair[2],
            'layer-sim': pair[3],
            'layer': layer,
        }
        for pair in sample_of_pairs
    ])
layer_x_pairs_df = pd.DataFrame(layer_x_pairs)

0 0
1 0
2 0
3 0
4 0
5 0
6 0


In [63]:
# Preview the first sampled pairs that were selected for layer 6
layer_x_pairs_df[layer_x_pairs_df.layer == 6].head()

Unnamed: 0,text-1,text-2,z-score,layer-sim,layer
13,"Tables that leave\n\nAs a host-nothing is more awkward/uncomfortable than when a table gets up and leaves because of the service.\n\nTonight was overall a terrible shift. We had two newer servers on and one experienced server who wanted to get cut first. With newer servers it‚Äôs hard because we can‚Äôt tell how much they can handle at once until we get used to working with them.\n\nAnyways I seat a four top with a reservation and they were super nice. It was pretty busy, but Saturday nights are always busy (I work in a big city too)\n\nThey got drinks but then out of the blue they all get up to leave and tell my co-host and I they‚Äôre leaving because the server has been gone for forever and it‚Äôs not worth it. It‚Äôs such an awkward position to be in because wtf am I supposed to do other than say I‚Äôm so sorry. Plea with them to stay? Grab my manager? I actually couldn‚Äôt get my manager because she was running around like a madwoman all night.\n\nSo they left and it was just like ‚Ä¶.\n\nThen an hour later we sat another four top with a reservation and legit same thing. They leave without even being watered (I‚Äôm not sure if they were greeted because sometimes customers flip the glasses over themselves)\n\nI felt bad for the server but my hands were tied so I had to seat the them. Sometimes I‚Äôll check in with servers when I‚Äôve noticed a table hasn‚Äôt been watered but sometimes they get annoyed and go ‚ÄúI know‚Ä¶‚Äù or ‚Äúthey can wait‚Äù But what am I supposed to say when people tell us they‚Äôre leaving because of slow service????","Does the kitchen hate the hosts?\n\nI work in a big city but our kitchen is fairly small compared to the size of the restaurant. Anyways my manager‚Äôs whole ordeal is never say no. Business is business. If an eight top comes in without a reservation it doesn‚Äôt matter you seat them like that kind of manager. 
One time a host turned away a 14 top, 14 top emailed manager and host got fired (for other reasons too but that was a big part of it)\n\nAnyways today at lunch we got maybe 300000 walk-ins and it was stressful but I made it work. My manager comes running out saying how the kitchen is slammed and I need to stop taking people. I‚Äôm like ok cool that‚Äôs actually good because there‚Äôs only two servers on anyways and they‚Äôre weeded.\n\nIt finally dies down and I ask my mgr if the kitchen hates me. She just goes\n\n‚ÄúUm‚Ä¶..I mean they were dying in there they had so many tickets‚Äù\n\nSo does the kitchen hate me? And hosts in general?","[0.20142695, -0.33192047, -1.5438107, 0.014910437, -0.56804776, 0.20199834, 2.0253887]","[0.9343342, 0.9308291, 0.9228647, 0.93310845, 0.9292773, 0.934338, 0.9463211]",6
14,"The Revenant\n\nHoly actual shit.\n\n<PERSON>! <PERSON> Man!! If he doesn't win the Oscar this time, he should just honestly quit acting.\n\n<PERSON> is f***ing boss once again in an antagonist role.\n\n<PERSON> strikes again with his mind-blowing camerawork.\n\nI think I almost fell off my couch three times because of how damn suspenseful this was.\n\nThe action sequences were intense as hell.\n\nThe dialogue is more than fantastic.\n\nThe plot is very well executed.\n\nWow. Just wow.\n\nI had to contain myself while writing this review because just thinking about this film makes me absolutely hyped.\n\nThe Revenant is overwhelmingly breath-taking.","The <PERSON>\n\nGoing into this movie I had 3 worries: it was going to be extremely overrated, it wasn't going to be entertaining, and I wasn't going to understand the plot of the film. None of these things turned out to be true, and I'm very delighted by this.\n\nA film being called the best movie of all-time definitely will be called overrated by most, but this film is just excellent in every aspect. Its story is as masterful and interesting as any other great film from this day and age. The acting is beyond good, as <PERSON>, <PERSON>, and <PERSON> absolutely blew me away with their performances. Also, the action sequences and suspense were very well done, as I was on the edge of my seat during these instances.\n\nUsually I'm not fond of films made before 1980, but I made an exception to watch this on because of its critical acclaim. I was scared that the film would be boring because of a combination between this and its near 3 hour duration, but I couldn't keep my eyes off of the screen.\n\nSo all in all, The Godfather is a fast-paced, entertaining film with stellar performances. 
Although I wouldn't call it the best film of all time, The Godfather is indeed a masterpiece.","[0.842149, -0.6277406, -0.119830996, -0.57768816, -0.9466902, -0.6323912, 2.0621595]","[0.8849591, 0.87147045, 0.87613136, 0.87192976, 0.86854357, 0.8714278, 0.8961547]",6
15,"Advice for diaper dog?\n\nRecently adopted a special needs dog who has an ectopic ureter, which means that she leaks urine throughout the day. We have bought both washable and disposable diapers and recently I‚Äôve been working on making washable pads to help extend the life of the washable diapers throughout the day (still trying to find the best materials for effectiveness).\n\nWhen we got her, she had urine burns and a very swollen vulva with lesions. It‚Äôs been almost a couple of weeks now and she is looking much better because we change out her diapers at least every 3 hours (and the pad inserts every 1.5 hours). The pad inserts don‚Äôt have a leak proof back, so I have found that they aren‚Äôt really prolonging the use of the diapers when compared to using the diapers without. Our girl drinks a lot of water and I know some people limit it, but I don‚Äôt have the heart to make her go thirsty.\n\nIf you have a similar special needs pup, can you share your hygiene tips and tricks? I‚Äôm curious to know if we are doing this correctly or if others have found better methods.","I feel like the rescue we adopted from did not set us up for success\n\nMy partner and I adopted a special needs pup (1yr old) who has hip dysplasia and an ectopic ureter. Before we adopted, we spent hours communicating with the foster to make sure we understood what we were getting ourselves into. She is our first ever special needs animal, and we really wanted to give her a loving home.\n\nWhen we asked about her conditions and how they impact her quality of life and longevity, they seemed super optimistic. Everything was downplayed quite a bit. We had asked if her ectopic ureter would need surgery and they said no, she would just be a diaper baby the rest of her life. When I asked for copies of her medical records before adopting, we kept being told, ‚Äúyeah, we will get that to you‚Äù and it never happened. 
We trusted what we were being told and moved forward with adopting before seeing her actual medical records (I know, rookie mistake).\n\nNow, after taking her to the vet that originally diagnosed her conditions, I find out that she was referred to surgery last year but the rescue opted not to. As a result, our dog is developing new issues that are costing a lot of money and we need to decide if we will move forward with surgery to give her a healthier life. We don‚Äôt know how much it will cost yet, but based on what the vet said, it can be anywhere between $2000-$10000 (and no one does it locally so we have to go to another city!). My partner was just laid off from his job, so the timing really sucks.\n\nI‚Äôm so exhausted at this point with how much work she has been, and now financially we will be hurting too. The vet was shocked that the rescue never mentioned her surgery referral, and I‚Äôm so upset that they didn‚Äôt either. I love my dog and want to give this girl a happy life, but we are having regrets now. We are in over our heads. I hate myself for feeling this way because I do view her as family. The rescue/foster have ghosted us too, so that doesn‚Äôt feel great.\n\nIf anyone else has experience with ectopic ureters I would love to hear your story to help me not feel so alone with this. Or, if you just have some words of encouragement, I could use those too.","[-0.2772792, -1.2359449, -0.6468178, 0.4314044, -0.7371637, 0.45580003, 2.0099525]","[0.88805485, 0.8798429, 0.88488936, 0.89412546, 0.88411546, 0.89433444, 0.9076474]",6
16,"my cat is in distress and i don‚Äôt understand why\n\nmy cat has recently taken a liking to my roommate, when i try to pick him up & take him to my room he runs away, meows really loud, and makes me chase him around the coffee table.\n\nanyway what‚Äôs really concerning me is that at least 3 times a day, i‚Äôll come out to grab him so he stops meowing at my roommates door. he makes me follow him into the kitchen, and he sits and meows at our little pantry area\n\nhe is meowing like he wants food or something specific in that area, but his food and water are located in my room. i have no idea what it is, i don‚Äôt know what to do and it‚Äôs making me so sad that i can‚Äôt understand him. what does he want??? this has been going on for weeks","is it cruel to relocate an older cat? (she was my cat but i went to college)\n\nso i left my cat behind because my dad guilted me into it saying that he had grown attached to her. that was my cat for 10 years, she slept with me every night and was with me through all my trauma (years, complicated), my dad didn‚Äôt even know her until i moved in with him at the age of 13. but she‚Äôs about 9 or 10 years old, getting a bit skinnier and after standing up for myself to my dad he said i can take her back.\n\ni wish he had just let me take her to begin with, so i wouldn‚Äôt have to deal with this. my dad was selfish and put his feelings of my cat above mine. so i left her, but now i have the chance to take her back. would it be cruel to remove her from her new family? she knows me, but would she miss my dad or would she forget eventually?","[0.14108875, -0.49197036, -1.0870695, -0.78966993, -0.32635352, 0.39245847, 2.1615343]","[0.9158304, 0.9116783, 0.9077752, 0.9097258, 0.91276455, 0.91747904, 0.9290819]",6
17,"Spider-Man: No Way Home\n\nThere is lots to love about this movie, especially when it focuses on <PERSON> <PERSON> because he's just *chef's kiss* excellent here and the film really does paint a compelling evolution of the character from boy to man. But everything centered around the fan-service and cameos feels like an SNL sketch, both in style and in tone. 90% of the satisfying moments here take place in various dimly-lit living rooms, high-school classrooms, and bluescreen scaffolding across New York City. <PERSON> does not know how to direct epic iconography.","<PERSON>\n\nIt appears that <PERSON> has finally succumbed to the idea that ‚Äúresearching the movie for 2 hours afterward just to understand it‚Äù is the same thing as having real thematic depth and substance. Yes, if I watch this again, I will understand the plot more clearly. But will I gain anything beyond that? This is the first high-concept <PERSON> film where I don‚Äôt even really care to figure out the puzzle.\n\nI don‚Äôt know, man. It was a cool experience, with plenty of fun moments and some solid payoffs; though really, it often feels like a mish-mash of many things <PERSON> has achieved better (and more focused) in his previous films, from the ouroboros ‚Äútime loop‚Äù framing device to the parallel timelines and doppelg√§ngers. This is such a simple story, littered with sci-fi hallmarks and cliches, masquerading as something entirely new and subversive simply because it‚Äôs steeped in information and content. Often, it‚Äôs not even the crazy time-twisting that is hard to follow, it‚Äôs the basic scene-to-scene transitions and context for why we‚Äôre watching what we‚Äôre watching in the first place. Even the exposition is just dull; and <PERSON> at least usually manages to make wordy explanations fun to watch.\n\nI just don‚Äôt feel edified or fulfilled or changed or even challenged at all by this. 
It‚Äôs like a multi-million-dollar cinematic party trick made solely to impress, gaining points only by overwhelming the audience with an onslaught of convoluted jargon and then saying ‚Äúaye, if you don‚Äôt get this, you probably weren‚Äôt paying close enough attention to the mumbled and incomprehensible expository dialogue.‚Äù\n\nI‚Äôm glad it exists. I‚Äôm glad <PERSON> is making the things he wants to make. Some of the stuff close to the end was pretty exhilarating in its sheer technical wizardry. And ya know, I guess it‚Äôs okay that we have different types of movies that fulfill different purposes. Some movies are just kinda nifty and dope and trying to have some fun with an idea, and that‚Äôs totally cool. I‚Äôm totally understanding of why lots of people will probably dig it! But I‚Äôd much rather watch the last 10 minutes of the new Bill & Ted and cry my eyes out again.","[-0.37478483, -0.49054927, -1.5181968, 0.09157082, 0.4307748, -0.16216482, 2.023296]","[0.8382391, 0.83721805, 0.8281543, 0.8423523, 0.845344, 0.84011436, 0.85938984]",6


In [54]:
# Persist the sampled pairs table.
# NOTE(review): a later cell unpickles this same path into `layer_to_sig_pairs` and
# indexes it by layer number, which does not match the DataFrame written here --
# confirm which object is meant to live at this path.
layer_x_pairs_df.to_pickle('../data/layer_to_pairs_signficance.pkl')

#### Step 2:

In [55]:
from datadreamer import DataDreamer
from datadreamer.llms import HFTransformers, ParallelLLM, OpenAI
from datadreamer.steps import DataFromPrompt, ProcessWithPrompt,  HFHubDataSource, DataSource, zipped, concat
from functools import partial
from transformers import QuantoConfig
from datasets import concatenate_datasets, load_dataset
import json


import os

# LLM used to rate the document pairs (OpenAI account: kathy's lab).
# The API key is read from the OPENAI_API_KEY environment variable so that it never
# ends up in the notebook, its outputs or version control; a missing variable fails
# fast with a KeyError instead of an authentication error later on.
# NOTE(review): the key that used to be hard-coded here must be treated as leaked and revoked.
model = OpenAI(model_name="gpt-4", api_key=os.environ["OPENAI_API_KEY"])
# Alternative: a local, int8-quantized Llama-3 model instead of the OpenAI API.
# model = HFTransformers(
#                 "meta-llama/Meta-Llama-3-8B-Instruct",
#                 quantization_config=QuantoConfig(weights="int8"),
#                 device=0,
#                 device_map="cuda",
#             )
# model.config.pad_token_id = model.config.eos_token_id

def gen_from_iterable_dataset(iterable_ds):
    """Generator that re-emits every item of ``iterable_ds`` in its original order.

    Wrapped with ``functools.partial`` below so that an iterable dataset can be
    handed to ``Dataset.from_generator``, which expects a callable.
    """
    for example in iterable_ds:
        yield example

def evaluate_text_similarities(data_path, document_pairs, linguistic_lvl_name, linguistic_lvl_desc):
    """Prompt the LLM to rate the similarity of each document pair at one linguistic level.

    Uses the module-level ``model`` as the LLM.

    Args:
        data_path: Folder handed to ``DataDreamer(...)``; the run log shows it is the
            folder the steps are saved to and reloaded from.
        document_pairs: List of dicts turned into a dataset with ``Dataset.from_list``;
            every dict must provide the keys ``'text-1'`` and ``'text-2'``.
        linguistic_lvl_name: Short name of the linguistic level; used in the prompt
            step's name and as the name of the column holding the LLM generations.
        linguistic_lvl_desc: Phrase substituted for ``<linguistic_lvl>`` in the
            instruction (e.g. ``'syntactic similarity'``).

    Returns:
        A ``Dataset`` built from the zipped step output: the input columns (plus the
        formatted ``inputs`` prompt text) together with the ``linguistic_lvl_name``
        column of generations.
    """
    # Prompt template: asks for reasons followed by a 1-5 score in a JSON-like format.
    instruction = "Given the two Documents below, rate their <linguistic_lvl> on a scale from 1 to 5: Score 1 equals very low similarity and score 5 equals high similarity. First, give reasons for your score and then output the score. The output should be in the following format: {\"reasons\": \"explain your rating\",  \"score\": \"<json integer>\"}"
    instruction = instruction.replace("<linguistic_lvl>", linguistic_lvl_desc)
    
    # NOTE(review): the run log shows steps being reloaded from data_path when they were
    # run before -- presumably stale results can be reused if the folder is shared; verify.
    with DataDreamer(data_path):
        datasource = DataSource('documents', Dataset.from_list(document_pairs))
        # Build the text shown to the LLM: both documents of a pair in a single prompt input.
        datasource = datasource.map(lambda row: {'inputs': 'Document 1:\n {} \n Document 2:\n {}'.format(row['text-1'], row['text-2'])}, auto_progress=False)
        # One generation per pair; only the generations column is kept from this step.
        ds_focus_questions = ProcessWithPrompt(
          "{} describe text similarity".format(linguistic_lvl_name),
          inputs={"inputs": datasource.output["inputs"]},
          args={
             "llm": model,
             "n": 1,
             "instruction": instruction
          },
          outputs={"generations": linguistic_lvl_name},
        ).select_columns([linguistic_lvl_name])
        
        # Put the generations back next to the pairs they were produced for.
        zipped_step = zipped(datasource, ds_focus_questions)

        # Convert the step output into a regular Dataset by streaming it through a generator.
        results_iter = zipped_step.output.dataset
        results_ds   = Dataset.from_generator(partial(gen_from_iterable_dataset, results_iter))

        return results_ds

ModuleNotFoundError: No module named 'huggingface_hub.utils._errors'

In [8]:
# Reload the per-layer significant pairs from disk. The file is opened in a `with`
# block so the handle is closed deterministically instead of being leaked.
# NOTE(review): unpickling can execute arbitrary code -- only load files produced by
# this project. Also confirm the file really holds the layer -> pairs mapping: an
# earlier cell writes a DataFrame to this same path.
with open('../data/layer_to_pairs_signficance.pkl', 'rb') as pairs_file:
    layer_to_sig_pairs = pkl.load(pairs_file)

In [9]:
# Flatten the first 10 reloaded pairs of every layer into one list of records
# (this variant does not carry a 'layer-sim' field)
layer_x_pairs = []
for layer in range(7):
    # Report how many significant pairs this layer has in total
    print(layer, len(layer_to_sig_pairs[layer]))
    sample_of_pairs = layer_to_sig_pairs[layer][:10]
    layer_x_pairs.extend([
        {
            'text-1': pair[0],
            'text-2': pair[1],
            'z-score': pair[2],
            'layer': layer,
        }
        for pair in sample_of_pairs
    ])

0 2154
1 6
2 20
3 108
4 16
5 56
6 890


In [10]:
# Total number of sampled pairs across all layers (at most 10 per layer)
len(layer_x_pairs)

66

In [11]:
# Linguistic levels to rate. Each key is passed on as the level name (result column),
# each value as the phrase that replaces <linguistic_lvl> in the rating prompt:
# Given the two Documents below, rate their <linguistic_lvl> on a scale from 1 to 5: Score 1 equals very low similarity and score 5 equals high similarity. First, give reasons for your score and then output the score in the following output format: {\"reasons\": \"explain your rating\",  \"score\": \"<json integer>\"}
ling_phenomena = dict(
    syntax='syntactic similarity',
    semantic='semantic similarity',
    lexical='lexical similarity',
    discourse='discourse similarity',
)

In [12]:
# Rate all sampled pairs once per linguistic level; one result dataset per level
results = [
    evaluate_text_similarities('./output', layer_x_pairs, level_name, level_desc)
    for level_name, level_desc in ling_phenomena.items()
]

[ [35mü§ñ Data[33mDr[31mea[35mmer[0m üí§ ] Initialized. üöÄ Dreaming to folder: ./output
[ [35mü§ñ Data[33mDr[31mea[35mmer[0m üí§ ] Step 'documents' results loaded from disk. üôå It was previously run and saved.
[ [35mü§ñ Data[33mDr[31mea[35mmer[0m üí§ ] Step 'documents (map)' is running. ‚è≥
[ [35mü§ñ Data[33mDr[31mea[35mmer[0m üí§ ] Step 'documents (map)' will run lazily. ü•±
[ [35mü§ñ Data[33mDr[31mea[35mmer[0m üí§ ] Step 'syntax describe text similarity' was previously run and saved, but was outdated. üòû
[ [35mü§ñ Data[33mDr[31mea[35mmer[0m üí§ ] Step 'syntax describe text similarity' is running. ‚è≥
[ [35mü§ñ Data[33mDr[31mea[35mmer[0m üí§ ] Step 'documents (map)' finished running lazily. üéâ
[ [35mü§ñ Data[33mDr[31mea[35mmer[0m üí§ ] Step 'syntax describe text similarity' progress: 50 row(s) üîÑ
[ [35mü§ñ Data[33mDr[31mea[35mmer[0m üí§ ] Step 'syntax describe text similarity' finished and is saved to disk. üéâ

In [13]:
# Combine the per-level result datasets into a single dataset
# (the save log below reports 264 examples, i.e. 4 levels x 66 pairs)
all_results = concatenate_datasets(results)

In [14]:
# Persist the combined LLM ratings so the analysis below can start from disk
all_results.save_to_disk('../data/described_similarities_ds')

Saving the dataset (0/1 shards):   0%|          | 0/264 [00:00<?, ? examples/s]

### Analyze layer-to-linguistic similarity:

In [15]:
# Reload the saved ratings; replaces the in-memory `all_results` with the on-disk copy
all_results = load_from_disk('../data/described_similarities_ds')

In [16]:
# Work with the ratings as a pandas DataFrame from here on
all_results_df = all_results.to_pandas()

In [17]:
# One 'z-score' entry per row; the preview below shows each entry holds seven values
# (presumably one per layer -- TODO confirm against extract_sig_pairs_for_layer)
zscores= all_results_df['z-score'].tolist()

In [26]:
# Preview the first 50 rated pairs with their layer and z-scores
all_results_df[['text-1', 'text-2', 'layer', 'z-score']].head(n=50)

Unnamed: 0,text-1,text-2,layer,z-score
0,"Anatomy of a Fall\n\ni'm fully aware it‚Äôs not a fair comparison, but i‚Äôd like to be able to have a foundation of a dead-body-case type work-of-fiction to base my conclusion on how weak or strong the one i‚Äôm watching currently is. recently, and in this case, i‚Äôve come to referring to disco elysium, of which strongly elicited throughout my entire lengthy time with it what the evident culmination of anatomy of a fall - the kitchen fight recording, some of the child actors scenes - coaxed. immediately and on a purely cosmetic and surface level, Fall lacks any type of relatively incisive and personally disruptive style. this sort of flatly lit, boringly pictured and blocked, framed with little to no intention movie reminds me of Tar, from a few years ago, and at least for a little while we were happy with this natural Lightroom preset of a color palate. the question remains that if the director moved focus away from the visuals to focus on the performances, and the performances aren‚Äôt at all times captivating squarely enough to fill the vacuum, then why are you wasting our time\n\naddendum: <PERSON> states on an interview i read on cinematography world that the original intention was to shoot on 35, and they rather proceeded to shoot hawk v lite anamorphic on alexa then matte out the beautiful academy ratio to widescreen. horrific world we live in","<PERSON>\n\ngoing back and watching these in the original language instead of the goofy english dubs. a lot of people say this is the best one but i'm not really sure. most of them are pretty great. all of them are intensely watchable. this one in particular has the most complete plot with characters who are fearless and show each other mercy. on atmosphere alone the winner could go to maybe four or five of ghibli's other works. 
i remember watching this one as a kid pretty intensely since it's so violent and the sound effects try to reduce the gore to these cartoonish kinda tom and jerry actions but theres still so much blood. even when i was 10 or 12 i begin to notice that there weren't really characters like <PERSON>'s in american children's media, like role models that had personal resolve but that were also deeply flawed. cartoons here are much more concerned with being sardonic or melodramatic. the part where san baby birds the jerky in the main dude's mouth always gets me",0,"[1.6476448774337769, -1.2187529802322388, -0.9693524241447449, 0.131813183426857, -0.2457742691040039, -0.5583204627037048, 1.21274995803833]"
1,"The Power of the Dog\n\nin isolation there are very strong parts, unfortunately connected together with poorly directed ones that kind of meander without direction. the best scenes are cut too short and show nothing and the badly filmed ones always stretch way too long and show way too much. this film as a whole explores who or what killed <PERSON>. we know, because we've seen every part of the chain that leads to his death. however, where the entire thing fails is that it almost seems like we know more than the movie does","<PERSON>\n\ngoing back and watching these in the original language instead of the goofy english dubs. a lot of people say this is the best one but i'm not really sure. most of them are pretty great. all of them are intensely watchable. this one in particular has the most complete plot with characters who are fearless and show each other mercy. on atmosphere alone the winner could go to maybe four or five of ghibli's other works. i remember watching this one as a kid pretty intensely since it's so violent and the sound effects try to reduce the gore to these cartoonish kinda tom and jerry actions but theres still so much blood. even when i was 10 or 12 i begin to notice that there weren't really characters like <PERSON>'s in american children's media, like role models that had personal resolve but that were also deeply flawed. cartoons here are much more concerned with being sardonic or melodramatic. the part where san baby birds the jerky in the main dude's mouth always gets me",0,"[2.2426974773406982, -0.6790010333061218, -0.34340301156044006, -0.0539296455681324, -0.7155317068099976, -0.8478379845619202, 0.39701059460639954]"
2,"<PERSON>\n\ngoing back and watching these in the original language instead of the goofy english dubs. a lot of people say this is the best one but i'm not really sure. most of them are pretty great. all of them are intensely watchable. this one in particular has the most complete plot with characters who are fearless and show each other mercy. on atmosphere alone the winner could go to maybe four or five of ghibli's other works. i remember watching this one as a kid pretty intensely since it's so violent and the sound effects try to reduce the gore to these cartoonish kinda tom and jerry actions but theres still so much blood. even when i was 10 or 12 i begin to notice that there weren't really characters like <PERSON>'s in american children's media, like role models that had personal resolve but that were also deeply flawed. cartoons here are much more concerned with being sardonic or melodramatic. the part where san baby birds the jerky in the main dude's mouth always gets me","Anatomy of a Fall\n\ni'm fully aware it‚Äôs not a fair comparison, but i‚Äôd like to be able to have a foundation of a dead-body-case type work-of-fiction to base my conclusion on how weak or strong the one i‚Äôm watching currently is. recently, and in this case, i‚Äôve come to referring to disco elysium, of which strongly elicited throughout my entire lengthy time with it what the evident culmination of anatomy of a fall - the kitchen fight recording, some of the child actors scenes - coaxed. immediately and on a purely cosmetic and surface level, Fall lacks any type of relatively incisive and personally disruptive style. this sort of flatly lit, boringly pictured and blocked, framed with little to no intention movie reminds me of Tar, from a few years ago, and at least for a little while we were happy with this natural Lightroom preset of a color palate. 
the question remains that if the director moved focus away from the visuals to focus on the performances, and the performances aren‚Äôt at all times captivating squarely enough to fill the vacuum, then why are you wasting our time\n\naddendum: <PERSON> states on an interview i read on cinematography world that the original intention was to shoot on 35, and they rather proceeded to shoot hawk v lite anamorphic on alexa then matte out the beautiful academy ratio to widescreen. horrific world we live in",0,"[1.647646427154541, -1.2187526226043701, -0.9693564176559448, 0.13181640207767487, -0.24577262997627258, -0.5583148002624512, 1.2127469778060913]"
3,"<PERSON>\n\ngoing back and watching these in the original language instead of the goofy english dubs. a lot of people say this is the best one but i'm not really sure. most of them are pretty great. all of them are intensely watchable. this one in particular has the most complete plot with characters who are fearless and show each other mercy. on atmosphere alone the winner could go to maybe four or five of ghibli's other works. i remember watching this one as a kid pretty intensely since it's so violent and the sound effects try to reduce the gore to these cartoonish kinda tom and jerry actions but theres still so much blood. even when i was 10 or 12 i begin to notice that there weren't really characters like <PERSON>'s in american children's media, like role models that had personal resolve but that were also deeply flawed. cartoons here are much more concerned with being sardonic or melodramatic. the part where san baby birds the jerky in the main dude's mouth always gets me","The Power of the Dog\n\nin isolation there are very strong parts, unfortunately connected together with poorly directed ones that kind of meander without direction. the best scenes are cut too short and show nothing and the badly filmed ones always stretch way too long and show way too much. this film as a whole explores who or what killed <PERSON>. we know, because we've seen every part of the chain that leads to his death. however, where the entire thing fails is that it almost seems like we know more than the movie does",0,"[2.2426974773406982, -0.6790010333061218, -0.34340301156044006, -0.0539296455681324, -0.7155317068099976, -0.8478379845619202, 0.39701059460639954]"
4,"Falling Down\n\nReactionary praxis, revolutionary aesthetics\n\nIn Falling Down, <PERSON> character (D-fens) is the blueprint of the misguided revolutionary movement. Passionate and deed-based, yet unfocused, individualistic, and most importantly focused on the liberation of the consumer rather than the worker. Consumption of product under the guise of doing something more worthwhile, is at the heart of Falling Down, from the very first scene the toothy snarl of a <PERSON> plushy sets off ‚ÄúD-fens‚Äôs‚Äù rage, causing him to skip work for that day, instead starting and unhinged consumer revolution, holding up fast food stores and bodegas, using threats of violence as leverage for enhanced convenience. Although he still seems ‚Äúcountercultural‚Äù and on the side of the people, this focus on the consumer is in reality inherently anti-revolutionary, screwing over those who make the products even more. <PERSON> does an excellent job at portraying the misguided revolutionary in this biting and sardonic satire.","<PERSON>\n\nSo in the last scene based on the year on the headstone and the trees, this movie ends sometime in early fall 2001. It could reasonably assumed based on the trees and New York location that this movie ends sometime around early to mid September. I know I‚Äôm just over analyzing this but that means either:\n\nA. This film ends less than a week before 9/11, and symbolically may be one of the bleakest endings ever made.\n\nB. This film ends directly after 9/11 and the ending which is just completely skipped over and the ending is even more hopeful than it seems?!?",0,"[1.827519416809082, -1.5416673421859741, -0.7630870342254639, -0.15895895659923553, -0.16470670700073242, -0.004535033833235502, 0.8054291605949402]"
5,"<PERSON>\n\nSo in the last scene based on the year on the headstone and the trees, this movie ends sometime in early fall 2001. It could reasonably assumed based on the trees and New York location that this movie ends sometime around early to mid September. I know I‚Äôm just over analyzing this but that means either:\n\nA. This film ends less than a week before 9/11, and symbolically may be one of the bleakest endings ever made.\n\nB. This film ends directly after 9/11 and the ending which is just completely skipped over and the ending is even more hopeful than it seems?!?","Falling Down\n\nReactionary praxis, revolutionary aesthetics\n\nIn Falling Down, <PERSON> character (D-fens) is the blueprint of the misguided revolutionary movement. Passionate and deed-based, yet unfocused, individualistic, and most importantly focused on the liberation of the consumer rather than the worker. Consumption of product under the guise of doing something more worthwhile, is at the heart of Falling Down, from the very first scene the toothy snarl of a <PERSON> plushy sets off ‚ÄúD-fens‚Äôs‚Äù rage, causing him to skip work for that day, instead starting and unhinged consumer revolution, holding up fast food stores and bodegas, using threats of violence as leverage for enhanced convenience. Although he still seems ‚Äúcountercultural‚Äù and on the side of the people, this focus on the consumer is in reality inherently anti-revolutionary, screwing over those who make the products even more. <PERSON> does an excellent job at portraying the misguided revolutionary in this biting and sardonic satire.",0,"[1.8275209665298462, -1.541669249534607, -0.7630846500396729, -0.1589588224887848, -0.16470329463481903, -0.004536657594144344, 0.8054251670837402]"
6,"Rachel Getting Married\n\nsteadily grew on me over 2hrs, watching the <PERSON> interact, their hurt & compassion backgrounding every conversation. <PERSON>‚Äôs intimate stylizations hold this love story of a family like an open heart at its center, capturing the complexity & contradictions of these‚Äîat their core‚Äîvery real, very strong bonds thru grief / addiction / joy / life. some gut punches here, esp re the <PERSON>/rachel/<PERSON> dynamic.\nwatched this bc i got engaged in december and am starting to figure out my wedding (!!)\nadore how cozy & lively <PERSON>‚Äôs is from planning (making the seating chart!) to the rehearsal dinner to the main event which is sheer happiness, stops their world for a moment and allows them to bask in what can be so simply GOOD & celebratory. a place for them to put nothing but their love. atmospheric inspo and a beautiful, powerful sequence in the film","<PERSON>\n\nso so enamored with every part of this almost entirely due to <PERSON>‚Äôs charisma & light & talent, but also <PERSON> and the lush set design‚Äîfrom going wide on the stage to <PERSON> alone in her apartment, every look (her hair, her nails, her outfits!) & every room is exquisite, and‚Äîhuge!‚Äîevery line is charming.\nrequires suspension of disbelief as <PERSON> is so specifically gorgeous and has such an attractive presence that it‚Äôs difficult to figure that someone might not be instantly awed by her beauty. 
i don‚Äôt typically like musicals and i want to watch this again immediately.\n\nabout a woman who doesn‚Äôt realize her own self-worth (maybe nothing is more devastating here than when she makes sure people think her baby is pretty above all else) and thus has blinders on when it comes to her husband‚Äôs conning but <PERSON> is a layered character with more depth than simply being threatened by his wife‚Äôs success, and their love / his self-wrought, tortured predicament does come through at the same time as <PERSON>‚Äôs growth and the finale‚Äôs bittersweet optimism‚Ä¶ esp with ‚ÄúMy Man‚Äù being my favorite song here (her voice at ‚Äúwhen i know i‚Äôll come back on my knees someday‚Äù)‚Ä¶ and <PERSON> saying ‚Äúoh, <PERSON>‚Äù being pure romance‚Ä¶",0,"[2.0084774494171143, 0.4846120774745941, -0.12106988579034805, -0.4625290632247925, -0.5338815450668335, -1.4851081371307373, 0.10955332219600677]"
7,"Rachel Getting Married\n\nsteadily grew on me over 2hrs, watching the <PERSON> interact, their hurt & compassion backgrounding every conversation. <PERSON>‚Äôs intimate stylizations hold this love story of a family like an open heart at its center, capturing the complexity & contradictions of these‚Äîat their core‚Äîvery real, very strong bonds thru grief / addiction / joy / life. some gut punches here, esp re the <PERSON>/rachel/<PERSON> dynamic.\nwatched this bc i got engaged in december and am starting to figure out my wedding (!!)\nadore how cozy & lively <PERSON>‚Äôs is from planning (making the seating chart!) to the rehearsal dinner to the main event which is sheer happiness, stops their world for a moment and allows them to bask in what can be so simply GOOD & celebratory. a place for them to put nothing but their love. atmospheric inspo and a beautiful, powerful sequence in the film","<PERSON>\n\nfeels like <PERSON>‚Äîa permanently misguided voice‚Äîbuilt this as some uninspired riff on <PERSON> meets The Talented Mr. Ripley entirely because she liked <PERSON> ‚ÄúMurder on the Dancefloor.‚Äù\nher tiktok-primed sophomore effort isn‚Äôt actively harmful, certainly playing with some vague ideas about middle class upward mobility that never coalesce, but if you‚Äôre reading in good faith, don‚Äôt really intend to‚Ä¶\n\ni do not typically like <PERSON>‚Äôs work, i do not think i ‚Äúlike‚Äù this, but i‚Äôm not inclined to dunk on it too much bc i didn‚Äôt think it was trying to be the *edgiest, wackiest, wildest* movie of all time, and am not judging it as such. the freaky stuff was fine and fun enough. <PERSON>‚Äôs confidence goes FAR. am being very mild about all this, but it didn‚Äôt stir up enough to offend or inspire condescension. i had a dumb good time",0,"[2.245753049850464, -0.03886530175805092, -0.4941970109939575, -0.7094990015029907, -0.8547700047492981, -0.5568146109580994, 0.40839943289756775]"
8,"Rachel Getting Married\n\nsteadily grew on me over 2hrs, watching the <PERSON> interact, their hurt & compassion backgrounding every conversation. <PERSON>‚Äôs intimate stylizations hold this love story of a family like an open heart at its center, capturing the complexity & contradictions of these‚Äîat their core‚Äîvery real, very strong bonds thru grief / addiction / joy / life. some gut punches here, esp re the <PERSON>/rachel/<PERSON> dynamic.\nwatched this bc i got engaged in december and am starting to figure out my wedding (!!)\nadore how cozy & lively <PERSON>‚Äôs is from planning (making the seating chart!) to the rehearsal dinner to the main event which is sheer happiness, stops their world for a moment and allows them to bask in what can be so simply GOOD & celebratory. a place for them to put nothing but their love. atmospheric inspo and a beautiful, powerful sequence in the film","Little <PERSON>\n\nthe use of AI animation makes this pretty irredeemable. leaves a horrible taste that it doubles down on with <PERSON>'s character in the last act. <PERSON> is a stand-in for <PERSON> in this role (made me more sympathetic to Adaptation, which i've never loved, and <PERSON> most recent effort--both much better films) though this is really a rip-off of last year's Rotting in the Sun (with its animation (and general screenplay) further giving the impression that it was a rushed job).\ndeeply obnoxious first half transitions to a less-obnoxious second half that can't quite shake its roots, empty pretension still looming like a dark cloud. MVPs <PERSON> & <PERSON>",0,"[2.2381792068481445, -0.15823480486869812, 0.2343253642320633, -0.5075234770774841, -0.6299780607223511, -1.1193093061447144, -0.057456567883491516]"
9,"Rachel Getting Married\n\nsteadily grew on me over 2hrs, watching the <PERSON> interact, their hurt & compassion backgrounding every conversation. <PERSON>‚Äôs intimate stylizations hold this love story of a family like an open heart at its center, capturing the complexity & contradictions of these‚Äîat their core‚Äîvery real, very strong bonds thru grief / addiction / joy / life. some gut punches here, esp re the <PERSON>/rachel/<PERSON> dynamic.\nwatched this bc i got engaged in december and am starting to figure out my wedding (!!)\nadore how cozy & lively <PERSON>‚Äôs is from planning (making the seating chart!) to the rehearsal dinner to the main event which is sheer happiness, stops their world for a moment and allows them to bask in what can be so simply GOOD & celebratory. a place for them to put nothing but their love. atmospheric inspo and a beautiful, powerful sequence in the film","A Nos Amours\n\nagain lauding <PERSON> for his mix of clean cuts & observational style, this not quite passive but understated energy that lets his films breathe moment to moment; imbued with a light yet sustained understanding, his tone shifts mimicing the undulations of life.\n\nhe looks at the two lives of an adolescent, private/familial and public/social--the first enlightening the second--in a very specific way that resonates.. esp interesting given <PERSON>‚Äôs involvement as not only the film‚Äôs father but also <PERSON>‚Äôs, and how that informs his depiction of her. their sequences are some of the most rich.\na premier girlhood film.",0,"[2.115020751953125, 0.21499507129192352, -0.5389561653137207, -0.8219174146652222, -0.924434244632721, -0.5963159799575806, 0.5516079068183899]"


In [20]:
# Row count per layer -- a quick check that every layer has the same number of results.
all_results_df['layer'].value_counts()

layer
0    40
2    40
3    40
4    40
5    40
6    40
1    24
Name: count, dtype: int64

In [23]:
def aggregate_score(list_of_scores):
    """Return the median of the integer ``score`` values parsed from JSON strings.

    Each entry is expected to be a string holding a JSON object with a
    ``score`` key. Two kinds of surrounding noise are tolerated:

    * if the entry contains a blank line (``"\\n\\n"``), only its last line is
      parsed;
    * any ``"Output:"`` marker is removed before parsing.

    ``None`` entries are skipped silently. Entries that cannot be parsed are
    printed (followed by a separator line) and skipped, so one malformed
    response does not abort the whole aggregation.

    Parameters
    ----------
    list_of_scores : iterable
        Raw entries (strings or ``None``), e.g. one column of a pandas group.

    Returns
    -------
    float
        Median of the parsed scores rounded to 2 decimals, or NaN when no
        entry could be parsed.
    """
    scores = []
    for json_str in list_of_scores:
        if json_str is None:
            continue
        try:
            if "\n\n" in json_str:
                # Free text precedes the JSON payload: keep only the last line.
                json_str = json_str.split("\n")[-1].strip()

            json_str = json_str.replace("Output:", "").strip()

            json_obj = json.loads(json_str)
            scores.append(int(json_obj['score']))
        except (TypeError, ValueError, KeyError, AttributeError, OverflowError):
            # Best-effort parsing: show the offending entry and move on.
            # json.JSONDecodeError is a ValueError subclass. Unlike a bare
            # `except:`, this does not swallow KeyboardInterrupt/SystemExit.
            print(json_str)
            print('==============')
    if not scores:
        # np.median([]) would emit RuntimeWarnings before yielding NaN;
        # return NaN explicitly instead.
        return np.nan
    return round(np.median(scores), 2)

# Collapse the per-row judgments into one median score per layer for each
# of the four score columns.
layer_scores_df = (
    all_results_df
    .groupby('layer')
    .agg({
        column: aggregate_score
        for column in ('lexical', 'syntax', 'discourse', 'semantic')
    })
    .reset_index()
)

In [24]:
# Show the first 7 rows of the per-layer aggregated scores.
layer_scores_df.head(7)

Unnamed: 0,layer,lexical,syntax,discourse,semantic
0,0,1.0,2.0,2.0,1.0
1,1,2.0,3.0,3.0,2.0
2,2,1.0,3.0,2.0,1.0
3,3,2.0,3.0,2.0,2.0
4,4,2.0,3.0,2.0,2.0
5,5,2.0,3.0,2.5,2.0
6,6,2.0,3.0,2.5,2.0
