In [1]:
%load_ext autoreload

In [19]:
%autoreload
import sys
sys.path.insert(0, '../')

In [20]:
from src.utilities.mluar_utils import *

In [21]:
from transformers import AutoModel, AutoTokenizer
from datasets import load_dataset, Dataset, load_from_disk
import numpy as np
from einops import rearrange, reduce, repeat
import torch
from sklearn.metrics.pairwise import cosine_similarity
from matplotlib import pyplot as plt
import math
import pandas as pd
import pickle as pkl

pd.set_option('display.max_colwidth', None)


In [22]:
MULTI_LUAR_PATH =  "/mnt/swordfish-pool2/milad/multi-luar-reddit-model/"
LUAR_PATH =  "/mnt/swordfish-pool2/nikhil/LUAR/pretrained_weights/LUAR-MUD/"

In [6]:
# Load models
multiluar_model = AutoModel.from_pretrained(MULTI_LUAR_PATH, trust_remote_code=True)
luar_model = AutoModel.from_pretrained(LUAR_PATH, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained("rrivera1849/LUAR-MUD")

In [7]:
# Load data
data_path = '/mnt/swordfish-pool2/milad/hiatus-data/phase_2/mode_perGenre-HRS2.1/TA2/hrs_06-27-24_english_perGenre-HRS2.1/data/hrs_06-27-24_english_perGenre-HRS2.1_TA2_input'
ground_truth_path = '/mnt/swordfish-pool2/milad/hiatus-data/phase_2/mode_perGenre-HRS2.1/TA2/hrs_06-27-24_english_perGenre-HRS2.1/groundtruth/hrs_06-27-24_english_perGenre-HRS2.1_TA2'
hiatus_data, _, _ = load_aa_data(data_path, ground_truth_path)

Loading:  /mnt/swordfish-pool2/milad/hiatus-data/phase_2/mode_perGenre-HRS2.1/TA2/hrs_06-27-24_english_perGenre-HRS2.1/data/hrs_06-27-24_english_perGenre-HRS2.1_TA2_input


In [8]:
# keep authors with only more than one text
authors_with_multiple_texts = [x[0] for x in hiatus_data.authorID.value_counts().to_dict().items() if x[1] > 1]
hiatus_data = hiatus_data[hiatus_data.authorID.isin(authors_with_multiple_texts)]

In [9]:
hiatus_data = hiatus_data.sample(2000)

In [10]:
hiatus_data_texts = hiatus_data.fullText.tolist()
labels = hiatus_data['authorID'].tolist()

# Compute embeddings
max_seq_length = 736
hiatus_data_embeddings = get_luar_embeddings(hiatus_data_texts, multiluar_model, tokenizer, max_length=max_seq_length, batch_size=1, is_multi_luar=True)

In [11]:
#np.max([len(x.split()) for x in hiatus_data.fullText.tolist()])

### Experiment Design

- For each layer, we find pairs of ground-truth texts written by the same author where the corresponding layer's embedding scored them high compared to other layers
- For each layer, we take a sample of these pairs of texts and prompt ChatGPT to find which linguistic level they are similar

#### Step 1:

In [12]:
# Compute Multi-luar similarity matrices at every layer and average of the layers
muti_luar_layers_sims = [compute_similarities(hiatus_data_embeddings, hiatus_data_embeddings, layer=i) for i in range(7)]
#muti_luar_layers_sims.append(compute_similarities(hiatus_data_embeddings, hiatus_data_embeddings, layer=None))
muti_luar_layers_sims = np.stack(muti_luar_layers_sims)

In [23]:
# Compute significant pairs to layers
layer_to_sig_pairs = {layer: extract_sig_pairs_for_layer(hiatus_data_texts, muti_luar_layers_sims, labels, layer) for layer in range(7)}

In [24]:
layer_x_pairs = []
for layer in [0, 1, 2, 3, 4, 5, 6]:
    print(layer, len(layer_to_sig_pairs[layer]))
    sample_of_pairs = layer_to_sig_pairs[layer][:10]
    layer_x_pairs += [{'text-1': x[0], 'text-2': x[1], 'z-score': x[2], 'layer-sim': x[3], 'layer': layer} for x in sample_of_pairs]
layer_x_pairs_df = pd.DataFrame(layer_x_pairs)

0 606
1 0
2 1
3 2
4 0
5 0
6 5


In [27]:
layer_x_pairs_df[layer_x_pairs_df.layer >= 0].head()

Unnamed: 0,text-1,text-2,z-score,layer-sim,layer
0,"Honmein Resistance Bands\n\nTake your home workout up many levels with this 3-pack of resistance bands. Each band offers a different level of resistance (low, medium, high) so that you can gradually work your way up to the most effective exercises. Perfect for squats, lunges, donkey kicks, and tricep dips. The three bands are all made in a thick fabric that is durable through the toughest workouts and never rolls up! This set comes in a simple carrying bag so that you can take your workout easily with you to the park, beach, vacation, or even the office. This simple and affordable set is the easiest way to stay toned!","Cra-Z-Art Jigsaw Puzzle Glue with Applicator\n\nAfter spending days or weeks putting together all those pieces of your jigsaw puzzle, what better way to appreciate your work than by turning into a piece of art! Seal in your memories of working on your puzzle with this jigsaw puzzle glue. Simply swipe a thin layer on top of your puzzle using the included applicator, let it set, and your artwork is set! Most puzzles will hold with one layer of glue, but you can put two layers for added protection. And with a 133ml jar, you will have enough to seal in multiple puzzles! The glue does not smell and is non-toxic. It dries clear and it simple to clean off from any accidents. This small bottle can be the key to keeping all your puzzle memories together!","[2.0897844, -1.0721691, -1.1436987, -0.005097844, -0.0016726544, -0.22196682, 0.35483754]","[0.8800262, 0.8021675, 0.80040616, 0.82844263, 0.828527, 0.82310253, 0.83730555]",0
1,"Permanent Vacation\n\n“Permanent Vacation” was shot shortly after <PERSON> dropped out of film school…..and yeah, it looks like it. It´s rough, barebone, and underdeveloped. This kind of aimless, plotless hangout film only works when the characters are interesting and, in my opinion, they´re not. Therefore, the movie becomes a drag despite the short runtime. Nevertheless, I like the rebellious attitude and some of the images. The film works best as an idiosyncratic portrait of New York. Even at this point you could predict that <PERSON> would become a director whose movies are characterized by strong vibes.","Nosferatu the Vampyre\n\nI would simply stay with my wife <PERSON> rather than travelling to Transylvania to meet some weirdo <PERSON> but that´s just me.\n\nGerman expressionism meets New German cinema. “Nosferatu the Vampyre” is my favorite horror remake, my favorite vampire film, and my favorite horror mood piece. The atmosphere and use of light and shadows are simply impeccable. The film is a visceral experience. It´s eerie, haunting, and deeply evocative, and <PERSON> delivers another banger score. It´s also a fascinating portrayal of the total collapse of a patriarchal society because nobody listens to the one woman who knows what´s going on. <PERSON> and <PERSON> are the greatest screen duo in Herzog´s filmography.\n\nHere, here, and here are my previous reviews and here is <PERSON> for his Great Movies selection.\n\n<PERSON> movies ranked","[2.2815864, -0.8527378, 0.16573595, -0.8355943, -0.43191236, -0.38879085, 0.06171658]","[0.9552749, 0.90302163, 0.9200009, 0.90330744, 0.91003734, 0.91075623, 0.9182668]",0
2,"The Hunger\n\nyou belong to me. we belong to each other.\nokay i kind of am obsessed... it doesn't explore its themes of the fear of aging or the loneliness of immortality as deeply as it needs to to maximize its potential, and i wish the intersection of the two was more complicated in <PERSON>, but the smoldering sexuality and the visual balladry are hard not to find bewitching... i think this ought to be at least as much of an 80s vampire classic as the lost boys and fright night, and i honestly prefer this to <PERSON> much more famous top gun, even if its bite doesn't quite match its ambitions. still, i found it very moving.","Hard to Die\n\nnow i remember! <PERSON>'s the guy that was involved in that sorority house massacre a few months ago!\nsimilarly absurd to the previous film, but amped up a notch and the better for it. a stronger atmosphere, more absurd, unendingly nonsensical; just so fucking bizarre and stupid in a way that kinda rules. unfathomable, but captivating- this is <PERSON> at his b-est and best. i do think the twist in the second film remains unbeaten, though this is the better film (and best in the sorority house massacre trilogy) overall.\n\n#ReleaseSororityHouseMassacreTheFinalExam!","[2.2915294, -0.7005312, 0.1221367, -0.4979419, -0.7640231, -0.6191468, 0.1679615]","[0.91908747, 0.8612865, 0.8771789, 0.86520016, 0.86006, 0.8628587, 0.87806416]",0
3,"The Wrecking Crew\n\nhold on to your miniskirt!\nof course, we all know this as the movie margot <PERSON>'s <PERSON> watched in once upon a time in hollywood, and that along with it being one of only 6 movies the real sharon <PERSON> made (excluding uncredited cameos) are its main pop culture legacy. it's fun, it's fine. the orientalism is troubling and worth noting, but it's overall a fine movie. the soundtrack slaps, and the whole thing is just so, so, so late 60s in a way thats kind of irresistible, despite glaring flaws. and <PERSON> is lovely in it, she reminded me a lot of <PERSON>. big physical comedy, and often-underplayed comic lines which heightened their effect consistently- ""oh, my hat! my hat's okay!"" isn't that funny of a line but her delivery of it made it, to me, the best joke in the film! she was radiant, and the picture really didn't do much to deserve her, but it's an entertaining enough watch to be worthwhile.","Hard to Die\n\nnow i remember! <PERSON>'s the guy that was involved in that sorority house massacre a few months ago!\nsimilarly absurd to the previous film, but amped up a notch and the better for it. a stronger atmosphere, more absurd, unendingly nonsensical; just so fucking bizarre and stupid in a way that kinda rules. unfathomable, but captivating- this is <PERSON> at his b-est and best. i do think the twist in the second film remains unbeaten, though this is the better film (and best in the sorority house massacre trilogy) overall.\n\n#ReleaseSororityHouseMassacreTheFinalExam!","[2.0687022, -0.40725115, -0.69150245, 0.054661825, -1.0614951, -0.6693116, 0.7061901]","[0.9241398, 0.9000653, 0.89730144, 0.90455663, 0.8937039, 0.8975172, 0.91089165]",0
4,"Toy Soldiers\n\nI really don't know what to make of this. On the one hand, it's too violent to be a kid's adventure, and on the other, the violence, whilst occasionally hard, is too flacid to make it a legitimate action film.\n\nSo what are we left with? Well, not really all that much. The pacing is way off, with not a lot happening for the first half an hour (save for an amusing prank involving moving furniture outside.) There are attempts to differentiate the boys, but really, if you're not <PERSON> or <PERSON>, it doesn't really matter. I don't even think a few of them are named!\n\nOnce the main plot does kick in, it's fairly generic, which is another of the film's problems. Save for one oddly judged character motivation, you've seen everything in this done better elsewhere. It's not a stretch to guess how the whole thing's going to turn out.\n\nStill, the cast are largely serviceable, and it's fun seeing people like <PERSON>, <PERSON>, and <PERSON> try and keep a straight face. <PERSON> is also enjoyably demented as the main villain.\n\nToy Soldiers is a rote, but fairly enjoyable thriller that you probably won't remember long after watching it.","Foul Play\n\nThis mish-mash of broad comedy, thriller, and romance shouldn't really work, but I'll be damned if this isn't a hell of a lot of fun.\n\nAfter picking up a hitch-hiker with a secret, meek librarian <PERSON> (<PERSON>) becomes embroiled in a sinister plot to assassinate the Pope.\n\nCue pratfalls aplenty, evil twins, cases of mistaken identities, albino kidnaps, dwarf abuse, old ladies swearing via the medium of scrabble, terrified Japanese tourists, and possibly the funniest martial arts brawl I think I've ever seen.\n\nThe cast are entirely game, and help sell this slightly absurd film, which sometimes feels like a parody of itself.\n\n<PERSON> and <PERSON> have an easy chemistry, and their scenes together are great. <PERSON> herself is a joy to watch, and there's a sincerity to her performance that's incredibly charming. She has wonderful comic timing, and my God is she ever beautiful in this. The camera adores her.\n\nThere's also great support from <PERSON> as <PERSON>'s snake owning landlord, <PERSON> as <PERSON>'s street smart friend <PERSON>, and <PERSON>, who went on to be cast in 10 from his performance here.\n\nThere are nice nods to the works of <PERSON>, most notably the macguffin, which ends up having no bearing on the plot at all. Some of the early scenes manage to be really quite tense, and the climax at the theatre is handled effeciently.\n\nI also thought there was a delicious irony in the fact that the antagonists hate the Church because they feel it is a corrupt institution funded by corporations, and the man playing the Pope was a prominent San Francisco businessman at the time.","[2.1971657, -0.82974297, -0.4425488, -0.9760793, -0.21128598, -0.22674227, 0.489239]","[0.9325719, 0.90000486, 0.90417075, 0.8984304, 0.90665895, 0.90649265, 0.914196]",0


In [33]:
layer_x_pairs_df.to_pickle('../data/layer_to_pairs_signficance.pkl', 'wb')

#### Step 2:

In [1]:
from datadreamer import DataDreamer
from datadreamer.llms import HFTransformers, ParallelLLM, OpenAI
from datadreamer.steps import DataFromPrompt, ProcessWithPrompt,  HFHubDataSource, DataSource, zipped, concat
from functools import partial
from transformers import QuantoConfig
from datasets import concatenate_datasets, load_dataset
import json


model = OpenAI(model_name="gpt-4", api_key='sk-proj-zTbZNk16Ik1pZnqLn38ZT3BlbkFJImq3pd7widkr7RzsC771') #kathy's lab
# model = HFTransformers(
#                 "meta-llama/Meta-Llama-3-8B-Instruct",
#                 quantization_config=QuantoConfig(weights="int8"),
#                 device=0,
#                 device_map="cuda",
#             )
# model.config.pad_token_id = model.config.eos_token_id

def gen_from_iterable_dataset(iterable_ds):
    yield from iterable_ds

def evaluate_text_similarities(data_path, document_pairs, linguistic_lvl_name, linguistic_lvl_desc):
    instruction = "Given the two Documents below, rate their <linguistic_lvl> on a scale from 1 to 5: Score 1 equals very low similarity and score 5 equals high similarity. First, give reasons for your score and then output the score. The output should be in the following format: {\"reasons\": \"explain your rating\",  \"score\": \"<json integer>\"}"
    instruction = instruction.replace("<linguistic_lvl>", linguistic_lvl_desc)
    
    with DataDreamer(data_path):
        datasource = DataSource('documents', Dataset.from_list(document_pairs))
        datasource = datasource.map(lambda row: {'inputs': 'Document 1:\n {} \n Document 2:\n {}'.format(row['text-1'], row['text-2'])}, auto_progress=False)
        ds_focus_questions = ProcessWithPrompt(
          "{} describe text similarity".format(linguistic_lvl_name),
          inputs={"inputs": datasource.output["inputs"]},
          args={
             "llm": model,
             "n": 1,
             "instruction": instruction
          },
          outputs={"generations": linguistic_lvl_name},
        ).select_columns([linguistic_lvl_name])
        
        zipped_step = zipped(datasource, ds_focus_questions)

        results_iter = zipped_step.output.dataset
        results_ds   = Dataset.from_generator(partial(gen_from_iterable_dataset, results_iter))

        return results_ds

In [8]:
layer_to_sig_pairs = pkl.load(open('../data/layer_to_pairs_signficance.pkl', 'rb'))

In [9]:
layer_x_pairs = []
for layer in [0, 1, 2, 3, 4, 5, 6]:
    print(layer, len(layer_to_sig_pairs[layer]))
    sample_of_pairs = layer_to_sig_pairs[layer][:10]
    layer_x_pairs += [{'text-1': x[0], 'text-2': x[1], 'z-score': x[2], 'layer': layer} for x in sample_of_pairs]

0 2154
1 6
2 20
3 108
4 16
5 56
6 890


In [10]:
len(layer_x_pairs)

66

In [11]:
# Given the two Documents below, rate their <linguistic_lvl> on a scale from 1 to 5: Score 1 equals very low similarity and score 5 equals high similarity. First, give reasons for your score and then output the score in the following output format: {\"reasons\": \"explain your rating\",  \"score\": \"<json integer>\"}
ling_phenomena = {
    'syntax': 'syntactic similarity',
    'semantic': 'semantic similarity',
    'lexical': 'lexical similarity',
    'discourse': 'discourse similarity'
}

In [12]:
results = []
for key, val in ling_phenomena.items():
    results.append(evaluate_text_similarities('./output', layer_x_pairs, key, val))

[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Initialized. 🚀 Dreaming to folder: ./output
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'documents' results loaded from disk. 🙌 It was previously run and saved.
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'documents (map)' is running. ⏳
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'documents (map)' will run lazily. 🥱
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'syntax describe text similarity' was previously run and saved, but was outdated. 😞
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'syntax describe text similarity' is running. ⏳
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'documents (map)' finished running lazily. 🎉
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'syntax describe text similarity' progress: 50 row(s) 🔄
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'syntax describe text similarity' finished and is saved to disk. 🎉
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'syntax describe text similar

In [13]:
all_results = concatenate_datasets(results)

In [14]:
all_results.save_to_disk('../data/described_similarities_ds')

Saving the dataset (0/1 shards):   0%|          | 0/264 [00:00<?, ? examples/s]

### Analyze layer to lingustic similarity:

In [15]:
all_results = load_from_disk('../data/described_similarities_ds')

In [16]:
all_results_df = all_results.to_pandas()

In [17]:
zscores= all_results_df['z-score'].tolist()

In [26]:
all_results_df[['text-1', 'text-2', 'layer', 'z-score']].head(n=50)

Unnamed: 0,text-1,text-2,layer,z-score
0,"Anatomy of a Fall\n\ni'm fully aware it’s not a fair comparison, but i’d like to be able to have a foundation of a dead-body-case type work-of-fiction to base my conclusion on how weak or strong the one i’m watching currently is. recently, and in this case, i’ve come to referring to disco elysium, of which strongly elicited throughout my entire lengthy time with it what the evident culmination of anatomy of a fall - the kitchen fight recording, some of the child actors scenes - coaxed. immediately and on a purely cosmetic and surface level, Fall lacks any type of relatively incisive and personally disruptive style. this sort of flatly lit, boringly pictured and blocked, framed with little to no intention movie reminds me of Tar, from a few years ago, and at least for a little while we were happy with this natural Lightroom preset of a color palate. the question remains that if the director moved focus away from the visuals to focus on the performances, and the performances aren’t at all times captivating squarely enough to fill the vacuum, then why are you wasting our time\n\naddendum: <PERSON> states on an interview i read on cinematography world that the original intention was to shoot on 35, and they rather proceeded to shoot hawk v lite anamorphic on alexa then matte out the beautiful academy ratio to widescreen. horrific world we live in","<PERSON>\n\ngoing back and watching these in the original language instead of the goofy english dubs. a lot of people say this is the best one but i'm not really sure. most of them are pretty great. all of them are intensely watchable. this one in particular has the most complete plot with characters who are fearless and show each other mercy. on atmosphere alone the winner could go to maybe four or five of ghibli's other works. i remember watching this one as a kid pretty intensely since it's so violent and the sound effects try to reduce the gore to these cartoonish kinda tom and jerry actions but theres still so much blood. even when i was 10 or 12 i begin to notice that there weren't really characters like <PERSON>'s in american children's media, like role models that had personal resolve but that were also deeply flawed. cartoons here are much more concerned with being sardonic or melodramatic. the part where san baby birds the jerky in the main dude's mouth always gets me",0,"[1.6476448774337769, -1.2187529802322388, -0.9693524241447449, 0.131813183426857, -0.2457742691040039, -0.5583204627037048, 1.21274995803833]"
1,"The Power of the Dog\n\nin isolation there are very strong parts, unfortunately connected together with poorly directed ones that kind of meander without direction. the best scenes are cut too short and show nothing and the badly filmed ones always stretch way too long and show way too much. this film as a whole explores who or what killed <PERSON>. we know, because we've seen every part of the chain that leads to his death. however, where the entire thing fails is that it almost seems like we know more than the movie does","<PERSON>\n\ngoing back and watching these in the original language instead of the goofy english dubs. a lot of people say this is the best one but i'm not really sure. most of them are pretty great. all of them are intensely watchable. this one in particular has the most complete plot with characters who are fearless and show each other mercy. on atmosphere alone the winner could go to maybe four or five of ghibli's other works. i remember watching this one as a kid pretty intensely since it's so violent and the sound effects try to reduce the gore to these cartoonish kinda tom and jerry actions but theres still so much blood. even when i was 10 or 12 i begin to notice that there weren't really characters like <PERSON>'s in american children's media, like role models that had personal resolve but that were also deeply flawed. cartoons here are much more concerned with being sardonic or melodramatic. the part where san baby birds the jerky in the main dude's mouth always gets me",0,"[2.2426974773406982, -0.6790010333061218, -0.34340301156044006, -0.0539296455681324, -0.7155317068099976, -0.8478379845619202, 0.39701059460639954]"
2,"<PERSON>\n\ngoing back and watching these in the original language instead of the goofy english dubs. a lot of people say this is the best one but i'm not really sure. most of them are pretty great. all of them are intensely watchable. this one in particular has the most complete plot with characters who are fearless and show each other mercy. on atmosphere alone the winner could go to maybe four or five of ghibli's other works. i remember watching this one as a kid pretty intensely since it's so violent and the sound effects try to reduce the gore to these cartoonish kinda tom and jerry actions but theres still so much blood. even when i was 10 or 12 i begin to notice that there weren't really characters like <PERSON>'s in american children's media, like role models that had personal resolve but that were also deeply flawed. cartoons here are much more concerned with being sardonic or melodramatic. the part where san baby birds the jerky in the main dude's mouth always gets me","Anatomy of a Fall\n\ni'm fully aware it’s not a fair comparison, but i’d like to be able to have a foundation of a dead-body-case type work-of-fiction to base my conclusion on how weak or strong the one i’m watching currently is. recently, and in this case, i’ve come to referring to disco elysium, of which strongly elicited throughout my entire lengthy time with it what the evident culmination of anatomy of a fall - the kitchen fight recording, some of the child actors scenes - coaxed. immediately and on a purely cosmetic and surface level, Fall lacks any type of relatively incisive and personally disruptive style. this sort of flatly lit, boringly pictured and blocked, framed with little to no intention movie reminds me of Tar, from a few years ago, and at least for a little while we were happy with this natural Lightroom preset of a color palate. the question remains that if the director moved focus away from the visuals to focus on the performances, and the performances aren’t at all times captivating squarely enough to fill the vacuum, then why are you wasting our time\n\naddendum: <PERSON> states on an interview i read on cinematography world that the original intention was to shoot on 35, and they rather proceeded to shoot hawk v lite anamorphic on alexa then matte out the beautiful academy ratio to widescreen. horrific world we live in",0,"[1.647646427154541, -1.2187526226043701, -0.9693564176559448, 0.13181640207767487, -0.24577262997627258, -0.5583148002624512, 1.2127469778060913]"
3,"<PERSON>\n\ngoing back and watching these in the original language instead of the goofy english dubs. a lot of people say this is the best one but i'm not really sure. most of them are pretty great. all of them are intensely watchable. this one in particular has the most complete plot with characters who are fearless and show each other mercy. on atmosphere alone the winner could go to maybe four or five of ghibli's other works. i remember watching this one as a kid pretty intensely since it's so violent and the sound effects try to reduce the gore to these cartoonish kinda tom and jerry actions but theres still so much blood. even when i was 10 or 12 i begin to notice that there weren't really characters like <PERSON>'s in american children's media, like role models that had personal resolve but that were also deeply flawed. cartoons here are much more concerned with being sardonic or melodramatic. the part where san baby birds the jerky in the main dude's mouth always gets me","The Power of the Dog\n\nin isolation there are very strong parts, unfortunately connected together with poorly directed ones that kind of meander without direction. the best scenes are cut too short and show nothing and the badly filmed ones always stretch way too long and show way too much. this film as a whole explores who or what killed <PERSON>. we know, because we've seen every part of the chain that leads to his death. however, where the entire thing fails is that it almost seems like we know more than the movie does",0,"[2.2426974773406982, -0.6790010333061218, -0.34340301156044006, -0.0539296455681324, -0.7155317068099976, -0.8478379845619202, 0.39701059460639954]"
4,"Falling Down\n\nReactionary praxis, revolutionary aesthetics\n\nIn Falling Down, <PERSON> character (D-fens) is the blueprint of the misguided revolutionary movement. Passionate and deed-based, yet unfocused, individualistic, and most importantly focused on the liberation of the consumer rather than the worker. Consumption of product under the guise of doing something more worthwhile, is at the heart of Falling Down, from the very first scene the toothy snarl of a <PERSON> plushy sets off “D-fens’s” rage, causing him to skip work for that day, instead starting and unhinged consumer revolution, holding up fast food stores and bodegas, using threats of violence as leverage for enhanced convenience. Although he still seems “countercultural” and on the side of the people, this focus on the consumer is in reality inherently anti-revolutionary, screwing over those who make the products even more. <PERSON> does an excellent job at portraying the misguided revolutionary in this biting and sardonic satire.","<PERSON>\n\nSo in the last scene based on the year on the headstone and the trees, this movie ends sometime in early fall 2001. It could reasonably assumed based on the trees and New York location that this movie ends sometime around early to mid September. I know I’m just over analyzing this but that means either:\n\nA. This film ends less than a week before 9/11, and symbolically may be one of the bleakest endings ever made.\n\nB. This film ends directly after 9/11 and the ending which is just completely skipped over and the ending is even more hopeful than it seems?!?",0,"[1.827519416809082, -1.5416673421859741, -0.7630870342254639, -0.15895895659923553, -0.16470670700073242, -0.004535033833235502, 0.8054291605949402]"
5,"<PERSON>\n\nSo in the last scene based on the year on the headstone and the trees, this movie ends sometime in early fall 2001. It could reasonably assumed based on the trees and New York location that this movie ends sometime around early to mid September. I know I’m just over analyzing this but that means either:\n\nA. This film ends less than a week before 9/11, and symbolically may be one of the bleakest endings ever made.\n\nB. This film ends directly after 9/11 and the ending which is just completely skipped over and the ending is even more hopeful than it seems?!?","Falling Down\n\nReactionary praxis, revolutionary aesthetics\n\nIn Falling Down, <PERSON> character (D-fens) is the blueprint of the misguided revolutionary movement. Passionate and deed-based, yet unfocused, individualistic, and most importantly focused on the liberation of the consumer rather than the worker. Consumption of product under the guise of doing something more worthwhile, is at the heart of Falling Down, from the very first scene the toothy snarl of a <PERSON> plushy sets off “D-fens’s” rage, causing him to skip work for that day, instead starting and unhinged consumer revolution, holding up fast food stores and bodegas, using threats of violence as leverage for enhanced convenience. Although he still seems “countercultural” and on the side of the people, this focus on the consumer is in reality inherently anti-revolutionary, screwing over those who make the products even more. <PERSON> does an excellent job at portraying the misguided revolutionary in this biting and sardonic satire.",0,"[1.8275209665298462, -1.541669249534607, -0.7630846500396729, -0.1589588224887848, -0.16470329463481903, -0.004536657594144344, 0.8054251670837402]"
6,"Rachel Getting Married\n\nsteadily grew on me over 2hrs, watching the <PERSON> interact, their hurt & compassion backgrounding every conversation. <PERSON>’s intimate stylizations hold this love story of a family like an open heart at its center, capturing the complexity & contradictions of these—at their core—very real, very strong bonds thru grief / addiction / joy / life. some gut punches here, esp re the <PERSON>/rachel/<PERSON> dynamic.\nwatched this bc i got engaged in december and am starting to figure out my wedding (!!)\nadore how cozy & lively <PERSON>’s is from planning (making the seating chart!) to the rehearsal dinner to the main event which is sheer happiness, stops their world for a moment and allows them to bask in what can be so simply GOOD & celebratory. a place for them to put nothing but their love. atmospheric inspo and a beautiful, powerful sequence in the film","<PERSON>\n\nso so enamored with every part of this almost entirely due to <PERSON>’s charisma & light & talent, but also <PERSON> and the lush set design—from going wide on the stage to <PERSON> alone in her apartment, every look (her hair, her nails, her outfits!) & every room is exquisite, and—huge!—every line is charming.\nrequires suspension of disbelief as <PERSON> is so specifically gorgeous and has such an attractive presence that it’s difficult to figure that someone might not be instantly awed by her beauty. i don’t typically like musicals and i want to watch this again immediately.\n\nabout a woman who doesn’t realize her own self-worth (maybe nothing is more devastating here than when she makes sure people think her baby is pretty above all else) and thus has blinders on when it comes to her husband’s conning but <PERSON> is a layered character with more depth than simply being threatened by his wife’s success, and their love / his self-wrought, tortured predicament does come through at the same time as <PERSON>’s growth and the finale’s bittersweet optimism… esp with “My Man” being my favorite song here (her voice at “when i know i’ll come back on my knees someday”)… and <PERSON> saying “oh, <PERSON>” being pure romance…",0,"[2.0084774494171143, 0.4846120774745941, -0.12106988579034805, -0.4625290632247925, -0.5338815450668335, -1.4851081371307373, 0.10955332219600677]"
7,"Rachel Getting Married\n\nsteadily grew on me over 2hrs, watching the <PERSON> interact, their hurt & compassion backgrounding every conversation. <PERSON>’s intimate stylizations hold this love story of a family like an open heart at its center, capturing the complexity & contradictions of these—at their core—very real, very strong bonds thru grief / addiction / joy / life. some gut punches here, esp re the <PERSON>/rachel/<PERSON> dynamic.\nwatched this bc i got engaged in december and am starting to figure out my wedding (!!)\nadore how cozy & lively <PERSON>’s is from planning (making the seating chart!) to the rehearsal dinner to the main event which is sheer happiness, stops their world for a moment and allows them to bask in what can be so simply GOOD & celebratory. a place for them to put nothing but their love. atmospheric inspo and a beautiful, powerful sequence in the film","<PERSON>\n\nfeels like <PERSON>—a permanently misguided voice—built this as some uninspired riff on <PERSON> meets The Talented Mr. Ripley entirely because she liked <PERSON> “Murder on the Dancefloor.”\nher tiktok-primed sophomore effort isn’t actively harmful, certainly playing with some vague ideas about middle class upward mobility that never coalesce, but if you’re reading in good faith, don’t really intend to…\n\ni do not typically like <PERSON>’s work, i do not think i “like” this, but i’m not inclined to dunk on it too much bc i didn’t think it was trying to be the *edgiest, wackiest, wildest* movie of all time, and am not judging it as such. the freaky stuff was fine and fun enough. <PERSON>’s confidence goes FAR. am being very mild about all this, but it didn’t stir up enough to offend or inspire condescension. i had a dumb good time",0,"[2.245753049850464, -0.03886530175805092, -0.4941970109939575, -0.7094990015029907, -0.8547700047492981, -0.5568146109580994, 0.40839943289756775]"
8,"Rachel Getting Married\n\nsteadily grew on me over 2hrs, watching the <PERSON> interact, their hurt & compassion backgrounding every conversation. <PERSON>’s intimate stylizations hold this love story of a family like an open heart at its center, capturing the complexity & contradictions of these—at their core—very real, very strong bonds thru grief / addiction / joy / life. some gut punches here, esp re the <PERSON>/rachel/<PERSON> dynamic.\nwatched this bc i got engaged in december and am starting to figure out my wedding (!!)\nadore how cozy & lively <PERSON>’s is from planning (making the seating chart!) to the rehearsal dinner to the main event which is sheer happiness, stops their world for a moment and allows them to bask in what can be so simply GOOD & celebratory. a place for them to put nothing but their love. atmospheric inspo and a beautiful, powerful sequence in the film","Little <PERSON>\n\nthe use of AI animation makes this pretty irredeemable. leaves a horrible taste that it doubles down on with <PERSON>'s character in the last act. <PERSON> is a stand-in for <PERSON> in this role (made me more sympathetic to Adaptation, which i've never loved, and <PERSON> most recent effort--both much better films) though this is really a rip-off of last year's Rotting in the Sun (with its animation (and general screenplay) further giving the impression that it was a rushed job).\ndeeply obnoxious first half transitions to a less-obnoxious second half that can't quite shake its roots, empty pretension still looming like a dark cloud. MVPs <PERSON> & <PERSON>",0,"[2.2381792068481445, -0.15823480486869812, 0.2343253642320633, -0.5075234770774841, -0.6299780607223511, -1.1193093061447144, -0.057456567883491516]"
9,"Rachel Getting Married\n\nsteadily grew on me over 2hrs, watching the <PERSON> interact, their hurt & compassion backgrounding every conversation. <PERSON>’s intimate stylizations hold this love story of a family like an open heart at its center, capturing the complexity & contradictions of these—at their core—very real, very strong bonds thru grief / addiction / joy / life. some gut punches here, esp re the <PERSON>/rachel/<PERSON> dynamic.\nwatched this bc i got engaged in december and am starting to figure out my wedding (!!)\nadore how cozy & lively <PERSON>’s is from planning (making the seating chart!) to the rehearsal dinner to the main event which is sheer happiness, stops their world for a moment and allows them to bask in what can be so simply GOOD & celebratory. a place for them to put nothing but their love. atmospheric inspo and a beautiful, powerful sequence in the film","A Nos Amours\n\nagain lauding <PERSON> for his mix of clean cuts & observational style, this not quite passive but understated energy that lets his films breathe moment to moment; imbued with a light yet sustained understanding, his tone shifts mimicing the undulations of life.\n\nhe looks at the two lives of an adolescent, private/familial and public/social--the first enlightening the second--in a very specific way that resonates.. esp interesting given <PERSON>’s involvement as not only the film’s father but also <PERSON>’s, and how that informs his depiction of her. their sequences are some of the most rich.\na premier girlhood film.",0,"[2.115020751953125, 0.21499507129192352, -0.5389561653137207, -0.8219174146652222, -0.924434244632721, -0.5963159799575806, 0.5516079068183899]"


In [20]:
all_results_df.layer.value_counts()

layer
0    40
2    40
3    40
4    40
5    40
6    40
1    24
Name: count, dtype: int64

In [23]:
def aggregate_score(list_of_scores):
    scores = []
    for json_str in list_of_scores:
        if json_str == None:
            continue
        try:
            if "\n\n" in json_str:
                json_str = json_str.split("\n")[-1].strip()

            json_str = json_str.replace("Output:", "").strip()
                
                
            json_obj = json.loads(json_str)            
            scores.append(int(json_obj['score']))
        except:
            print(json_str)
            print('==============')
    return round(np.median(scores), 2)

# Aggregating layer score
layer_scores_df = all_results_df.groupby('layer').aggregate({
    'lexical': aggregate_score,
    'syntax': aggregate_score,
    'discourse': aggregate_score,
    'semantic': aggregate_score,
}).reset_index()

In [24]:
layer_scores_df.head(n=7)

Unnamed: 0,layer,lexical,syntax,discourse,semantic
0,0,1.0,2.0,2.0,1.0
1,1,2.0,3.0,3.0,2.0
2,2,1.0,3.0,2.0,1.0
3,3,2.0,3.0,2.0,2.0
4,4,2.0,3.0,2.0,2.0
5,5,2.0,3.0,2.5,2.0
6,6,2.0,3.0,2.5,2.0
