In [40]:
%load_ext autoreload

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [41]:
%autoreload
import sys
sys.path.insert(0, '../')

In [42]:
from src.utilities.mluar_utils import *

In [43]:
from transformers import AutoModel, AutoTokenizer
from datasets import load_dataset, Dataset, load_from_disk
import numpy as np
from einops import rearrange, reduce, repeat
import torch
from sklearn.metrics.pairwise import cosine_similarity
from matplotlib import pyplot as plt
import math
import pandas as pd
import pickle as pkl

pd.set_option('display.max_colwidth', None)


In [5]:
MULTI_LUAR_PATH =  "/mnt/swordfish-pool2/milad/multi-luar-reddit-model/"
LUAR_PATH =  "/mnt/swordfish-pool2/nikhil/LUAR/pretrained_weights/LUAR-MUD/"

In [6]:
# Load models
multiluar_model = AutoModel.from_pretrained(MULTI_LUAR_PATH, trust_remote_code=True)
luar_model = AutoModel.from_pretrained(LUAR_PATH, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained("rrivera1849/LUAR-MUD")

In [59]:
# Load data
data_path = '/mnt/swordfish-pool2/milad/hiatus-data/phase_2/mode_perGenre-HRS2.1/TA2/hrs_06-27-24_english_perGenre-HRS2.1/data/hrs_06-27-24_english_perGenre-HRS2.1_TA2_input'
ground_truth_path = '/mnt/swordfish-pool2/milad/hiatus-data/phase_2/mode_perGenre-HRS2.1/TA2/hrs_06-27-24_english_perGenre-HRS2.1/groundtruth/hrs_06-27-24_english_perGenre-HRS2.1_TA2'
hiatus_data, _, _ = load_aa_data(data_path, ground_truth_path)

Loading:  /mnt/swordfish-pool2/milad/hiatus-data/phase_2/mode_perGenre-HRS2.1/TA2/hrs_06-27-24_english_perGenre-HRS2.1/data/hrs_06-27-24_english_perGenre-HRS2.1_TA2_input


In [60]:
# keep authors with only more than one text
authors_with_multiple_texts = [x[0] for x in hiatus_data.authorID.value_counts().to_dict().items() if x[1] > 1]
hiatus_data = hiatus_data[hiatus_data.authorID.isin(authors_with_multiple_texts)]

In [61]:
hiatus_data = hiatus_data.sample(2000)

In [65]:
np.median([len(x.split()) for x in hiatus_data_texts])

np.float64(159.0)

In [66]:
hiatus_data_texts = hiatus_data.fullText.tolist()
labels = hiatus_data['authorID'].tolist()

# Compute embeddings
max_seq_length = 192
hiatus_data_embeddings, _= get_luar_embeddings(hiatus_data_texts, multiluar_model, tokenizer, max_length=max_seq_length, batch_size=1, is_multi_luar=True)

In [26]:
#np.max([len(x.split()) for x in hiatus_data.fullText.tolist()])

### Experiment Design

- For each layer, we find pairs of ground-truth texts written by the same author where the corresponding layer's embedding scored them high compared to other layers
- For each layer, we take a sample of these pairs of texts and prompt ChatGPT to find which linguistic level they are similar

#### Step 1:

In [67]:
# Compute Multi-luar similarity matrices at every layer and average of the layers
muti_luar_layers_sims = [compute_similarities(hiatus_data_embeddings, hiatus_data_embeddings, layer=i) for i in range(7)]
#muti_luar_layers_sims.append(compute_similarities(hiatus_data_embeddings, hiatus_data_embeddings, layer=None))
muti_luar_layers_sims = np.stack(muti_luar_layers_sims)

In [68]:
# Compute significant pairs to layers
layer_to_sig_pairs = {layer: extract_sig_pairs_for_layer(hiatus_data_texts, muti_luar_layers_sims, layer) for layer in range(7)}

In [69]:
layer_x_pairs = []
for layer in [0, 1, 2, 3, 4, 5, 6]:
    print(layer, len(layer_to_sig_pairs[layer]))
    sample_of_pairs = layer_to_sig_pairs[layer][:10]
    layer_x_pairs += [{'text-1': x[0], 'text-2': x[1], 'z-score': x[2], 'layer-sim': x[3], 'layer': layer} for x in sample_of_pairs]
layer_x_pairs_df = pd.DataFrame(layer_x_pairs)

0 555
1 32
2 17
3 275
4 50
5 8
6 360


In [70]:
layer_x_pairs_df.sample(n=10).head(n=10)

Unnamed: 0,text-1,text-2,z-score,layer-sim,layer
59,"One Cat is Getting Way Too Big!\n\nWe have four cats currently (we used to have five) and we put out food each day for all of them, along with water obviously. However, one of the cats gets in and gobbles up the food for all of them before the others have much of a chance.\n\nHow can we get that cat to control herself or share better? I'd hate to have to segregate them during feeding time and babysit them one by one to make sure the fat cat eats only what she is supposed to.\n\nDoes anyone have any suggestions? Also, if you know how to get this cat on a diet, those tips would be welcome as well. I don't think she's healthy at the size she's at. Thanks!","<PERSON>\n\nthis was my favorite movie when i was 14 so i rewatched it just to see how it held up + i was in need of a lighthearted movie bc i’ve watched a lot of heavy ones lately.\n\nas far as political correctness goes this movie is far from it (made in 1953 and portrays native americans terribly) so keep that in mind, but it’s seriously just pure serotonin and i HIGHLY recommend it if you just need a cute movie to lift your spirits.\n\ncan i also just point out that this movie is 🏳️‍🌈🏳️‍🌈 af?? so funny that 14 y/o gaybutididn’tknowityet me loved this movie sm.","[0.35336807, -1.7503309, -0.66188, -0.52688986, 0.1992927, 0.8703334, 1.5161055]","[0.61085254, 0.44736424, 0.53195286, 0.5424436, 0.5988786, 0.65102834, 0.7012143]",6
19,"The <PERSON> of Inisherin\n\nI had pretty neutral feelings towards this going in. I enjoyed 3 Billboards immensely, but was a bit underwhelmed for In Bruges, still enjoying it for the most part. I have to say I was pleasantly blown away with how much I got out of this.\n\nI would go as far to say I got more out of this than any other of his films, and is easily one of the best films of the year. At no point was I bored or felt disengaged. The psuedo-spiritual aspect of this in particular was the most fascinating, and all the performances, shots, and pacing was quite fantastic. The comedy itself may be too dark for some was hysterical for me and built up in an amazing blend of shock and hilarity.\n\nQuite a few times, I found myself getting lost in the dialogue at how it applied to my own life, feeling things similar to the characters, different applications of their mentalities, and reflecting on how I've spent my time and priorities. Anytime a film can do this for me or aid me in this I fall in love with it, and this puts it far and beyond a large portion of modern cinema. If I had to equate this film to a particular feel I would say this runs in the same vein as a lot of <PERSON> films. The small island atmosphere of an intense drama between a small cast, taking on existential questions that often pit them against each other. Obviously this was far more comedic than <PERSON>, but the core to me remains the same.\n\nI highly recommend seeing this as soon as possible I may revisit it soon myself.\n\nViewed at local Mariemont Theater","Room\n\nRoom is definitely one of those heartfelt late bloomers in the year that we all love. It went undetected by many (including me) and suddenly appeared as a Best Picture nomination. I decided to watch it and am quite glad that I did so.\n\nFirst of all, the two leads absolutely carry this film. They delivered stellar performances and easily elevated the effectiveness of this film. They had so much chemistry as mother and son. As another note, that scene with <PERSON> rolling out of the truck almost made me pass out. It was mostly due to the fact that I honestly cared about the fate of the main characters, and that was easily the most suspenseful scene of the film (was so well done too).\n\nThe one thing I think I can judge this film on plot wise is that I would've liked to have seen more content from the real world, as well as more on how <PERSON> got into 'Room'.\n\nIs Room going to win Best Picture? Probably not. Is <PERSON> going to get Best Actress? She very well could. Whatever the true answers are to these questions, this film was very enjoyable and not an Oscar bait film by any means.","[0.13273303, 2.0959442, -1.0901859, 0.47817984, -0.88361454, -0.6143037, -0.11872505]","[0.88315976, 0.89579034, 0.87529194, 0.88538224, 0.87662095, 0.8783536, 0.88154197]",1
4,"<PERSON>\n\nI love this film. <PERSON>’s greatest achievement is undoubtedly this. I believe <PERSON> performance is the greatest ever put to film.\n—\nTogether, they embody a character that is just fascinating to watch. You never empathise with him, but you do not hate him. It always has stricken me that the <PERSON> is a product of his own environment. NYC is the villain, the enabler. The drugs, the gambling, the scum, the darkness, the violence, all just naturally exists around him and he is just a part of the city’s routine. He cannot break free of its immoral, corruptive nature. He is bound to it.\n—\n<PERSON> contrasts innocence with depravity. The lieutenant drops his kids to school and takes a hit straight after. He wakes up the next day next to a toddler, still wasted from the night before, the drug dealer shown trying to make ends meet for his family and the nun is corrupted. Despite being such a cynical film, I believe <PERSON> tries to shine glimpses of hope on his city.\n—\nThe film doesn’t really stop. It’s scene after scene of some misdeed, mistake or wrongdoing. It literally doesn’t stop gambling, drinking, destroying, or drugging. Neither does the lieutenant. Neither does New York. It’s a fast track to destruction in one way or another. There’s no escape. It never seems like his redemption is possible until the very end but by then it’s too late. The nightclubs, shootings, drugs, stack on top of one another, an evil momentum constantly building, and <PERSON>’s soaring use of music enhances this build up all the more. You feel doomed from the very start.\n—\nFerrara is fascinating in so many ways. This is his peak shit, and one of my all time favourite movies. It genuinely blows me away every time I see it. Underrated.","Moonage Daydream\n\nThis is just incredible; it's not just a movie, a documentary, or a concert; no, this is an experience, an experience that just gets taken to the next level on IMAX.\n\nWhile watching this I could only think about how much <PERSON> would love this experience, she is the huge fan of <PERSON> out of the 2 of us, but honestly, that doesn't really matter because even if you are not a fan, you will become one.\n\nVisually this is just something else, the way they mix the archive footage, footage of movies and iconic moments of pop culture and never seen footage to tell us the story of <PERSON> is just genius and actually incredible.\n\nAnd all this with one of the best soundtracks you could have because it is music by <PERSON>, so what else could we ask?\n\nYes, the movie is 135 mins, and that might take a toll on the attention of some of the audience, but the storytelling of <PERSON> is just so inspiring and captivating that you just can't wait to see where he takes it next.\n\nThis breaks every mould of the tradition of “music bio-doc”, it its not about how it all happens but more about how it all makes us feel, and I just loved it and really think everyone should experience it\n\nHave you seen it? What did you think?\nAre you a <PERSON> fan?","[1.671571, -1.8493831, -0.23857541, 0.7927009, 0.0077329855, -0.30720568, -0.07684072]","[0.8274274, 0.79138684, 0.8078751, 0.81843126, 0.8103963, 0.8071726, 0.8095306]",0
66,"<PERSON>\n\nI love this film. <PERSON>’s greatest achievement is undoubtedly this. I believe <PERSON> performance is the greatest ever put to film.\n—\nTogether, they embody a character that is just fascinating to watch. You never empathise with him, but you do not hate him. It always has stricken me that the <PERSON> is a product of his own environment. NYC is the villain, the enabler. The drugs, the gambling, the scum, the darkness, the violence, all just naturally exists around him and he is just a part of the city’s routine. He cannot break free of its immoral, corruptive nature. He is bound to it.\n—\n<PERSON> contrasts innocence with depravity. The lieutenant drops his kids to school and takes a hit straight after. He wakes up the next day next to a toddler, still wasted from the night before, the drug dealer shown trying to make ends meet for his family and the nun is corrupted. Despite being such a cynical film, I believe <PERSON> tries to shine glimpses of hope on his city.\n—\nThe film doesn’t really stop. It’s scene after scene of some misdeed, mistake or wrongdoing. It literally doesn’t stop gambling, drinking, destroying, or drugging. Neither does the lieutenant. Neither does New York. It’s a fast track to destruction in one way or another. There’s no escape. It never seems like his redemption is possible until the very end but by then it’s too late. The nightclubs, shootings, drugs, stack on top of one another, an evil momentum constantly building, and <PERSON>’s soaring use of music enhances this build up all the more. You feel doomed from the very start.\n—\nFerrara is fascinating in so many ways. This is his peak shit, and one of my all time favourite movies. It genuinely blows me away every time I see it. Underrated.","What a week\n\nMy boss was on vacation this past week and lemme just say, she's not allowed to leave again lol. Shit hit the fan immediately. Here's some highlights:\n\nWednesday: a truck broke down in our entrance so my coworker called and woke me up to ask what to do. He couldnt handle googling towing companies apparently so I had to do it while on the phone. No one could come out because they either were too busy or didn't have a wrecker. My district manager laid into me about it even though we called 6 different companies\n\nThursday: some of you might remember me posting about this a month or so ago. My coworker (same one mentioned above) gave away $50 lottery tickets. Guess what? He gave away 5 $30 tickets this time. I blew up on him and he accused me of lying even though I have him on camera doing it/printed out the receipt showing he didn't even scan them. I paid for the tickets the next night and the guy he gave them to paid me back Saturday.\n\nFriday night/Saturday morning: uneventful until about 4:30 am. A storm rolled in and the wind was so bad it ripped one of the doors out of place and was only hanging on by the top hinge. The roof was leaking on me while I was fighting to get the door back in place then we lost power for an hour or so. People were still trying to come inside even though it was pitch black inside and out. The other assistant manager came in shortly after the power came back on and helped me block off the broken door. Someone still tried to walk through the door and knocked it loose. I screamed, ""I DON'T GET PAID ENOUGH FOR THIS"" at him then went to Walmart to buy caution tape.\n\nSaturday night/this morning: busy AF all night. Morning rolls around and a guy comes in and looks at just our stand alone coolers then goes ""DO YOU NOT CARRY BOTTLED SODA?!"" Keep in mind, I clocked out and was just standing around talking at this point. I look at him and go, ""...Yeah against the walls"" and gestured to the rest of the store. He replies, ""WELL I ONLY SAW BEER!"" I looked at the other assistant manager and told her I had to go or I was explode.\n\nTonight: I came in and someone had graffitied like half of the back of the store. I tried to clean it up but I got cleaner on my pants, got pissed off, went inside, and started slamming shit to feel better. Sundays are my nights to catch up on everything so I was cleaning machines out then the worst fucking people came in. Long story short, the girl had to pay for the two guys she was with. The second guy brought up coffee and I asked if she wanted me to add it to her purchase and she goes ""well no fucking shit."" Cool bestie.\n\nThe coffee guy said he liked my tattoos and they probably mean I have a beautiful personality (I'm not attractive in the slightest tbh so this was a dig at me) then he goes, ""never mind, you're blonde. You're racist as fuck."" I DIDN'T SAY ANYTHING TO THIS GUY OTHER THAN THANKS WHEN HE SAID HE LIKED MY TATTOOS. Fucking ridiculous.\n\nI still have 3.5 hours left. I might update this if anything else stupid happens. Here's to a better and smoother week.","[0.20816574, -1.7347567, -0.62149245, 1.0749925, -0.14551556, -0.29670468, 1.5153269]","[0.6388284, 0.6095375, 0.6263207, 0.6518964, 0.6334964, 0.6312171, 0.65853477]",6
28,"<PERSON>\n\nI love this film. <PERSON>’s greatest achievement is undoubtedly this. I believe <PERSON> performance is the greatest ever put to film.\n—\nTogether, they embody a character that is just fascinating to watch. You never empathise with him, but you do not hate him. It always has stricken me that the <PERSON> is a product of his own environment. NYC is the villain, the enabler. The drugs, the gambling, the scum, the darkness, the violence, all just naturally exists around him and he is just a part of the city’s routine. He cannot break free of its immoral, corruptive nature. He is bound to it.\n—\n<PERSON> contrasts innocence with depravity. The lieutenant drops his kids to school and takes a hit straight after. He wakes up the next day next to a toddler, still wasted from the night before, the drug dealer shown trying to make ends meet for his family and the nun is corrupted. Despite being such a cynical film, I believe <PERSON> tries to shine glimpses of hope on his city.\n—\nThe film doesn’t really stop. It’s scene after scene of some misdeed, mistake or wrongdoing. It literally doesn’t stop gambling, drinking, destroying, or drugging. Neither does the lieutenant. Neither does New York. It’s a fast track to destruction in one way or another. There’s no escape. It never seems like his redemption is possible until the very end but by then it’s too late. The nightclubs, shootings, drugs, stack on top of one another, an evil momentum constantly building, and <PERSON>’s soaring use of music enhances this build up all the more. You feel doomed from the very start.\n—\nFerrara is fascinating in so many ways. This is his peak shit, and one of my all time favourite movies. It genuinely blows me away every time I see it. Underrated.","The Hidden Fortress\n\n94\n\nHad the sudden urge to rewatch this, cements itself as maybe my favorite <PERSON> world to live in, and maybe the most fun movie he ever made. Its certainly not as refined as his masterworks, but still full of so much rich storytelling\n\n<PERSON> and <PERSON> are the perfect duo, and <PERSON> is clearly having so much fun playing <PERSON>, the perfect mix between his rigid stoicism and feral antics of other roles\n\nAs much as I love <PERSON>'s portrayal and the mark he left on the character for <PERSON> to elevate tenfold, this film always makes me so curious as to what <PERSON>'s Obi-Wan would have been like. Would have changed the entire trajectory of Star Wars from the beginning, had he said yes","[1.0142224, -0.9265235, 1.5275327, 0.8535201, -0.8204405, -0.9228136, -0.72549766]","[0.8465676, 0.8248656, 0.85230756, 0.84477055, 0.82605183, 0.82490706, 0.8271135]",2
50,"<PERSON>\n\nI love this film. <PERSON>’s greatest achievement is undoubtedly this. I believe <PERSON> performance is the greatest ever put to film.\n—\nTogether, they embody a character that is just fascinating to watch. You never empathise with him, but you do not hate him. It always has stricken me that the <PERSON> is a product of his own environment. NYC is the villain, the enabler. The drugs, the gambling, the scum, the darkness, the violence, all just naturally exists around him and he is just a part of the city’s routine. He cannot break free of its immoral, corruptive nature. He is bound to it.\n—\n<PERSON> contrasts innocence with depravity. The lieutenant drops his kids to school and takes a hit straight after. He wakes up the next day next to a toddler, still wasted from the night before, the drug dealer shown trying to make ends meet for his family and the nun is corrupted. Despite being such a cynical film, I believe <PERSON> tries to shine glimpses of hope on his city.\n—\nThe film doesn’t really stop. It’s scene after scene of some misdeed, mistake or wrongdoing. It literally doesn’t stop gambling, drinking, destroying, or drugging. Neither does the lieutenant. Neither does New York. It’s a fast track to destruction in one way or another. There’s no escape. It never seems like his redemption is possible until the very end but by then it’s too late. The nightclubs, shootings, drugs, stack on top of one another, an evil momentum constantly building, and <PERSON>’s soaring use of music enhances this build up all the more. You feel doomed from the very start.\n—\nFerrara is fascinating in so many ways. This is his peak shit, and one of my all time favourite movies. It genuinely blows me away every time I see it. Underrated.","The Immortal Augustus Gladstone\n\nFeels at times like a less deranged version of something like Deadly Lessons or The Astrologer, two stone-cold classics of “I imagined the most interesting guy ever and now I have to make a movie about him. and also I’m playing him” cinema. But <PERSON> is decidedly a character, not a self-insert fantasy, so we’re left with <PERSON> committing almost uncomfortably hard to a role that’s…not particularly interesting if you aren’t <PERSON>. Not a fan of the abrupt shift in focus toward the end, either—loath as I am to call things “unearned”, the ploy the film pulls out in the last 15 minutes needs a more focused setup than <PERSON>’s script provides. It’s got a good heart, though, and I’m a sucker for the set design in <PERSON>’s room.\n\nAnd for those unfamiliar with <PERSON> video game credits: to put this one in context, imagine if <PERSON> dropped a $3 itch.io game that had almost nothing to do with space. It’s not a perfect analogy, but it’ll do.","[0.657998, -1.6527674, -0.47166952, 0.397965, -0.4700408, 1.7817055, -0.24318194]","[0.84197575, 0.82658505, 0.8344517, 0.8402438, 0.8344625, 0.8494601, 0.8359735]",5
60,"One Cat is Getting Way Too Big!\n\nWe have four cats currently (we used to have five) and we put out food each day for all of them, along with water obviously. However, one of the cats gets in and gobbles up the food for all of them before the others have much of a chance.\n\nHow can we get that cat to control herself or share better? I'd hate to have to segregate them during feeding time and babysit them one by one to make sure the fat cat eats only what she is supposed to.\n\nDoes anyone have any suggestions? Also, if you know how to get this cat on a diet, those tips would be welcome as well. I don't think she's healthy at the size she's at. Thanks!","Breaking the Waves\n\nCannes Film Festival: Competition\nGrand Prize of the Jury\n\nAcademy Awards, USA - Nomination:\nBest Actress in a Leading Role -Emily Watson\n\nGolden Globes, USA - Nomination:\nBest Motion Picture - Drama\nBest Performance by an Actress in a Motion Picture - Drama - <PERSON>\n\nBAFTA Awards - Nomination:\nBAFTA Film Award Best Performance by an Actress in a Leading Role - <PERSON>\n\nCésar Awards, France - Won:\nBest Foreign Film (Meilleur film étranger)\n\nGoya Awards - Nomination:\nBest European Film (Mejor Película Europea)\n\nNew York Film Critics Circle Awards\nBest Director -Lars von Trier\nBest Actress - <PERSON>\nBest Cinematographer","[-0.824172, -1.692401, -0.33369634, 0.06927005, 0.35742685, 0.84046173, 1.5831101]","[0.49689895, 0.42321116, 0.5385263, 0.5727266, 0.59718287, 0.6381787, 0.7012083]",6
23,"<PERSON>\n\nI love this film. <PERSON>’s greatest achievement is undoubtedly this. I believe <PERSON> performance is the greatest ever put to film.\n—\nTogether, they embody a character that is just fascinating to watch. You never empathise with him, but you do not hate him. It always has stricken me that the <PERSON> is a product of his own environment. NYC is the villain, the enabler. The drugs, the gambling, the scum, the darkness, the violence, all just naturally exists around him and he is just a part of the city’s routine. He cannot break free of its immoral, corruptive nature. He is bound to it.\n—\n<PERSON> contrasts innocence with depravity. The lieutenant drops his kids to school and takes a hit straight after. He wakes up the next day next to a toddler, still wasted from the night before, the drug dealer shown trying to make ends meet for his family and the nun is corrupted. Despite being such a cynical film, I believe <PERSON> tries to shine glimpses of hope on his city.\n—\nThe film doesn’t really stop. It’s scene after scene of some misdeed, mistake or wrongdoing. It literally doesn’t stop gambling, drinking, destroying, or drugging. Neither does the lieutenant. Neither does New York. It’s a fast track to destruction in one way or another. There’s no escape. It never seems like his redemption is possible until the very end but by then it’s too late. The nightclubs, shootings, drugs, stack on top of one another, an evil momentum constantly building, and <PERSON>’s soaring use of music enhances this build up all the more. You feel doomed from the very start.\n—\nFerrara is fascinating in so many ways. This is his peak shit, and one of my all time favourite movies. It genuinely blows me away every time I see it. Underrated.","<PERSON> and <PERSON>\n\npeople looking for a film focusing on <PERSON> are going to be disappointed as he is second fiddle to the main focus of the plot, the informant <PERSON>. i now understand why <PERSON> decided to not contribute to the film’s soundtrack.\n\nat the same time this might be one of the most left-leaning films from the mainstream hollywood film machine. it’s a story that needed to be told through a cinematic lens and <PERSON> did an excellent job telling this story. the performances are powerful and i hope <PERSON> wins an academy award as <PERSON>.\n\nalso can we talk about the crazy get out reunion here because we don’t have just <PERSON> and <PERSON> - we also had <PERSON>!!!!! i screamed when he popped out into view in the third act of the film.","[-0.31142968, -1.4364933, 1.8876356, 0.58866745, -0.7545052, 0.43744874, -0.41129208]","[0.8286841, 0.82230806, 0.84114677, 0.8337852, 0.82617307, 0.8329282, 0.82811815]",2
14,"The <PERSON> of Inisherin\n\nI had pretty neutral feelings towards this going in. I enjoyed 3 Billboards immensely, but was a bit underwhelmed for In Bruges, still enjoying it for the most part. I have to say I was pleasantly blown away with how much I got out of this.\n\nI would go as far to say I got more out of this than any other of his films, and is easily one of the best films of the year. At no point was I bored or felt disengaged. The psuedo-spiritual aspect of this in particular was the most fascinating, and all the performances, shots, and pacing was quite fantastic. The comedy itself may be too dark for some was hysterical for me and built up in an amazing blend of shock and hilarity.\n\nQuite a few times, I found myself getting lost in the dialogue at how it applied to my own life, feeling things similar to the characters, different applications of their mentalities, and reflecting on how I've spent my time and priorities. Anytime a film can do this for me or aid me in this I fall in love with it, and this puts it far and beyond a large portion of modern cinema. If I had to equate this film to a particular feel I would say this runs in the same vein as a lot of <PERSON> films. The small island atmosphere of an intense drama between a small cast, taking on existential questions that often pit them against each other. Obviously this was far more comedic than <PERSON>, but the core to me remains the same.\n\nI highly recommend seeing this as soon as possible I may revisit it soon myself.\n\nViewed at local Mariemont Theater","The Lord of the Rings: The Two Towers\n\n“Those were the stories that stayed with you, that meant something. Even if you were too small to understand why.”\n\n<PERSON> is suuuuuccchhhhhhh a dramatic queen!!!\n\nIn my own headcanon, <PERSON> and <PERSON> are gay lovers.\nBut also <PERSON> is definitely in love with <PERSON>. 🥺\n\n<PERSON> shows the fuck AWF in this one my god. That mounting of the horse was so uncalled for goddamn.\n\nI remember having the <PERSON> toy growing up. It was one of my favorite toys I ever owned.\n\nThe ents going to war gives me full body chills. Truly epic and exciting filmmaking.\n\nThis one was always my favorite when I was younger. Little me had great taste.\n\nWithout a doubt, this is one of the most stunning movies I have ever watched in 4K.","[-1.404247, 1.5506145, 1.2925806, 0.12234924, -0.2790237, -0.50335616, -0.7789284]","[0.7423285, 0.7906012, 0.7863858, 0.76726806, 0.76071095, 0.7570461, 0.75254416]",1
0,"<PERSON>\n\nI love this film. <PERSON>’s greatest achievement is undoubtedly this. I believe <PERSON> performance is the greatest ever put to film.\n—\nTogether, they embody a character that is just fascinating to watch. You never empathise with him, but you do not hate him. It always has stricken me that the <PERSON> is a product of his own environment. NYC is the villain, the enabler. The drugs, the gambling, the scum, the darkness, the violence, all just naturally exists around him and he is just a part of the city’s routine. He cannot break free of its immoral, corruptive nature. He is bound to it.\n—\n<PERSON> contrasts innocence with depravity. The lieutenant drops his kids to school and takes a hit straight after. He wakes up the next day next to a toddler, still wasted from the night before, the drug dealer shown trying to make ends meet for his family and the nun is corrupted. Despite being such a cynical film, I believe <PERSON> tries to shine glimpses of hope on his city.\n—\nThe film doesn’t really stop. It’s scene after scene of some misdeed, mistake or wrongdoing. It literally doesn’t stop gambling, drinking, destroying, or drugging. Neither does the lieutenant. Neither does New York. It’s a fast track to destruction in one way or another. There’s no escape. It never seems like his redemption is possible until the very end but by then it’s too late. The nightclubs, shootings, drugs, stack on top of one another, an evil momentum constantly building, and <PERSON>’s soaring use of music enhances this build up all the more. You feel doomed from the very start.\n—\nFerrara is fascinating in so many ways. This is his peak shit, and one of my all time favourite movies. It genuinely blows me away every time I see it. Underrated.","<PERSON>\n\n“I’m tired of feeling disgusted with myself.”\n\n“Maybe you’re supposed to feel disgusted with yourself when you’re a teenager.”\n\nit’s likeable, but it leaves me wanting more. i think it was too short, and i wish there was more backstory for <PERSON> and <PERSON> (and <PERSON> and <PERSON> as well) to better understand the dynamic there. i’ll probably have to read the book to get that though. also, near the beginning when <PERSON> and <PERSON> were watching desert hearts together and then started making out i was so SHOCKED. where are my people who love desert hearts at.....","[1.5357007, 0.90257436, 0.26250318, 0.16515578, -0.661475, -0.44520903, -1.7592723]","[0.79149073, 0.7813458, 0.77108955, 0.7695297, 0.7562841, 0.7597495, 0.7386935]",0


In [71]:
layer_x_pairs_df.to_json('../data/layer_to_pairs_signficance.json')

#### Step 2:

In [46]:
from datadreamer import DataDreamer
from datadreamer.llms import HFTransformers, ParallelLLM, OpenAI
from datadreamer.steps import DataFromPrompt, ProcessWithPrompt,  HFHubDataSource, DataSource, zipped, concat
from functools import partial
from transformers import QuantoConfig
from datasets import concatenate_datasets, load_dataset
import json


model = OpenAI(model_name="gpt-3.5-turbo", api_key='sk-proj-zTbZNk16Ik1pZnqLn38ZT3BlbkFJImq3pd7widkr7RzsC771') #kathy's lab
# model = HFTransformers(
#                 "meta-llama/Meta-Llama-3-8B-Instruct",
#                 quantization_config=QuantoConfig(weights="int8"),
#                 device=0,
#                 device_map="cuda",
#             )
# model.config.pad_token_id = model.config.eos_token_id

def gen_from_iterable_dataset(iterable_ds):
    yield from iterable_ds

def evaluate_text_similarities(data_path, document_pairs, linguistic_lvl_name, linguistic_lvl_desc):
    instruction = "Given the two Documents below, rate their <linguistic_lvl> on a scale from 1 to 5: Score 1 equals very low similarity and score 5 equals high similarity. First, give reasons for your score and then output the score. The output should be in the following format: {\"reasons\": \"explain your rating\",  \"score\": \"<json integer>\"}"
    instruction = instruction.replace("<linguistic_lvl>", linguistic_lvl_desc)
    
    with DataDreamer(data_path):
        datasource = DataSource('documents', Dataset.from_list(document_pairs))
        datasource = datasource.map(lambda row: {'inputs': 'Document 1:\n {} \n Document 2:\n {}'.format(row['text-1'], row['text-2'])}, auto_progress=False)
        ds_focus_questions = ProcessWithPrompt(
          "{} describe text similarity".format(linguistic_lvl_name),
          inputs={"inputs": datasource.output["inputs"]},
          args={
             "llm": model,
             "n": 1,
             "instruction": instruction
          },
          outputs={"generations": linguistic_lvl_name},
        ).select_columns([linguistic_lvl_name])
        
        zipped_step = zipped(datasource, ds_focus_questions)

        results_iter = zipped_step.output.dataset
        results_ds   = Dataset.from_generator(partial(gen_from_iterable_dataset, results_iter))

        return results_ds

In [44]:
layer_x_pairs_df = pd.read_json('../data/layer_to_pairs_signficance.json')

In [45]:
layer_x_pairs_df.layer.value_counts()

0    10
1    10
2    10
3    10
4    10
6    10
5     8
Name: layer, dtype: int64

In [47]:
# Given the two Documents below, rate their <linguistic_lvl> on a scale from 1 to 5: Score 1 equals very low similarity and score 5 equals high similarity. First, give reasons for your score and then output the score in the following output format: {\"reasons\": \"explain your rating\",  \"score\": \"<json integer>\"}
ling_phenomena = {
    'syntax': 'syntactic similarity',
    'semantic': 'semantic similarity',
    'lexical': 'lexical similarity',
    'discourse': 'discourse similarity'
}

In [48]:
layer_x_pairs = [row.to_dict() for idx, row in layer_x_pairs_df.iterrows()]

In [49]:
results = []
for key, val in ling_phenomena.items():
    results.append(evaluate_text_similarities('./output', layer_x_pairs, key, val))

[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Initialized. 🚀 Dreaming to folder: ./output
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'documents' was previously run and saved, but was outdated. 😞
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'documents' is running. ⏳
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'documents' finished and is saved to disk. 🎉
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'documents (map)' is running. ⏳
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'documents (map)' will run lazily. 🥱
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'syntax describe text similarity' was previously run and saved, but was outdated. 😞
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'syntax describe text similarity' is running. ⏳
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'documents (map)' finished running lazily. 🎉
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'syntax describe text similarity' finished and is saved to disk. 🎉
[ [35m🤖 Data[33mDr[31m

In [50]:
all_results = concatenate_datasets(results)

In [51]:
all_results.save_to_disk('../data/described_similarities_ds')

Saving the dataset (0/1 shards):   0%|          | 0/272 [00:00<?, ? examples/s]

### Analyze layer to lingustic similarity:

In [52]:
all_results = load_from_disk('../data/described_similarities_ds')

In [53]:
all_results_df = all_results.to_pandas()

In [54]:
zscores= all_results_df['z-score'].tolist()

In [55]:
all_results_df[['text-1', 'text-2', 'layer', 'lexical', 'syntax', 'discourse', 'semantic']].head(n=50)

Unnamed: 0,text-1,text-2,layer,lexical,syntax,discourse,semantic
0,"<PERSON>\n\nI love this film. <PERSON>’s greatest achievement is undoubtedly this. I believe <PERSON> performance is the greatest ever put to film.\n—\nTogether, they embody a character that is just fascinating to watch. You never empathise with him, but you do not hate him. It always has stricken me that the <PERSON> is a product of his own environment. NYC is the villain, the enabler. The drugs, the gambling, the scum, the darkness, the violence, all just naturally exists around him and he is just a part of the city’s routine. He cannot break free of its immoral, corruptive nature. He is bound to it.\n—\n<PERSON> contrasts innocence with depravity. The lieutenant drops his kids to school and takes a hit straight after. He wakes up the next day next to a toddler, still wasted from the night before, the drug dealer shown trying to make ends meet for his family and the nun is corrupted. Despite being such a cynical film, I believe <PERSON> tries to shine glimpses of hope on his city.\n—\nThe film doesn’t really stop. It’s scene after scene of some misdeed, mistake or wrongdoing. It literally doesn’t stop gambling, drinking, destroying, or drugging. Neither does the lieutenant. Neither does New York. It’s a fast track to destruction in one way or another. There’s no escape. It never seems like his redemption is possible until the very end but by then it’s too late. The nightclubs, shootings, drugs, stack on top of one another, an evil momentum constantly building, and <PERSON>’s soaring use of music enhances this build up all the more. You feel doomed from the very start.\n—\nFerrara is fascinating in so many ways. This is his peak shit, and one of my all time favourite movies. It genuinely blows me away every time I see it. Underrated.","<PERSON>\n\n“I’m tired of feeling disgusted with myself.”\n\n“Maybe you’re supposed to feel disgusted with yourself when you’re a teenager.”\n\nit’s likeable, but it leaves me wanting more. i think it was too short, and i wish there was more backstory for <PERSON> and <PERSON> (and <PERSON> and <PERSON> as well) to better understand the dynamic there. i’ll probably have to read the book to get that though. also, near the beginning when <PERSON> and <PERSON> were watching desert hearts together and then started making out i was so SHOCKED. where are my people who love desert hearts at.....",0,,"**Reasons for Rating:**\n\nDocument 1 and Document 2 have different structures, tones, and content. Document 1 discusses a film and analyzes its themes and characters in a detailed and critical manner. On the other hand, Document 2 seems to be a personal reflection or review with a more casual and conversational tone, mentioning specific scenes and expressing personal reactions. The syntactic structures, vocabulary, and overall style of writing in the two documents are quite distinct, with Document 1 being more analytical and descriptive, while Document 2 is more informal and opinionated.\n\n**Score:**\n\n{""reasons"": ""The two documents have different structures, tones, and content, with Document 1 being more analytical and descriptive, and Document 2 being more informal and opinionated. The syntactic structures, vocabulary, and overall style of writing in the two documents are quite distinct."", ""score"": ""1""}",,
1,"<PERSON>\n\nI love this film. <PERSON>’s greatest achievement is undoubtedly this. I believe <PERSON> performance is the greatest ever put to film.\n—\nTogether, they embody a character that is just fascinating to watch. You never empathise with him, but you do not hate him. It always has stricken me that the <PERSON> is a product of his own environment. NYC is the villain, the enabler. The drugs, the gambling, the scum, the darkness, the violence, all just naturally exists around him and he is just a part of the city’s routine. He cannot break free of its immoral, corruptive nature. He is bound to it.\n—\n<PERSON> contrasts innocence with depravity. The lieutenant drops his kids to school and takes a hit straight after. He wakes up the next day next to a toddler, still wasted from the night before, the drug dealer shown trying to make ends meet for his family and the nun is corrupted. Despite being such a cynical film, I believe <PERSON> tries to shine glimpses of hope on his city.\n—\nThe film doesn’t really stop. It’s scene after scene of some misdeed, mistake or wrongdoing. It literally doesn’t stop gambling, drinking, destroying, or drugging. Neither does the lieutenant. Neither does New York. It’s a fast track to destruction in one way or another. There’s no escape. It never seems like his redemption is possible until the very end but by then it’s too late. The nightclubs, shootings, drugs, stack on top of one another, an evil momentum constantly building, and <PERSON>’s soaring use of music enhances this build up all the more. You feel doomed from the very start.\n—\nFerrara is fascinating in so many ways. This is his peak shit, and one of my all time favourite movies. It genuinely blows me away every time I see it. Underrated.","The Killer\n\nFun and stylish, mostly. The voiceover, particularly in the first act, reeked of simply not trusting the audience; A version of this opening playing with no dialogue would have been a stone-cold classic. <PERSON> is pretty good here, and when she shows up, the scene with <PERSON> is fantastic.\n\nI think people are overstating the politics a bit, or, rather, the politics are definitely there, but it is also basically just a rehashing of the politics of the <PERSON> franchise. It uses the killer-for-hire setting to comment on how an elite have increasingly insulated themselves from average citizens, and how they expect to play by different rules than those beneath them, freely changing the rules and agreements to suit their own agendas.\n\nStill, it is enough fun (and funny; the killer is pretty bad at his job sometimes!) that I think this would have been a four for me until the ending. But why he let the billionaire live after killing people much, much less culpable was frankly baffling to me. Thematically, I guess you could say that is a comment on who is above consequences too, but again <PERSON> is better at this. Here there's no real reason why he can't just kill the client, so for me it breaks the plot. Some lipservice is paid that the cops would investigate it more seriously since he was rich, but honestly the target at the beginning seemed pretty well off too; and are we to believe he never killed any wealthy or high-profile people? Why wasn't it a concern previously?\n\nIt left me with a bad taste in my mouth, which is probably the point, but I think <PERSON> got lazy in doing the heavy lifting that would have made the thematic point he wanted to make make narrative sense.",0,,"**Reasons for the rating:**\n\nDocument 1 focuses on a film and provides a detailed analysis of its themes, characters, and overall impact. It uses descriptive language and delves into the emotions and experiences of the characters. On the other hand, Document 2 discusses a different film, ""The Killer,"" and critiques its plot, characters, and thematic elements. It also includes some commentary on the political aspects of the film. The writing style in Document 2 is more critical and analytical compared to the more emotional and descriptive style of Document 1. \n\n**Score: 2**\n\n**Output:**\n{""reasons"": ""The two documents discuss different films and have distinct writing styles, with one being more emotional and descriptive while the other is critical and analytical."", ""score"": ""2""}",,
2,"<PERSON>\n\nI love this film. <PERSON>’s greatest achievement is undoubtedly this. I believe <PERSON> performance is the greatest ever put to film.\n—\nTogether, they embody a character that is just fascinating to watch. You never empathise with him, but you do not hate him. It always has stricken me that the <PERSON> is a product of his own environment. NYC is the villain, the enabler. The drugs, the gambling, the scum, the darkness, the violence, all just naturally exists around him and he is just a part of the city’s routine. He cannot break free of its immoral, corruptive nature. He is bound to it.\n—\n<PERSON> contrasts innocence with depravity. The lieutenant drops his kids to school and takes a hit straight after. He wakes up the next day next to a toddler, still wasted from the night before, the drug dealer shown trying to make ends meet for his family and the nun is corrupted. Despite being such a cynical film, I believe <PERSON> tries to shine glimpses of hope on his city.\n—\nThe film doesn’t really stop. It’s scene after scene of some misdeed, mistake or wrongdoing. It literally doesn’t stop gambling, drinking, destroying, or drugging. Neither does the lieutenant. Neither does New York. It’s a fast track to destruction in one way or another. There’s no escape. It never seems like his redemption is possible until the very end but by then it’s too late. The nightclubs, shootings, drugs, stack on top of one another, an evil momentum constantly building, and <PERSON>’s soaring use of music enhances this build up all the more. You feel doomed from the very start.\n—\nFerrara is fascinating in so many ways. This is his peak shit, and one of my all time favourite movies. It genuinely blows me away every time I see it. Underrated.","METRICS\n\nSo we've all likely been there before. You're hired in, pay is decent, working remote - in my case, ""you'll be taking about 14-20ish calls a day, waiting around watching Netflix"" this is the life of a claims adjuster.\n\nWell, needless to say : Not the case.\n\nFast forward two years.\n\nI'm now a team lead for customer service, with the ability to jump in and handle claims. If it's not one thing it's another, but I do enjoy my job. I get to fix people's screw ups and help assist contract holders, and answer questions and whatnot. Aside for the job having it's pros and cons, it appears the ""higher ups"" always have their priorities in line.\n\nAvailability time.\n\nMy job requires me to remain available to perform it, and unfortunately..if I were to remain available, I'd be non-stop taking claims calls, then being asked questions between calls from customer service..taking ""supervisor"" calls (don't even get me started), all at once. Overall, too much.\n\nOne of my coworkers was on the chopping block because of high ""unavailable"" times, and it was made known he needed to fix it. Unfortunately he decided to leave because he felt like he couldn't do his job and got sick of the email threats from our manager.\n\nWell, less than a week later: I got that email. 80% unavailable time. Keep in mind, I am working. Fixing other people's claims and getting them authorized, addressing dissatisfied customers concerns and either putting a stop to them or assisting as much as possible. I am, in theory..a supervisor, but paid significantly less, as my title is a ""hybrid"" - team lead for customer service and a claims adjuster.\n\nAppears to me that all companies that follow metrics almost militantly, are either upset you're too available, too unavailable, etc.\n\nI'll be honest, I'm a foot out the door. I already know what's coming and I made it apparent to my manager that this whole issue goes against me taking the team lead position.\n\nI basically was met with ""my hands are tied, not my problem, adhere to it or you'll get warnings"" etc.\n\nAnyone else experiencing this?",0,,"{""reasons"": ""Document 1 focuses on analyzing a film and its themes, using descriptive language and discussing character development. Document 2, on the other hand, is a personal narrative about a job experience, discussing work-related challenges and frustrations. The syntactic structures, vocabulary, and overall tone of the two documents are quite different, leading to a low similarity rating."", ""score"": ""1""}",,
3,"<PERSON>\n\nI love this film. <PERSON>’s greatest achievement is undoubtedly this. I believe <PERSON> performance is the greatest ever put to film.\n—\nTogether, they embody a character that is just fascinating to watch. You never empathise with him, but you do not hate him. It always has stricken me that the <PERSON> is a product of his own environment. NYC is the villain, the enabler. The drugs, the gambling, the scum, the darkness, the violence, all just naturally exists around him and he is just a part of the city’s routine. He cannot break free of its immoral, corruptive nature. He is bound to it.\n—\n<PERSON> contrasts innocence with depravity. The lieutenant drops his kids to school and takes a hit straight after. He wakes up the next day next to a toddler, still wasted from the night before, the drug dealer shown trying to make ends meet for his family and the nun is corrupted. Despite being such a cynical film, I believe <PERSON> tries to shine glimpses of hope on his city.\n—\nThe film doesn’t really stop. It’s scene after scene of some misdeed, mistake or wrongdoing. It literally doesn’t stop gambling, drinking, destroying, or drugging. Neither does the lieutenant. Neither does New York. It’s a fast track to destruction in one way or another. There’s no escape. It never seems like his redemption is possible until the very end but by then it’s too late. The nightclubs, shootings, drugs, stack on top of one another, an evil momentum constantly building, and <PERSON>’s soaring use of music enhances this build up all the more. You feel doomed from the very start.\n—\nFerrara is fascinating in so many ways. This is his peak shit, and one of my all time favourite movies. It genuinely blows me away every time I see it. Underrated.","<PERSON>\n\n<PERSON>, Windows from THE THING and <PERSON> (wearing a Wrestlemania hat!) decide to kill some drug dealers (including <PERSON>!) so they can raise money to overthrow the Colombian government to avenge their buddy's death.\n\nFrankly, this ruled. It's great to see <PERSON>, Action Hero. There's a moment in the opening, with <PERSON> as a POW in Vietnam, where he is trying to avoid a guy trying to stab him, and so help me he does a little soft shoe shuffle out of the way. It's a split second but it's not far off from his work in the ""Weapon of Choice"" video.\n\nLoved it. <PERSON>' magnum opus. Nine stars on a scale of seven thumbs up!",0,,"{\n""reasons"": ""Document 1 focuses on a film review discussing themes, characters, and the overall impact of the movie. It delves into the protagonist's struggles and the city's influence on him. Document 2, on the other hand, describes a different scenario involving action heroes and a mission to overthrow a government. The tone, content, and context of the two documents are quite distinct, with Document 1 being more analytical and reflective, while Document 2 is more action-oriented and humorous. These differences in subject matter, style, and focus contribute to a low syntactic similarity between the two documents."",\n""score"": ""1""\n}",,
4,"<PERSON>\n\nI love this film. <PERSON>’s greatest achievement is undoubtedly this. I believe <PERSON> performance is the greatest ever put to film.\n—\nTogether, they embody a character that is just fascinating to watch. You never empathise with him, but you do not hate him. It always has stricken me that the <PERSON> is a product of his own environment. NYC is the villain, the enabler. The drugs, the gambling, the scum, the darkness, the violence, all just naturally exists around him and he is just a part of the city’s routine. He cannot break free of its immoral, corruptive nature. He is bound to it.\n—\n<PERSON> contrasts innocence with depravity. The lieutenant drops his kids to school and takes a hit straight after. He wakes up the next day next to a toddler, still wasted from the night before, the drug dealer shown trying to make ends meet for his family and the nun is corrupted. Despite being such a cynical film, I believe <PERSON> tries to shine glimpses of hope on his city.\n—\nThe film doesn’t really stop. It’s scene after scene of some misdeed, mistake or wrongdoing. It literally doesn’t stop gambling, drinking, destroying, or drugging. Neither does the lieutenant. Neither does New York. It’s a fast track to destruction in one way or another. There’s no escape. It never seems like his redemption is possible until the very end but by then it’s too late. The nightclubs, shootings, drugs, stack on top of one another, an evil momentum constantly building, and <PERSON>’s soaring use of music enhances this build up all the more. You feel doomed from the very start.\n—\nFerrara is fascinating in so many ways. This is his peak shit, and one of my all time favourite movies. It genuinely blows me away every time I see it. Underrated.","Moonage Daydream\n\nThis is just incredible; it's not just a movie, a documentary, or a concert; no, this is an experience, an experience that just gets taken to the next level on IMAX.\n\nWhile watching this I could only think about how much <PERSON> would love this experience, she is the huge fan of <PERSON> out of the 2 of us, but honestly, that doesn't really matter because even if you are not a fan, you will become one.\n\nVisually this is just something else, the way they mix the archive footage, footage of movies and iconic moments of pop culture and never seen footage to tell us the story of <PERSON> is just genius and actually incredible.\n\nAnd all this with one of the best soundtracks you could have because it is music by <PERSON>, so what else could we ask?\n\nYes, the movie is 135 mins, and that might take a toll on the attention of some of the audience, but the storytelling of <PERSON> is just so inspiring and captivating that you just can't wait to see where he takes it next.\n\nThis breaks every mould of the tradition of “music bio-doc”, it its not about how it all happens but more about how it all makes us feel, and I just loved it and really think everyone should experience it\n\nHave you seen it? What did you think?\nAre you a <PERSON> fan?",0,,"{""reasons"": ""Document 1 focuses on a film review discussing the performance and themes of a movie, while Document 2 is a review of a different type of experience, a music documentary. The syntactic structures, vocabulary, and overall content of the two documents are quite distinct, with Document 1 using more descriptive language related to film elements and Document 2 focusing on the visual and auditory aspects of a music documentary. Therefore, the syntactic similarity between the two documents is low."", ""score"": ""1""}",,
5,"<PERSON>\n\nI love this film. <PERSON>’s greatest achievement is undoubtedly this. I believe <PERSON> performance is the greatest ever put to film.\n—\nTogether, they embody a character that is just fascinating to watch. You never empathise with him, but you do not hate him. It always has stricken me that the <PERSON> is a product of his own environment. NYC is the villain, the enabler. The drugs, the gambling, the scum, the darkness, the violence, all just naturally exists around him and he is just a part of the city’s routine. He cannot break free of its immoral, corruptive nature. He is bound to it.\n—\n<PERSON> contrasts innocence with depravity. The lieutenant drops his kids to school and takes a hit straight after. He wakes up the next day next to a toddler, still wasted from the night before, the drug dealer shown trying to make ends meet for his family and the nun is corrupted. Despite being such a cynical film, I believe <PERSON> tries to shine glimpses of hope on his city.\n—\nThe film doesn’t really stop. It’s scene after scene of some misdeed, mistake or wrongdoing. It literally doesn’t stop gambling, drinking, destroying, or drugging. Neither does the lieutenant. Neither does New York. It’s a fast track to destruction in one way or another. There’s no escape. It never seems like his redemption is possible until the very end but by then it’s too late. The nightclubs, shootings, drugs, stack on top of one another, an evil momentum constantly building, and <PERSON>’s soaring use of music enhances this build up all the more. You feel doomed from the very start.\n—\nFerrara is fascinating in so many ways. This is his peak shit, and one of my all time favourite movies. It genuinely blows me away every time I see it. Underrated.","Dogs have feelings too.\n\nWhat I learned from thinking about my dog's feelings, and my advice to you\n\nContext: My dog <PERSON> is a sweetheart, but he has extreme separation anxiety. He was also adopted from my mother-in-law, whose ex-fiancee fed him whatever human food he was eating himself at the moment. Because of these two things, <PERSON> is a very needy and vocal dog, and it can be frustrating to navigate his wants and desires as they often feel like an inconvenience.\n\nI felt this way about <PERSON> for our first two years together; however, I had a huge epiphany when I came across animals who use assistive communication devices such as Ellie the Cockatoo and <PERSON>.\n\nThe Revelation: The animals who are trained to communicate using iPads and buttons with speakers have fundamentally changed the way I understand animals' consciousness. They are able to express when they are sad, mad, afraid, lonely, bored, and even tell jokes.\n\nAfter watching hours and hours of this talking animal content, it suddenly felt extremely inhumane to deny <PERSON> of the things I knew he was asking for, just on his body language alone. It also made me wonder about all the things he wants but isn't getting because I don't understand what he is trying to say.\n\nBased on this new knowledge, I decided to think of his wants and needs just as if he were a person, and it would be wrong of me to deny him of things because 1) I am kind of his parent and 2) it is very anthropocentric or ""people-centered"" to think that my needs and wants are more important than his.\n\nThe biggest and most immediate change between us was allowing him in the bed. He became so much more comfortable with me and my wife being gone during the day because he apparently just wanted to snuggle with us all night and recharge his cuddle battery. He quickly stopped doing naughty things like going through the trash and tearing up paper, things I now know he did to express that he was upset.\n\nSo if you have a pet who gets into mischievious things or has some other behavioral problem that you just can't seem to solve, try thinking about their feelings as just another being with feelings as opposed to as their pet parent. You will be surprised at how simple some of these issues are to solve by just giving them your undivided attention for 10 minutes a day, for example.",0,,"**Document Similarity Rating:**\n\n**Reasons:**\n- Document 1 focuses on analyzing a film and its characters, discussing themes of darkness, corruption, and redemption. It delves into the portrayal of a character within a specific narrative context.\n- Document 2, on the other hand, discusses the feelings and needs of a pet dog, emphasizing the importance of understanding and empathizing with animals' emotions and behaviors. It provides a personal anecdote about the author's relationship with their dog and the changes they made based on newfound knowledge.\n\n**Score:**\n{""reasons"": ""The two documents have very different topics and structures, with Document 1 focusing on a film analysis and Document 2 focusing on pet behavior and emotions. The syntactic similarity between the two documents is low due to the distinct subject matters and writing styles."", ""score"": ""1""}",,
6,"<PERSON>\n\nI love this film. <PERSON>’s greatest achievement is undoubtedly this. I believe <PERSON> performance is the greatest ever put to film.\n—\nTogether, they embody a character that is just fascinating to watch. You never empathise with him, but you do not hate him. It always has stricken me that the <PERSON> is a product of his own environment. NYC is the villain, the enabler. The drugs, the gambling, the scum, the darkness, the violence, all just naturally exists around him and he is just a part of the city’s routine. He cannot break free of its immoral, corruptive nature. He is bound to it.\n—\n<PERSON> contrasts innocence with depravity. The lieutenant drops his kids to school and takes a hit straight after. He wakes up the next day next to a toddler, still wasted from the night before, the drug dealer shown trying to make ends meet for his family and the nun is corrupted. Despite being such a cynical film, I believe <PERSON> tries to shine glimpses of hope on his city.\n—\nThe film doesn’t really stop. It’s scene after scene of some misdeed, mistake or wrongdoing. It literally doesn’t stop gambling, drinking, destroying, or drugging. Neither does the lieutenant. Neither does New York. It’s a fast track to destruction in one way or another. There’s no escape. It never seems like his redemption is possible until the very end but by then it’s too late. The nightclubs, shootings, drugs, stack on top of one another, an evil momentum constantly building, and <PERSON>’s soaring use of music enhances this build up all the more. You feel doomed from the very start.\n—\nFerrara is fascinating in so many ways. This is his peak shit, and one of my all time favourite movies. It genuinely blows me away every time I see it. Underrated.","High School High\n\nI went in with reasonably low expectations, and frankly underestimated this bad boy!\n\nThis is a playful and unapologetic parody of the whole ""do-gooder goes to struggling school"" genre--and even its spinoffs like The Principal and The Substitute (very clearly included). And I gotta say, a lot of the jokes landed.\n\nBest for me was the utter lack of an attempt at a ""swing to the serious in the final act"" blunder made by something like 90% of comedies. Nope! If anything, they it just got funnier and more irreverent. It just kept going after the genre--and, perhaps, the subject matter--with its eyes always on the laughs.\n\nHulu informed me that this was from the makers of The Naked Gun, and I was a little doubtful. By the end, the fingerprints were unmistakable, especially when it descends into utter mayhem in the final act. This doesn't have the tears of mirth and guffaws I got from those classics, but the laughter was there, and consistently.\n\nIf you LOVE those classics... you'll probably like this.",0,,"{\n ""reasons"": ""Document 1 focuses on a serious, dark film with in-depth analysis of characters and themes, while Document 2 is a lighthearted review of a comedy movie. The tone, subject matter, and overall style of writing are very different between the two documents, leading to a low syntactic similarity."",\n ""score"": ""1""\n}",,
7,"<PERSON>\n\nI love this film. <PERSON>’s greatest achievement is undoubtedly this. I believe <PERSON> performance is the greatest ever put to film.\n—\nTogether, they embody a character that is just fascinating to watch. You never empathise with him, but you do not hate him. It always has stricken me that the <PERSON> is a product of his own environment. NYC is the villain, the enabler. The drugs, the gambling, the scum, the darkness, the violence, all just naturally exists around him and he is just a part of the city’s routine. He cannot break free of its immoral, corruptive nature. He is bound to it.\n—\n<PERSON> contrasts innocence with depravity. The lieutenant drops his kids to school and takes a hit straight after. He wakes up the next day next to a toddler, still wasted from the night before, the drug dealer shown trying to make ends meet for his family and the nun is corrupted. Despite being such a cynical film, I believe <PERSON> tries to shine glimpses of hope on his city.\n—\nThe film doesn’t really stop. It’s scene after scene of some misdeed, mistake or wrongdoing. It literally doesn’t stop gambling, drinking, destroying, or drugging. Neither does the lieutenant. Neither does New York. It’s a fast track to destruction in one way or another. There’s no escape. It never seems like his redemption is possible until the very end but by then it’s too late. The nightclubs, shootings, drugs, stack on top of one another, an evil momentum constantly building, and <PERSON>’s soaring use of music enhances this build up all the more. You feel doomed from the very start.\n—\nFerrara is fascinating in so many ways. This is his peak shit, and one of my all time favourite movies. It genuinely blows me away every time I see it. Underrated.","<PERSON>\n\nVisually speaking, this is as gargantuan as anything that <PERSON> made in his late career, LAWRENCE OF ARABIA and RYAN'S DAUGHTER in particular, and it's as beautifully realized and rendered as you could ask for, combining <PERSON>'s grandeur with the lived-in grit and grim <PERSON> brought to STAR WARS and <PERSON> to SOLARIS.\n\nIt's as portentous and weighty as you could imagine (and then some), and so utterly steeped in mythology in a way that's simultaneously riveting and frustrating. I haven't read <PERSON>'s novel so I could be off-base with this, but I have a hard time believing that this is all that watered-down in the sheer intricate structuring of the world when compared to the novel. It works best when following the story of a kid putting on a brave face while reckoning with the fact that he may just be the messiah (which is interesting since BLADE RUNNER 2049 did the opposite, following someone coming to grips with the fact that they're not the chosen one after all), but a good few stretches of this wind up feeling like someone reading you the LOTR Appendices instead of the actual story itself -- mythology for the sake of mythology. Still, you have to give the suits at WB credit for not getting weak at the knees over all the gobbledygook crammed into these 2+ hours. For as vocal as <PERSON> was this time last year about their sudden theatrical/streaming hybrid release announcement (which he was totally justified in complaining about), at least they didn't pull the same kind of crap that <PERSON> did with <PERSON> and just slice his movie to incomprehensible bits - so, credit where credit is due, I guess.\n\nStill, it's hard to fully get a feeling for this Part One alone without the context of where things will go in the second film. By itself, this one feels mostly like a giant prologue, setting up a long string of plot devices that'll get paid off later on down the road which, I dunno, I guess is fine considering that that's the direction a lot of big genre content is going in these days - preferring serialized films that are so tightly connected to their follow-ups that they hardly have a leg of their own to really stand on, as opposed to something like, say FELLOWSHIP OF THE RING which managed to lay huge groundwork, establish its relationships, move its chess pieces, and raise the stakes all with its own individual emotional payoff and a sense of roundedness, i.e. it worked as both a standalone film and as a part of a trilogy. My feeling could change but right now DUNE - PART ONE feels exactly like its final line of dialogue -- ""This is just the beginning...""\n\nThose sandworms though. <PERSON> kills at doing massive scale.",0,,"{""reasons"": ""Both documents discuss films and provide detailed analysis of the characters, themes, and overall impact. They both contain multiple paragraphs with complex sentences and use of film-related terminology. However, Document 1 focuses on a specific film and its characters, while Document 2 discusses a different film and its mythological aspects. Despite the differences in content, both documents exhibit a high level of syntactic complexity and depth in their analysis, which indicates a similarity in the writing style and structure. Therefore, I would rate their syntactic similarity as 4."", ""score"": ""4""}",,
8,"<PERSON>\n\nI love this film. <PERSON>’s greatest achievement is undoubtedly this. I believe <PERSON> performance is the greatest ever put to film.\n—\nTogether, they embody a character that is just fascinating to watch. You never empathise with him, but you do not hate him. It always has stricken me that the <PERSON> is a product of his own environment. NYC is the villain, the enabler. The drugs, the gambling, the scum, the darkness, the violence, all just naturally exists around him and he is just a part of the city’s routine. He cannot break free of its immoral, corruptive nature. He is bound to it.\n—\n<PERSON> contrasts innocence with depravity. The lieutenant drops his kids to school and takes a hit straight after. He wakes up the next day next to a toddler, still wasted from the night before, the drug dealer shown trying to make ends meet for his family and the nun is corrupted. Despite being such a cynical film, I believe <PERSON> tries to shine glimpses of hope on his city.\n—\nThe film doesn’t really stop. It’s scene after scene of some misdeed, mistake or wrongdoing. It literally doesn’t stop gambling, drinking, destroying, or drugging. Neither does the lieutenant. Neither does New York. It’s a fast track to destruction in one way or another. There’s no escape. It never seems like his redemption is possible until the very end but by then it’s too late. The nightclubs, shootings, drugs, stack on top of one another, an evil momentum constantly building, and <PERSON>’s soaring use of music enhances this build up all the more. You feel doomed from the very start.\n—\nFerrara is fascinating in so many ways. This is his peak shit, and one of my all time favourite movies. It genuinely blows me away every time I see it. Underrated.","<PERSON>\n\nSuuuuuper tender and sweet. I’ve seen a couple of <PERSON> films, and I really like how he’s unafraid of bordering on cheesiness with his sentimentality but just keeps so much gravitas within his engaging characters that it doesn’t go too far. The obvious <PERSON> comparisons are apt (from a visual standpoint) but he reminds me wayyy more of <PERSON>-eda. Some serious Our Little Sister type stuff.\n\nAll of the performances are fantastic. <PERSON>, <PERSON> and little <PERSON>…Christ, all such naturalistic depictions of grief and <PERSON> just gives them and their characters so much time to breathe. It’s so hard not to fall in love with them. Just pure empathy all around, today’s cinema needs more of it!",0,,"**Reasons for Rating:**\n\nThe two documents have some similarities in terms of discussing films and performances, but they also have significant differences in content and tone. Document 1 focuses on a darker, gritty film with themes of corruption and destruction, while Document 2 describes a tender and sweet film with naturalistic depictions of grief. The language used in each document also varies, with Document 1 using more intense and descriptive language compared to the more heartfelt and empathetic tone of Document 2. Additionally, Document 1 mentions specific elements like drugs, violence, and corruption, which are not present in Document 2. Overall, while both documents discuss films and performances, the differences in content, tone, and language make them syntactically dissimilar.\n\n**Score:**\n\n{""reasons"": ""The documents have significant differences in content, tone, and language, making them syntactically dissimilar."", ""score"": ""2""}",,
9,"<PERSON>\n\nI love this film. <PERSON>’s greatest achievement is undoubtedly this. I believe <PERSON> performance is the greatest ever put to film.\n—\nTogether, they embody a character that is just fascinating to watch. You never empathise with him, but you do not hate him. It always has stricken me that the <PERSON> is a product of his own environment. NYC is the villain, the enabler. The drugs, the gambling, the scum, the darkness, the violence, all just naturally exists around him and he is just a part of the city’s routine. He cannot break free of its immoral, corruptive nature. He is bound to it.\n—\n<PERSON> contrasts innocence with depravity. The lieutenant drops his kids to school and takes a hit straight after. He wakes up the next day next to a toddler, still wasted from the night before, the drug dealer shown trying to make ends meet for his family and the nun is corrupted. Despite being such a cynical film, I believe <PERSON> tries to shine glimpses of hope on his city.\n—\nThe film doesn’t really stop. It’s scene after scene of some misdeed, mistake or wrongdoing. It literally doesn’t stop gambling, drinking, destroying, or drugging. Neither does the lieutenant. Neither does New York. It’s a fast track to destruction in one way or another. There’s no escape. It never seems like his redemption is possible until the very end but by then it’s too late. The nightclubs, shootings, drugs, stack on top of one another, an evil momentum constantly building, and <PERSON>’s soaring use of music enhances this build up all the more. You feel doomed from the very start.\n—\nFerrara is fascinating in so many ways. This is his peak shit, and one of my all time favourite movies. It genuinely blows me away every time I see it. Underrated.","<PERSON>\n\nThe identity burden that follows all our lives since birth without doing anything. 😭 I cried a lot throughout the film. Definitely the director's best yet. Compelling script, precise direction and ensemble of all great actors.\n\nEveryone's so good here. <PERSON> just talked about loss of a child and I cried 😭 She's that good. <PERSON> as the pseudo title double role is loveeeeee. <PERSON> is exactly like the poster!!! He is the reflection a lot of characters here. Brilliant!\n\n<PERSON> and <PERSON>'s scene strike me as very memorable. Great details!\n\n""Do you even know if the person in front of you is the real one you want to meet?""\n\nThat's chicken and egg problem tho. How do we judge a person without records? Anyways, love the film.",0,,"{""reasons"": ""The two documents have different themes and tones. Document 1 focuses on a gritty, dark film with a character analysis, while Document 2 discusses a film with emotional depth and character interactions. The sentence structures, vocabulary, and overall writing style differ significantly between the two documents, making them syntactically dissimilar."", ""score"": ""2""}",,


In [56]:
all_results_df.layer.value_counts()

0    40
1    40
2    40
3    40
4    40
6    40
5    32
Name: layer, dtype: int64

In [59]:
def aggregate_score(list_of_scores):
    scores = []
    for json_str in list_of_scores:
        if json_str == None:
            continue
        try:
            if "\n\n" in json_str:
                json_str = json_str.split("\n")[-1].strip()

            json_str = json_str.replace("Output:", "").strip()
                
                
            json_obj = json.loads(json_str)            
            scores.append(int(json_obj['score']))
        except:
            print(json_str)
            print('==============')
    return round(np.mean(scores), 2)

# Aggregating layer score
layer_scores_df = all_results_df.groupby('layer').aggregate({
    'lexical': aggregate_score,
    'syntax': aggregate_score,
    'discourse': aggregate_score,
    'semantic': aggregate_score,
}).reset_index()

**Score:** {"reasons": "The content, language, tone, and structure of the two documents are very dissimilar, making their syntactic similarity low.", "score": "1"}
**** {"reasons": "The two documents have very different topics, tones, and contexts, making them semantically dissimilar.", "score": "1"}


In [60]:
layer_scores_df.head(n=7)

Unnamed: 0,layer,lexical,syntax,discourse,semantic
0,0,1.5,1.6,1.2,1.2
1,1,1.8,2.1,2.1,2.1
2,2,1.6,1.6,1.4,1.7
3,3,1.3,1.22,1.3,1.33
4,4,2.2,2.0,2.1,2.3
5,5,1.5,1.25,1.0,1.12
6,6,1.0,1.1,1.1,1.0
