In [1]:
import warnings
# Suppress FutureWarning messages for everything executed after this cell.
warnings.simplefilter(action='ignore', category=FutureWarning)

In [2]:
from sentence_transformers import SentenceTransformer, util
import numpy as np
import torch
import textwrap
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training, get_peft_model
from transformers import AutoModelForTokenClassification, AutoModelForSeq2SeqLM, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments, AutoModelForSequenceClassification, Trainer, EarlyStoppingCallback
import os
from datasets import load_dataset, Dataset, DatasetDict
import transformers
from trl import SFTTrainer
import bitsandbytes as bnb
import evaluate
import random
import pandas as pd
import base64
from IPython.display import Image, display, Markdown
import ipywidgets as widgets
from sklearn.metrics import classification_report
# from google.colab import userdata
import openai
from openai import Client
from copy import deepcopy
import json
import backoff
import csv
import time
import nltk
from nltk.tokenize import sent_tokenize
from lexrank import LexRank
from humanfriendly import format_timespan
from tqdm.notebook import tqdm
from alive_progress import alive_bar
from functools import partialmethod
from IPython.display import clear_output
import sys
import sentencepiece
import re
import string
from google import genai
from google.genai import types

In [3]:
# Load API credentials from a local JSON file. The following cell reads the keys
# REPLICATE_API_TOKEN, OPENAI_API_KEY, KIMI_API_KEY and GEMINI_API_KEY from it.
with open('api/config.json') as f:
    config = json.load(f)

In [4]:
# Copy each credential from the loaded config into a module-level name and into
# the process environment. A missing key in config raises KeyError here.
REPLICATE_API_TOKEN = config['REPLICATE_API_TOKEN']
os.environ["REPLICATE_API_TOKEN"] = REPLICATE_API_TOKEN
OPENAI_API_KEY = config['OPENAI_API_KEY']
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
# NOTE(review): KIMI_API_KEY is not referenced by any other cell visible in this notebook.
KIMI_API_KEY = config['KIMI_API_KEY']
os.environ["KIMI_API_KEY"] = KIMI_API_KEY
# GEMINI_API_KEY is passed directly to genai.Client in get_gemini_response.
GEMINI_API_KEY = config['GEMINI_API_KEY']
os.environ["GEMINI_API_KEY"] = GEMINI_API_KEY

In [5]:
# import torch
# Sanity check: display whether PyTorch reports an available CUDA device.
torch.cuda.is_available()

True

In [6]:
import replicate

def md(t):
  """Render the Markdown source string `t` as rich output in the notebook."""
  rendered = Markdown(t)
  display(rendered)

def _replicate_generate(model, prompt, temperature, max_tokens=2048):
  """Run `model` through `replicate.run` and return the completion as one string.

  Args:
    model: Replicate model identifier, e.g. "meta/meta-llama-3-8b-instruct".
    prompt: Full prompt text sent to the model.
    temperature: Sampling temperature forwarded to the model input.
    max_tokens: Value forwarded as the "max_tokens" model input.

  Returns:
    The pieces produced by `replicate.run`, concatenated with "".join.
  """
  output = replicate.run(
    model,
    input={
        "prompt": prompt,
        "max_tokens": max_tokens,
        "temperature": temperature})
  # The result is consumed as an iterable of string chunks.
  return "".join(output)

def llama2(prompt, temperature=0.0, input_print=True):
  """Generate a completion with meta/llama-2-7b-chat.

  `input_print` is accepted for backward compatibility but is not used.
  """
  return _replicate_generate("meta/llama-2-7b-chat", prompt, temperature)

def llama3_8b(prompt, temperature=0.0):
  """Generate a completion with meta/meta-llama-3-8b-instruct."""
  return _replicate_generate("meta/meta-llama-3-8b-instruct", prompt, temperature)

def llama3_70b(prompt, temperature=0.0):
  """Generate a completion with meta/meta-llama-3-70b-instruct."""
  return _replicate_generate("meta/meta-llama-3-70b-instruct", prompt, temperature)

In [7]:
# Four-shot sentiment prompt used by predict(), which appends the review to
# classify followed by a newline and the '<OUTPUT>' tag.
# NOTE(review): the instruction line spells the labels in lower case
# ("positive/negative") while the examples use "Positive"/"Negative".
prompt_main = """
Your task is to perform sentiment classification of movie reviews. Below are some examples of movie reviews and their corresponding sentiment labels (positive/negative):

Review: I have been fan of Pushing Daisies since very beginning. It is wonderfully thought up, Bryan Fuller has most remarkable ideas for this show.<br /><br />It is unbelievable on how much TV has been needing creative, original show like Pushing Daisies. It is huge relief to see show, that is unlike if you compared it to of newer shows, such as Scrubs House, you would see similarities, it does get at moments to see shows close identity.<br /><br />With magnificent wonderful script, hilarity every episode, Pushing Daisies is, by-far, one of most remarkable shows on your television.
Sentiment: Positive

Review: Steven Seagal, Mr. Personality himself, this time is the greatest Stealth pilot is promised pardon the military(..who attempted to swipe his memory at the beginning the movie for he escaped later caught after interrupting gang robbers shootout at gas station)if he is able to infiltrate Northern Afghanistan terrorist base by called Black Sunday, have commandeered an Air Force stealth fighter to an American traitor. with fellow pilot admired the traitor, Jannick(Mark John Sands(Seagal)will fly into enemy territory, receiving help his Arab freedom fighter, Rojar(Alki David) once they are on ground. Jannick is kidnapped by Black Sunday Stone(Vincenzo his enforcer, Sands must figure out how to not only re-take command the kidnapped stealth fighter, but rescue him as well. maybe, Sands can get revenge on the traitor he trained, Rather(Steve the process. Sands hours until General's Navy pilots bomb the On the stealth, Black Sunday biochemical bomb, hoping to detonate it on the /><br />Seagal gets chance to shoot Afghans he isn't slicing throats with knives. The film is mostly machine guns firing bodies dropping dead. The setting Afghanistan doesn't hold up to scrutiny(..nor does how easily Seagal are able to move about the area undetected so easily) the plot itself is nothing to write home about. The movie is edited fast, the camera bit too jerky. Seagal isn't as active hero as he once was his action scenes are tightly edited we have hard time seeing him taking out his foes, unlike the good old days. One Seagal's poorest he's as understated as ever(..not compliment). Even more disappointing is the fact that Seagal never fights hand to hand combat with the film's chief villains, tis shame. He doesn't even snap wrist or crack neck any visible we see slight resemblance tool getting tossed around, but it's not as clear picture as I enjoy the filmmakers have such fast edits dizzying
Sentiment: Negative

Review: This was one best war movies I've seen because it focuses on characters more then actual war. cast do an excellent job because most of them are relative unknowns it makes everything seem more believable. camera footage is great is so was pacing editing. This movie will actually get to you causes audience to care for charcters.
Sentiment: Positive

Review: I can sadly inform you that movie barely up to them.<br /><br />As much as I to see Janne "Loffe" Karlsson big screen again, writers should have realized scriptwriting process that seven people falling into water, isn't original or funny. story is very thin jokes are used predictable, ones that ain't, is just plain boring. I smiled like three times film.<br /><br />The placement Swedish Findus products is (unintentionally) funny, why not just big sign saying; "Findus made it happen!".<br /><br />Göta Kanal doesn't need to be seen at cinema or DVD, just wait for it to TV, it wont take too long.
Sentiment: Negative

Now, with a single label, classify the sentiment of the following review by filling in the <OUTPUT> tag:
"""
# output = llama3_70b(prompt+'<OUTPUT>')
# print(output)
# md(output)

In [8]:
# Alternative four-shot sentiment prompt; in this notebook it is only referenced
# from a commented-out line inside predict().
# NOTE(review): the `\/` sequences look like JSON escapes for '/'. `\/` is not a
# recognised Python escape, so the backslash stays in the prompt text — confirm
# whether that is intended.
# NOTE(review): as rendered here this double-quoted literal spans several
# physical lines, which is not valid Python; presumably a line-wrapping artifact
# of the export — verify against the .ipynb.
prompt_4_shot_decompx = "Your task is to perform sentiment classification of movie reviews. Below are some examples of movie reviews and their corresponding sentiment labels (Positive\/Negative):\n\nReview: as a The White Suit is far a here there .< \/ >< \/> S id atton is inventor isn 't getting the to inventions on the because nobody pays him notice , he merely odd odd job about as it were After bluff ing into n ley mill uses laboratory ing , but needs to be cleaned !. He is at first proclaimed a genius and ignored him at suddenly want a big piece of him , but then the doom port ents of an going bust re ars its head and quickly turns to something far more scary .< br \/ >< br \/> Yes the film is very , in fact some scenes are dam , but it 's the satirical to the film it way the ordinary to me . The contradictions about the advent of technology is a here , do we want save closing down industries ? , only have to what happened to the coal industry in Britain I 'm on about . The the film was made is point note , the of nuclear weapons became more hears ay , was to frightening ly . You watch this film and the quick turnaround of events for the main protagonist Stanley , from to enemy in one foul swoop , a victim of his own pursuit to better ! , it 's so dark the film should of been called The Man In The Black Suit .< br \/ >< br \/> I honestly can 't find anything wrong in this film , the script from Roger Mac Doug all , John D ighton , and director Alex Mack end rick could be filmed today and it wouldn 't be out of place is the thought of mind it . The sound and setting is , the direction is , with the ton al shift ad ly handled by Mack end rick . 
Some of the scenes are just , one in particular t ugs on the strings and one to think of a certain scene in David Lynch 's Elephant Man some 29 years later , and yet after such a downturn of events the film still to take a wink as the that is Alec Guinness gets to close out the film to keep the viewers pond ering not only the future of Stanley , but also the rest of us in this rapidly advancing world .< br \/ >< br \/> A , them atically and as a piece of art , 10 \/ 10 .\nSentiment: Positive\n\nReview: This movie child abduction from the point of view of a Mom ( l isa Hart man Black ) who acts like a man would in an action thriller . other movies where the focus is on the Police , here the Mom is tracking down her ex - husband who kidnapped their son . She gets help from her lawyer who eventually falls in with her .< br \/ >< br \/> Before finally catching up with her son , a lot of bizarre things happen . The Mom tries to a child that looks like her son from a Children 's at a theater . She gets then realizes it is not her child That would have gotten most people put into the Mental Ward or a few months in jail waiting for trial . movie after hours because sorry A little while later Mom breaks into her mother - in - law 's house and then the Police arrive and they have their guns aimed at her but they let her run away because they her ( feel sorry for her ? ). < \/ >< \/> At another point they have found the child , but when the Police arrive search the house it turns out they left out the back door got into river on a ding apparently the Dad around for emergency The Mom gets someone to lend her a raft , and it must have taken some time ( in a real world ), she and the lawyer - boy friend , and the Police up to the other raft pretty fast and it is upside down in the water by landfall . 
Instead getting out the raft to search for the Dad on land Mom umes he drowned the boy water when she his j acket she cannot swim and sinks The lawyer saves , but they a chance to run after the . point told son died On and on it goes , where it nobody knows ! In some , this movie exploits child abduction and it is not very positive On the other hand , seeing a woman do all the crazy things that men do in these kind of movies was ( or funny ? ).\nSentiment: Negative\n\nReview: the was but i had difficulty understanding what was happening ... was there of symbolism ? ... the 2 gold f ishes - do they mean in Thai ? there 's not much plot not much happens it just me anders no real start no real middle no real rather unsatisf ying .< br \/ br \/> It was difficult to get into the characters as felt got to know them ... it was difficult to know which scenes were imaginary which were The move felt chaotic and dis j ointed I don 't know what the p ang were hoping to achieve . Maybe if Thai it make more sense\nSentiment: Positive\n\nReview: O men IV : The starts at the ' St . Fr ances Or phan age ' where husband & wife Karen ( F aye Grant ) & Gene York ( Michael Woods ) are given a baby girl by Y von ne ( Me gan Le itch ) who they have , they name her Del ia . At go as pass Del ia ( V ia ) up becomes suspicious of her as death disaster her , is convinced that she is evil itself K aren then finds out that she is pregnant but discovers a sinister plot to use her as a surrogate mother for th next Ant ich rist & gets a shock when she finds out who Del ia 's real father was ... < br \/ >< br \/> Originally to be directed by Domin ique O then in - G ir ard who either quit or was sacked & was replaced by Jorge Mont esi who completed the film why he bothered is anyone 's guess as O men IV : The Awakening is absolutely terrible & a disgrace when compared to it illustrious predecessors . 
The script by Brian T agg ert is iously bad , I 'm not sure whether this nonsense actually looked good as the written word on a piece of paper there are so many things wrong with it that I find even that hard to believe . As a serious film O men IV : The AW akening falls flat on it 's face & it really does work better if you look at it as a comedy spoof , I mean the scene towards the end when the Detective comes face - to - face with a bunch of zombie car ol singers who are singing an ominous Gothic song has to be seen to be believed & I thought it was absolutely & ridiculous in equal measure . Then the pointless this the O men it a girl , is why ? Seriously , why ? There 's no reason at all & isn 't any effect at all anyway stupid end claims been embryo herself that conspiracy involving a group Satan it implanted so is mor onic comes across as just plain da ft . At first it a certain value how bad it is but the unintentional hilar ity gives complete boredom rather .< \/ >< \/> It 's obviously impossible to know how much O IV : Awakening was directed by Gir ard Mont esi you sort of tell was not well it 's a sh abby cheap looking poorly made film which was actually made for TV & it shows bland , flat unimagin ative cinem atography production Then there total lack of atmosphere gore the previous O men films .< \/ >< \/> The budget must have been low the film looks like it was The best most thing about O men IV : The Awakening is the final shot in which the camera rises up in the air as Del ia walks away into the distance to reveal a crucifix shaped cross made by two overlapping path 's but this is the very last shot before the end credits roll which says just about . I to the music which sounds awful , more suited to a comedy is very inappropriate sounding . 
acting is at as usual annoy \/ O IV The Awakening is rubbish it a totally ridiculous film that tries to serious just ends up as stupid The change of director 's probably didn 't help either , that 's still not a . The last O men film to date following the original The O men ( 1976 ), Damien : O men II ( 1978 ) & The Final Conflict ( 1981 ) all of which are far to this .\nSentiment: Negative\n\nNow, with a single label, classify the sentiment of the following review by filling in the <OUTPUT> tag:\n"

In [9]:
# Alternative four-shot sentiment prompt; in this notebook it is only referenced
# from a commented-out line inside predict().
# The trailing `\n` before the closing quotes is an escape, so the prompt ends
# with a newline after the instruction line.
prompt_4_shot_globenc = """Your task is to perform sentiment classification of movie reviews. Below are some examples of movie reviews and their corresponding sentiment labels (Positive/Negative):

Review: not very often a movie can literally make entire audience laugh five minutes later fill their eyes tears many movies try but few can deliver emotional impact this film did adam sandler practically drags you in with his heated and often violent outbursts but also makes you laugh when the shadow of his past isn t pulling him down . i m not going ruin anything but there is one scene in particular that should have your eyes watering and lip quivering . even the most macho men would be heartless bastards to not feel something while watching movie don cheadle gives another great performance but is out - shined by sandler liv tyler jada pinkett smith give solid performances but nothing in line the two leading roles sandler humor is still present which actually saved this film from being border line depressing there are several laughs had but don t think you will stay there long , because it gets serious again without much warning . < br / > < br / > i could go on and on about how well this movie hit on just about every emotion the human body contains , but i will cut this one short . feel there is no need tell you anything more do yourself a favor take time see this movie even if you have wait until comes out on dvd it 100 % worth the time a deeply moving film sure to put tears in your eyes smile on your face unless course you are heartless soul
Sentiment: Positive

Review: even by 1942 standards of movie - making the setup which her cardboard lover presents was dated extreme machinations one half pair ( husband / wife ex husband ex wife ) get other back at threat of marriage another divorce or eventual separation means jealousy humiliation or other schemes had been done much better in classics such as his girl friday the philadelphia story . both these movies features women with strong indomitable screen presence who played independent proto feminist characters in both movies both women were estranged / divorced from their ( witty ) first husbands set to marry colorless men who were their exact opposite both would bamboozle into rejecting their soon - husbands re igniting their passion each . < br / > < / > the plot in her cardboard lover switches the gender : here it s norma shearer in the cary grant role out this time ward off ex - boyfriend ( george sanders ) means hiring robert taylor pose her gigolo problem is shearer is much too old to be playing a role more suited an actress her mid - late twenties ; sanders is about as involved as piece of furniture for the most - - any man who would in love with his fiancee on seeing a strange man come out of her bathroom as happens here , would knock the lights out of him and cause a huge scene . not here . robert taylor plays his part as if he were trying channel cary grant half the time not in speech inflecti but overall essence . 
< br / > < br / > but the worst part of it is shearer herself an actress used to parts which gave her sense intellectual sexiness dramatic presence playing consuelo craydon seems to put her into throes of complete over - acting over - emoting - gesturing which while her style acting more appropriate ten years earlier makes her look like extremely mannered performer wrenching joke out situation like water fairly dry sponge only fuels the fires that tell the theory which gives irving thalberg the maker of her career and chooser of ( most her ) roles ; why she passed on roles such charlotte vale and mrs . miniver on mega - hits now voyager and mrs miniver is a mystery but again most accounts also state that time she had just burnt out from acting , that she ' d had lost interest whole thing altogether s no secret that anyone who has experienced this sort thing has essentially lost focus and can ' t wait until retirement or end contract is near to leave as soon possible such could be the case here . she seems lost she seems tired she seems ill at ease going through autopil instead living after this film she would make no more but would be responsible of discovering janet leigh who would come into her own as a screen star during late 40s 60s
Sentiment: Negative

Review: this production was made in middle 1980s and appears be the first serious attempt to put bleak house on celluloid no film version of the novel was ever attempted ( it is remarkably rich in subplots that actually serve counterpoi each so would very hard prune down ) the novel was the only attempt by dickens to make a central narrator ( one two work ) a woman esther summerson esther is raised by her aunt uncle who ( typical dickens style ) mistrea her is illegitimate won t tell her anything about her parentage later we get involved the gentry sir leicester dedlock his wife lady honoria deadlock ( dame diana rigg ) is having an increasingly difficult time regarding her private life meddling involvement the family solicitor tulkinghorn ( peter vaughn ) we also are involved the actions richard carstone ( esther boyfriend ) trying win a long drawn out estate chancery case jarndyce v . jarndyce which everyone ( even richard cousin john jarndyce - played by desmond elliot ) warns is not worth the effort . < br / > < br / > dickens had been a law reporter then a parliamentary reporter before he wrote fiction . starting breach of promise case pickwick papers , dickens looked closely at law mr bumble said was " a ass " in oliver twist and dickens would consistently support view he looks at slums as breeding grounds for crime in twist , that the law barely tries cure he attacks the chancery outdated estate laws well as too powerful solicitor greedy lawyers ( tulkingho vholes ) in bleak house little dorrit he attacks the debtors ' prisons ( he had hit it also in david copperf ) our mutual friend he looks at testators wills the mystery edwin drood he apparently was going go a murder trial dickens was far more critical of legal institutions than most his contemporaries including thackeray . 
< br / > < br / > but the novel also looks at other problems ( like charity religious hypocrisy budding scotland yard detective force social snobbery industrial revolution ) he also uses the novel satirize various people : leigh hunt the writer inspector fields scotland yard even notorious maria manning . most these points were kept in this fine mini - series version . if is shown again cable station catch
Sentiment: Positive

Review: even bad film there is usually some redeeming feature , something that you can say yes it was terrible , but there was that performance or that part the script or that special effect this was just simply terrible all over the acting was laughable the script terrible , complete with many inexplicab breakfast at tiffany references even the special effects were shoddy at best was very bad film one even drew barrymore wishes was expunged history watch if you want : ) suffer harsh self inflicted pain b ) see just how bad a film can be is one film where i can use cliche " there ninety minutes my life i will never get back " with some justification !
Sentiment: Negative

Now, with a single label, classify the sentiment of the following review by filling in the <OUTPUT> tag:\n"""

In [10]:
# Alternative (disabled): build an explicit client object instead of using the
# module-level API.
# from openai import OpenAI
# client = OpenAI(
#     # Defaults to os.environ.get("OPENAI_API_KEY")
#     api_key=os.environ.get("OPENAI_API_KEY"),
# )
# Set the key on the openai module itself; get_openai_response calls the
# module-level openai.chat.completions.create.
openai.api_key = os.environ.get("OPENAI_API_KEY")

In [16]:
@backoff.on_exception(backoff.expo, (openai.OpenAIError, openai.APIError), max_tries=5)
def get_openai_response(prompt, model="gpt-3.5-turbo"):
    """Send `prompt` to an OpenAI chat model and return the reply text.

    The message content is the prompt terminated by an '<OUTPUT>' tag on its
    own line. If the prompt already ends with that tag (as the prompts built
    by predict() do), it is sent unchanged so the tag is not duplicated.

    Failed calls raising openai.OpenAIError are retried with exponential
    back-off, up to 5 tries in total, by the backoff decorator.

    Args:
        prompt: Prompt text, with or without a trailing '<OUTPUT>' tag.
        model: OpenAI chat model name.

    Returns:
        The `content` of the first choice's message.
    """
    output_tag = '<OUTPUT>'
    if prompt.rstrip().endswith(output_tag):
        content = prompt
    else:
        content = prompt + '\n' + output_tag
    messages = [{"role": "user", "content": content}]
    response = openai.chat.completions.create(
        model=model,
        messages=messages,
        # temperature=0.0, # this is the degree of randomness of the model's output
        # reasoning_effort="medium"
    )
    return response.choices[0].message.content

In [12]:
def get_gemini_response(prompt, model="gemini-2.0-flash"): # gemini-2.0-flash-thinking-exp-01-21 # gemini-2.0-flash-preview-image-generation
    """Send `prompt` to a Gemini model and return the reply text.

    A new genai.Client is created on every call using the module-level
    GEMINI_API_KEY. The prompt string is passed directly as `contents`.

    NOTE(review): sampling uses temperature=0.7 / top_p=0.95 / top_k=64, so
    repeated calls with the same prompt may differ — confirm this is wanted
    for evaluation runs.

    Args:
        prompt: Full prompt text, sent as-is.
        model: Gemini model identifier.

    Returns:
        `response.text` of the generate_content result.
    """
    client = genai.Client(api_key=GEMINI_API_KEY)
    generate_content_config = types.GenerateContentConfig(
        temperature=0.7,
        top_p=0.95,
        top_k=64,
        max_output_tokens=65536,
        response_mime_type="text/plain",
    )
    response = client.models.generate_content(
        model=model,
        contents=prompt,
        config=generate_content_config,
    )
    return response.text

In [23]:
def _sentiment_from_output(output):
    """Map raw model text to 1 (positive) or 0 (anything else).

    Matching is case-insensitive. When both labels occur, the one mentioned
    first wins, so text generated after the answer cannot flip the label.
    """
    lowered = output.lower()
    positive_at = lowered.find('positive')
    negative_at = lowered.find('negative')
    if positive_at == -1:
        return 0
    if negative_at != -1 and negative_at < positive_at:
        return 0
    return 1

def predict(input_text, att_model_name, shots=4, random_seed=42, percentage=80.0):
    """Classify one review with the few-shot prompt and parse the label.

    The prompt is `prompt_main` + `input_text` + a newline + '<OUTPUT>'.
    `att_model_name`, `shots`, `random_seed` and `percentage` are accepted for
    call-site compatibility but are not used by the current body.

    Args:
        input_text: Review text appended to the few-shot prompt.
        att_model_name: Unused here.
        shots: Unused here.
        random_seed: Unused here.
        percentage: Unused here.

    Returns:
        Tuple `(prompt, output, prediction)`, where `output` is the raw model
        text and `prediction` is 1 for positive and 0 otherwise (including
        outputs that contain neither label).
    """
    # prompt = get_prompt(df_train, att_model_name, shots, random_seed, percentage)
    prompt = prompt_main + input_text + '\n' + '<OUTPUT>'
    # prompt = prompt_4_shot_decompx + input_text + '\n' + '<OUTPUT>'
    # prompt = prompt_4_shot_globenc + input_text + '\n' + '<OUTPUT>'
    # prompt = prompt_math_reasoning + input_text + '\n' + 'Answer: <OUTPUT>'
    # prompt = prompt_cosmosqa + input_text # + '\n' + 'Answer: <OUTPUT>'
    output = llama3_8b(prompt) # llama3_70b(prompt) # get_openai_response(prompt) # get_gemini_response(prompt)   # Change model here.
    prediction = _sentiment_from_output(output)
    return prompt, output, prediction

In [None]:
# Classify every row of the IMDB test split with predict() and record the
# prompt, raw model response and parsed label for each row.
MAX_ATTEMPTS = 6  # retry budget per example before the run is aborted

with open('scores/decompx/imdb_test_80.json') as data_file:
    d = json.load(data_file)
df_test_80 = pd.DataFrame.from_dict(d)
test_data = df_test_80.copy() #change this to test_80, test_60, test_50
prompts = []
responses = []
preds = []
start = time.time()
prevIndex = 0
with alive_bar(len(test_data), force_tty=True) as bar:
    for index, row in test_data.iterrows():
        # Retry failed calls with exponential back-off, but give up after
        # MAX_ATTEMPTS so a deterministic error cannot spin forever. Catching
        # Exception (not a bare except) keeps kernel interrupts working.
        for attempt in range(1, MAX_ATTEMPTS + 1):
            try:
                prompt, response, pred = predict(row['text'], att_model_name="DecompX", shots=4, random_seed=42, percentage=80.0)
                # prompt, response = predict('Context:\n'+row['context']+'\nQuestion: '+row['question']+'\nChoices:\n'+'1) '+
                #                    row['answer0']+'\n2) '+row['answer1']+'\n3) '+row['answer2']+'\n4) '+row['answer3'],
                #                    att_model_name="DecompX", shots=4, random_seed=42, percentage=80.0)
                break
            except Exception as err:
                if attempt == MAX_ATTEMPTS:
                    raise RuntimeError(
                        f"predict() failed {MAX_ATTEMPTS} times for row {index}"
                    ) from err
                print(f"Row {index}: attempt {attempt} failed ({err!r}); retrying.")
                time.sleep(min(2 ** attempt, 30))
        prompts.append(prompt)
        responses.append(response)
        preds.append(pred)
        # Report timing every 50 rows; assumes an integer index — TODO confirm.
        if index != 0 and index % 50 == 0:
            print(f"Processed {prevIndex} – {index} indexed examples in {format_timespan(time.time() - start)}.")
            start = time.time()
            prevIndex = index + 1
        bar()
        # if index == 5:
        #     break
test_data['prompt'] = prompts
test_data['response'] = responses
test_data['pred'] = preds

on 50: Processed 0 – 50 indexed examples in 1 minute and 59.96 seconds.         ▄▆ 0/1000 [0%] in 3s (~0s, 0.0/s) 
on 100: Processed 51 – 100 indexed examples in 1 minute and 45.84 seconds.      
on 150: Processed 101 – 150 indexed examples in 1 minute and 47.67 seconds.     
on 200: Processed 151 – 200 indexed examples in 1 minute and 52.47 seconds.     
on 250: Processed 201 – 250 indexed examples in 1 minute and 39.1 seconds.      
on 300: Processed 251 – 300 indexed examples in 1 minute and 47.58 seconds.     
on 350: Processed 301 – 350 indexed examples in 1 minute and 48.15 seconds.     
on 400: Processed 351 – 400 indexed examples in 1 minute and 49.97 seconds.     
on 450: Processed 401 – 450 indexed examples in 1 minute and 56.78 seconds.     
on 500: Processed 451 – 500 indexed examples in 1 minute and 50.82 seconds.     
on 550: Processed 501 – 550 indexed examples in 1 minute and 47.9 seconds.      
on 600: Processed 551 – 600 indexed examples in 1 minute and 49.9 seconds. 

In [None]:
# Persist the annotated frame (with its prompt / response / pred columns) as a JSON list of records.
# NOTE(review): the file name says "test_100" while the evaluation cell loads imdb_test_80.json — confirm the intended name.
test_data.to_json('responses/llama3_8b_imdb_test_100.json', orient='records')

In [None]:
# Run predict() over the 'frugal_text' column of the GSM8K 60% split and record
# the prompt and raw model response for each row.
# NOTE(review): predict() as defined in this notebook builds a sentiment prompt
# from prompt_main — confirm the intended prompt is active before running.
MAX_ATTEMPTS = 6  # retry budget per example before the run is aborted

with open('scores/decompx/gsm8k_test_60.json') as data_file:
    d = json.load(data_file)
df_test_60 = pd.DataFrame.from_dict(d)
test_data = df_test_60.copy() #change this to test_80, test_60, test_50
prompts = []
responses = []
preds = []
start = time.time()
prevIndex = 0
with alive_bar(len(test_data), force_tty=True) as bar:
    for index, row in test_data.iterrows():
        # Retry failed calls with exponential back-off, but give up after
        # MAX_ATTEMPTS so a deterministic error cannot spin forever. Catching
        # Exception (not a bare except) keeps kernel interrupts working.
        for attempt in range(1, MAX_ATTEMPTS + 1):
            try:
                # predict() returns (prompt, output, prediction); only the first
                # two are needed here, so extra values are discarded instead of
                # raising "too many values to unpack".
                prompt, response, *_ = predict(row['frugal_text'], att_model_name="DecompX", shots=4, random_seed=42, percentage=60.0)
                # prompt, response = predict('Context:\n'+row['frugal_text']+'\nQuestion: '+row['question']+'\nChoices:\n'+'1) '+
                #                    row['answer0']+'\n2) '+row['answer1']+'\n3) '+row['answer2']+'\n4) '+row['answer3'],
                #                    att_model_name="DecompX", shots=4, random_seed=42, percentage=60.0)
                break
            except Exception as err:
                if attempt == MAX_ATTEMPTS:
                    raise RuntimeError(
                        f"predict() failed {MAX_ATTEMPTS} times for row {index}"
                    ) from err
                print(f"Row {index}: attempt {attempt} failed ({err!r}); retrying.")
                time.sleep(min(2 ** attempt, 30))
        prompts.append(prompt)
        responses.append(response)
        # preds.append(pred)
        # Report timing every 50 rows; assumes an integer index — TODO confirm.
        if index != 0 and index % 50 == 0:
            print(f"Processed {prevIndex} – {index} indexed examples in {format_timespan(time.time() - start)}.")
            start = time.time()
            prevIndex = index + 1
        bar()
        # if index == 5:
        #     break
test_data['prompt'] = prompts
test_data['response'] = responses
# test_data['pred'] = preds

In [None]:
# Persist the frame with its prompt / response columns as a JSON list of records.
# NOTE(review): the file name mentions gemini2.0_flash_thinking, but predict() as defined in this notebook calls llama3_8b — confirm which model produced these rows.
test_data.to_json('responses/decompx/gemini2.0_flash_thinking_gsm8k_test_60.json', orient='records')

In [None]:
# Run predict() over the 'frugal_text' column of the GSM8K 50% split and record
# the prompt and raw model response for each row.
# NOTE(review): predict() as defined in this notebook builds a sentiment prompt
# from prompt_main — confirm the intended prompt is active before running.
MAX_ATTEMPTS = 6  # retry budget per example before the run is aborted

with open('scores/decompx/gsm8k_test_50.json') as data_file:
    d = json.load(data_file)
df_test_50 = pd.DataFrame.from_dict(d)
test_data = df_test_50.copy() #change this to test_80, test_60, test_50
prompts = []
responses = []
preds = []
start = time.time()
prevIndex = 0
with alive_bar(len(test_data), force_tty=True) as bar:
    for index, row in test_data.iterrows():
        # Retry failed calls with exponential back-off, but give up after
        # MAX_ATTEMPTS so a deterministic error cannot spin forever. Catching
        # Exception (not a bare except) keeps kernel interrupts working.
        for attempt in range(1, MAX_ATTEMPTS + 1):
            try:
                # predict() returns (prompt, output, prediction); only the first
                # two are needed here, so extra values are discarded instead of
                # raising "too many values to unpack".
                prompt, response, *_ = predict(row['frugal_text'], att_model_name="DecompX", shots=4, random_seed=42, percentage=50.0)
                # prompt, response = predict('Context:\n'+row['frugal_text']+'\nQuestion: '+row['question']+'\nChoices:\n'+'1) '+
                #                    row['answer0']+'\n2) '+row['answer1']+'\n3) '+row['answer2']+'\n4) '+row['answer3'],
                #                    att_model_name="DecompX", shots=4, random_seed=42, percentage=50.0)
                break
            except Exception as err:
                if attempt == MAX_ATTEMPTS:
                    raise RuntimeError(
                        f"predict() failed {MAX_ATTEMPTS} times for row {index}"
                    ) from err
                print(f"Row {index}: attempt {attempt} failed ({err!r}); retrying.")
                time.sleep(min(2 ** attempt, 30))
        prompts.append(prompt)
        responses.append(response)
        # preds.append(pred)
        # Report timing every 50 rows; assumes an integer index — TODO confirm.
        if index != 0 and index % 50 == 0:
            print(f"Processed {prevIndex} – {index} indexed examples in {format_timespan(time.time() - start)}.")
            start = time.time()
            prevIndex = index + 1
        bar()
        # if index == 5:
        #     break
test_data['prompt'] = prompts
test_data['response'] = responses
# test_data['pred'] = preds

In [None]:
# Persist the frame with its prompt / response columns as a JSON list of records.
# NOTE(review): the file name mentions gemini2.0_flash_thinking, but predict() as defined in this notebook calls llama3_8b — confirm which model produced these rows.
test_data.to_json('responses/decompx/gemini2.0_flash_thinking_gsm8k_test_50.json', orient='records')