In [None]:
!pip install langchain transformers accelerate pandas

In [2]:
import torch
from langchain import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, pipeline
from langchain import PromptTemplate
from langchain.chains import LLMChain, SequentialChain
from langchain.memory import SimpleMemory
import time
from langchain.schema.output_parser import BaseLLMOutputParser
import re
import pandas as pd

In [3]:
!python -c "from huggingface_hub.hf_api import HfFolder; HfFolder.save_token('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')"

In [None]:
# Model identifier passed to from_pretrained for both the tokenizer and the weights.
MODEL_NAME_2 = "siddanshchawla/Llama-2-7b-hf-arithmetic-solver-v7"

# The fast tokenizer implementation is requested explicitly.
tokenizer_2 = AutoTokenizer.from_pretrained(
    MODEL_NAME_2,
    use_fast=True,
)

# Weights are requested as float16; device placement is delegated via device_map="auto".
model_2 = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME_2,
    device_map="auto",
    trust_remote_code=True,
    torch_dtype=torch.float16,
)

In [5]:
# Start from the generation settings published with the model, then override
# the decoding options below (applied in the listed order).
# NOTE(review): sampling with temperature 1e-5 is presumably meant to
# approximate greedy decoding — confirm this is intended.
generation_config_2 = GenerationConfig.from_pretrained(MODEL_NAME_2)
for _option, _value in (
    ("max_new_tokens", 512),
    ("temperature", 0.00001),
    ("top_p", 0.95),
    ("do_sample", True),
    ("repetition_penalty", 1.15),
):
    setattr(generation_config_2, _option, _value)

In [6]:
# Text-generation pipeline built from the loaded model and tokenizer, using the
# generation config defined for this model and returning the full text.
text_pipeline_2 = pipeline(
    task="text-generation",
    model=model_2,
    tokenizer=tokenizer_2,
    generation_config=generation_config_2,
    return_full_text=True,
)

# LangChain wrapper around the transformers pipeline.
llm_2 = HuggingFacePipeline(
    model_kwargs={"temperature": 0},
    pipeline=text_pipeline_2,
)

In [29]:
# Instruction-style prompt; {program} is its only placeholder.
template_2 = """### Instruction:\nWhat is the numerical value of {program}?\n### Output:\n"""
prompt_2 = PromptTemplate(template=template_2, input_variables=["program"])

In [47]:
class MyOutputParser_2(BaseLLMOutputParser):
    """Output parser that extracts the number following "The final answer is".

    The first match in the model output is returned as a string (for example
    "42", "-3" or "1.5"). When the phrase is absent, the sentinel string
    "Expression not found" is returned instead of raising.
    """

    def parse_result_2(self, text):
        """Return the first "final answer" number found in ``text``.

        Args:
            text: Either a plain string, or a list/tuple of items. For a
                list/tuple, each item's ``text`` attribute is used when present
                (LangChain generation objects expose one); otherwise the item is
                converted with ``str``. Any other object is converted with ``str``.

        Returns:
            The matched number as a string, or "Expression not found".
        """
        if isinstance(text, (list, tuple)):
            # Search the generated text itself rather than the repr of the
            # list, whose escaping and extra metadata are irrelevant here.
            text = "\n".join(str(getattr(item, "text", item)) for item in text)
        else:
            text = str(text)

        match = re.search(r'The final answer is (-?\d+(\.\d+)?)', text)
        if match is None:
            return "Expression not found"
        # Group 1 is the whole number; group 2 is only the optional fraction.
        return match.group(1)

    def parse_result(self, text, *, partial=False):
        """Parse the LLM result; entry point called by LangChain.

        ``partial`` is accepted for compatibility with the base-class
        signature and is ignored: parsing is the same either way.
        """
        return self.parse_result_2(text)

In [48]:
# Single parser instance; it is passed to LLMChain as its output_parser.
the_output_parser_2 = MyOutputParser_2()

In [49]:
# Chain wiring: prompt -> LLM -> output parser, with the result stored under "answer".
chain_2 = LLMChain(
    prompt=prompt_2,
    llm=llm_2,
    output_parser=the_output_parser_2,
    output_key="answer",
)

In [None]:
import pandas as pd
# Read test.csv into a DataFrame and preview its first rows.
data = pd.read_csv('test.csv')
data.head()

In [None]:
# Run every program through the chain, record the parsed answers, and report
# the average wall-clock time per row.
answers = []
start_time = time.time()  # started once, outside the loop, so the total covers all rows
for program in data['program']:
    # With a single output key, Chain.run returns that output value itself
    # (not a dict), so the result is stored as-is.
    results = chain_2.run({"program": program})
    answers.append(results)
time2 = time.time() - start_time

# Assign the whole column at once: the column does not exist beforehand, and
# chained item assignment (data[col][i] = ...) is unreliable in pandas.
data['final_answer'] = answers

# max(..., 1) avoids a ZeroDivisionError when the CSV has no rows.
print(f"Avg Time: {time2 / max(len(data), 1)}")