In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from pprint import pprint
from pathlib import Path
MODEL_NAME = "microsoft/Phi-3-mini-128k-instruct"
def get_tokenizer(tokenizerdir="./tokenizer"):
  tokenizerexists = Path(tokenizerdir).exists()
  tokenizer_path = tokenizerdir if tokenizerexists else MODEL_NAME
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
  if not tokenizerexists:
    tokenizer.save_pretrained(tokenizerdir)
  return tokenizer
def create_pipe(modeldir="./model"):
  modelexists = Path(modeldir).exists()
  model_path = modeldir if modelexists else MODEL_NAME
  model = AutoModelForCausalLM.from_pretrained(
      MODEL_NAME,
      device_map="cuda",
      torch_dtype="auto",
      trust_remote_code=True,
  )
  if not modelexists:
    model.save_pretrained(modeldir)
  pipe = pipeline(
      "text-generation",
      model=model,
      tokenizer=tokenizer,
  )
  return pipe
generation_args = {
    "max_new_tokens": 500,
    "return_full_text": False,
    "temperature": 0.0,
    "do_sample": False,
}

In [None]:
tokenizer = get_tokenizer()

In [None]:
prompts = [{'messages': [{'role': 'system', 'content': "List of predicates is ['org:founded', 'org:subsidiaries', 'per:date_of_birth', 'per:cause_of_death', 'per:age', 'per:stateorprovince_of_birth', 'per:countries_of_residence', 'per:country_of_birth', 'per:stateorprovinces_of_residence', 'org:website', 'per:cities_of_residence', 'per:parents', 'per:employee_of', 'NA', 'per:city_of_birth', 'org:parents', 'org:political/religious_affiliation', 'per:schools_attended', 'per:country_of_death', 'per:children', 'org:top_members/employees', 'per:date_of_death', 'org:members', 'org:alternate_names', 'per:religion', 'org:member_of', 'org:city_of_headquarters', 'per:origin', 'org:shareholders', 'per:charges', 'per:title', 'org:number_of_employees/members', 'org:dissolved', 'org:country_of_headquarters', 'per:alternate_names', 'per:siblings', 'org:stateorprovince_of_headquarters', 'per:spouse', 'per:other_family', 'per:city_of_death', 'per:stateorprovince_of_death', 'org:founded_by']. What Subject-Predicate-Object triples are included in the task sentence? Each triple is in the form ('subject', 'predicate', 'object'). 'predicate' must be from the list of predicates only. Only use 'NA' when you cannot find any reasonable predicates. Only return the triples in a Python list."}, {'role': 'user', 'content': " [('USA Network', 'NA', 'Turfway Park')]"}, {'role': 'assistant', 'content': "The series leading to the may 1 derby will begin march 27 on usa network with the louisiana derby from new orleans, and will also include the lane 's end stakes from turfway park in florence, ky.. "}, {'role': 'user', 'content': " [('MBIA', 'org:top_members/employees', 'Charles E. Chaplin')]"}, {'role': 'assistant', 'content': "The bond insurers declined to comment on friday, though on thursday, mbia 's chief financial officer, charles e. chaplin, vigorously defended his company at a hearing in congress and said it did not need any help. "}, {'role': 'assistant', 'content': 'A result is that for a variety of reasons, several prominent corporations that typically promote chief executives from within have turned to the outside to fill their top spot in recent years, including boeing, chrysler, con-agra, ford, hewlett- packard and 3m. '}], 'label': "[('Con-Agra', 'NA', 'Chrysler')]"}, {'messages': [{'role': 'system', 'content': "List of predicates is ['org:founded', 'org:subsidiaries', 'per:date_of_birth', 'per:cause_of_death', 'per:age', 'per:stateorprovince_of_birth', 'per:countries_of_residence', 'per:country_of_birth', 'per:stateorprovinces_of_residence', 'org:website', 'per:cities_of_residence', 'per:parents', 'per:employee_of', 'NA', 'per:city_of_birth', 'org:parents', 'org:political/religious_affiliation', 'per:schools_attended', 'per:country_of_death', 'per:children', 'org:top_members/employees', 'per:date_of_death', 'org:members', 'org:alternate_names', 'per:religion', 'org:member_of', 'org:city_of_headquarters', 'per:origin', 'org:shareholders', 'per:charges', 'per:title', 'org:number_of_employees/members', 'org:dissolved', 'org:country_of_headquarters', 'per:alternate_names', 'per:siblings', 'org:stateorprovince_of_headquarters', 'per:spouse', 'per:other_family', 'per:city_of_death', 'per:stateorprovince_of_death', 'org:founded_by']. What Subject-Predicate-Object triples are included in the task sentence? Each triple is in the form ('subject', 'predicate', 'object'). 'predicate' must be from the list of predicates only. Only use 'NA' when you cannot find any reasonable predicates. Only return the triples in a Python list."}, {'role': 'user', 'content': " [('David Gregory', 'NA', 'Virginia')]"}, {'role': 'assistant', 'content': "Just earlier david gregory was interviewing cantor, and he talked about the virginia primary ballot and said `` there 's no one else on the ballot with romney ''. "}, {'role': 'assistant', 'content': 'The 23-year-old halim told the jakarta globe that she is a member of the church, for which she did humanitarian work after the asian tsunami in 2004, the mail and guardian said. '}], 'label': "[('Jakarta Globe', 'NA', '2004')]"}, {'messages': [{'role': 'system', 'content': "List of predicates is ['org:founded', 'org:subsidiaries', 'per:date_of_birth', 'per:cause_of_death', 'per:age', 'per:stateorprovince_of_birth', 'per:countries_of_residence', 'per:country_of_birth', 'per:stateorprovinces_of_residence', 'org:website', 'per:cities_of_residence', 'per:parents', 'per:employee_of', 'NA', 'per:city_of_birth', 'org:parents', 'org:political/religious_affiliation', 'per:schools_attended', 'per:country_of_death', 'per:children', 'org:top_members/employees', 'per:date_of_death', 'org:members', 'org:alternate_names', 'per:religion', 'org:member_of', 'org:city_of_headquarters', 'per:origin', 'org:shareholders', 'per:charges', 'per:title', 'org:number_of_employees/members', 'org:dissolved', 'org:country_of_headquarters', 'per:alternate_names', 'per:siblings', 'org:stateorprovince_of_headquarters', 'per:spouse', 'per:other_family', 'per:city_of_death', 'per:stateorprovince_of_death', 'org:founded_by']. What Subject-Predicate-Object triples are included in the task sentence? Each triple is in the form ('subject', 'predicate', 'object'). 'predicate' must be from the list of predicates only. Only use 'NA' when you cannot find any reasonable predicates. Only return the triples in a Python list."}, {'role': 'assistant', 'content': "Directed by david gordon green; written by green, based on the novel by stewart o'nan; director of photography, tim orr; edited by william anderson; music by david wingo and jeff mcilwain; production designer, richard wright; produced by dan lindau, paul miller, lisa muskat and cami taylor; released by warner independent pictures. "}], 'label': "[('William Anderson', 'NA', 'Paul Miller')]"}, {'messages': [{'role': 'system', 'content': "What Subject-Predicate-Object triples are included in the task sentence? Each triple is in the form ('subject', 'predicate', 'object'). Only return the triples in a Python list."}, {'role': 'user', 'content': " [('USA Network', 'NA', 'Turfway Park')]"}, {'role': 'assistant', 'content': "The series leading to the may 1 derby will begin march 27 on usa network with the louisiana derby from new orleans, and will also include the lane 's end stakes from turfway park in florence, ky.. "}, {'role': 'user', 'content': " [('MBIA', 'org:top_members/employees', 'Charles E. Chaplin')]"}, {'role': 'assistant', 'content': "The bond insurers declined to comment on friday, though on thursday, mbia 's chief financial officer, charles e. chaplin, vigorously defended his company at a hearing in congress and said it did not need any help. "}, {'role': 'assistant', 'content': 'A result is that for a variety of reasons, several prominent corporations that typically promote chief executives from within have turned to the outside to fill their top spot in recent years, including boeing, chrysler, con-agra, ford, hewlett- packard and 3m. '}], 'label': "[('Con-Agra', 'NA', 'Chrysler')]"}, {'messages': [{'role': 'system', 'content': "What Subject-Predicate-Object triples are included in the task sentence? Each triple is in the form ('subject', 'predicate', 'object'). Only return the triples in a Python list."}, {'role': 'user', 'content': " [('David Gregory', 'NA', 'Virginia')]"}, {'role': 'assistant', 'content': "Just earlier david gregory was interviewing cantor, and he talked about the virginia primary ballot and said `` there 's no one else on the ballot with romney ''. "}, {'role': 'assistant', 'content': 'The 23-year-old halim told the jakarta globe that she is a member of the church, for which she did humanitarian work after the asian tsunami in 2004, the mail and guardian said. '}], 'label': "[('Jakarta Globe', 'NA', '2004')]"}, {'messages': [{'role': 'system', 'content': "What Subject-Predicate-Object triples are included in the task sentence? Each triple is in the form ('subject', 'predicate', 'object'). Only return the triples in a Python list."}, {'role': 'assistant', 'content': "Directed by david gordon green; written by green, based on the novel by stewart o'nan; director of photography, tim orr; edited by william anderson; music by david wingo and jeff mcilwain; production designer, richard wright; produced by dan lindau, paul miller, lisa muskat and cami taylor; released by warner independent pictures. "}], 'label': "[('William Anderson', 'NA', 'Paul Miller')]"}]

In [None]:
def lens(data):
  return [len(d) for d in data]
for messages in prompts:
  tokenized = tokenizer.apply_chat_template(messages["messages"], tokenize=True, add_generation_prompt=True)
  print(len(tokenized))
  #print(lens(tokenized["input_ids"]))

In [None]:
with open("phi3prompts.txt", "w") as fw:
  for messages in prompts:
      pipe = create_pipe()
      output = pipe(messages["messages"], **generation_args)
      output = output[0]['generated_text']
      pprint(f"Prompt: {messages['messages']}")
      print(f"\n\nLabel: {messages['label']}\nPrediction: {output}\n")
      pprint(messages["messages"], fw)
      print(f"\n\nLabel: {messages['label']}\nPrediction: {output}\n", file=fw)