In [1]:
!pip install -q git+https://github.com/huggingface/transformers

In [2]:
import re
import json
import torch
from transformers import AutoTokenizer
from transformers import AutoModelForCausalLM

model_path="/kaggle/input/mistral/pytorch/7b-instruct-v0.1-hf/1"
tokenizer=AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map = "auto",
    trust_remote_code = True,
    torch_dtype = torch.bfloat16,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [3]:
def preprocess_prompt(prompt):
    prompt = prompt.replace("“", "'")
    prompt = prompt.replace("”", "'")
    return prompt

def generate_output(prompt):
    messages = [{
        "role":"user",
        "content": prompt
    }]

    model_inputs = tokenizer.apply_chat_template(messages, return_tensors = "pt")
    generated_ids = model.generate(
        model_inputs.to('cuda'),
        max_new_tokens = 1000,
        do_sample = True,
    )
    
    decoded = tokenizer.batch_decode(generated_ids)
    output = decoded[0].split('[/INST]')[1][:-4]
    return output

def process_output(output):
    final = output.replace("\\", "")
    
    beg = final.find('{')
    if beg == -1:
        beg = 0
    end = final.rfind('}')
    
    final = final[beg:end+1]
    final = json.loads(final)
    return final

In [4]:
prompt = """
Obituary Details: {obituary}

Task: {task}

Output Format: Proper JSON Object
"""

obits = [
    "Yulonda “Lonnie” Bascia Hollis, 49, of Packard Ave., Kannapolis, N.C., passed away on Tuesday, August 31, 2021 at her residence.  She was born August 18, 1972 in Cabarrus County to Kenneth Crawford and Lorene Hollis Russell. Yulonda was educated in the Cabarrus County School System  and attended A.L. Brown High School.  She was formerly employed with FedEx.  Services will be held on Wednesday, September 8, 2021, at Living Water Church of God 166 N. Little Texas Rd. Kannapolis.  Visitation will be 1:00 PM – 2:00 PM; with funeral services at 2:00 PM. Burial will follow at Bethel Baptist Church Cemetery. She was preceded in death by a brother, Cedric Hill; grandmother, Essie Hollis and stepfathers, Robin Brown  and Cleatus Gill. She leaves to cherish fond memories two daughters, Kiana Hollis and Jasmine Hollis both of Kannapolis; one son, Clinton Thompson of Kannapolis; seven grandchildren; her parents, Kenneth Crawford (Regina) pf Mooresville, NC, Lorene Russell of Kannapolis;  fiancé, Len Harold; one brother, Rodricko Hill (Lavita) of Charlotte, NC; four sisters, Gytanna Hollis of Concord, Shakitta Hill (Jermaine Smith) of Kannapolis, Tara Crawford of Mooresville, NC, Melonie Patrice Love of Dallas, TX; grandmother, Lula Mae Brown of Charlotte, NC; grandfather, Cletus Gill (Sue) of Kannapolis; stepbrother, Rev. Chavis Maxwell (Tosha) of Kannapolis; stepsister, Tameca Robinson of Salisbury and a host of nieces, nephews, cousins and friends.",
    "Marvin Coulter, Jr., age 86, of Springfield, passed away at 3:08 a.m., Friday, September 3, 2021 after an illness at the home of his daughter, Ruth. He was born in Marion County on May 3, 1935 to the late Marvin and Mildred Lanham Coulter. He was a member of the Temple Baptist Church, a lifelong farmer and for ten years managed the St. Catharine Farm. Preceding him in death was a brother, Vernon Coulter on April 18, 2020 and an infant sister, Virginia Ann Coulter on January 4, 1948. Survivors include his wife, Virginia Brown Coulter; two daughters, Ruth Smith (Jim) and Karen Newsome (Ronnie) of Springfield; two sons, Jeff Coulter of Springfield and Stanley Coulter (Cheryl) of Danville; five grandchildren, Brad Coulter (Heather), Brandon Coulter (Sara), Kim Lane (Steve), Heather Hall and Brittany Smith; seven great grandchildren; three sisters, Brenda Stevens (Al) of Springfield, Rita Stirn of Louisville and Darlene Ferguson (Ben) of Russellville and four brothers, Russell Coulter (Brenda) of Chaplin, Harold Coulter of Lawrenceburg, Bruce Coulter (Debbie) of Bardstown and Kelly Coulter (Marilyn) of Springfield. Funeral services will be held at 11:00 a.m., Wednesday, September 8th at the Temple Baptist Church with Rev. John McDaniel, officiating. He will be assisted by Rev. Kyle Franklin. Burial will be in the Evergreen Cemetery at Willisburg. Serving as pallbearers will be Brad Coulter, Brandon Coulter, Kim Lane, Heather Hall, Brittany Smith and Mike Teater. Visitation will be from 4 until 8 Tuesday at the Temple Baptist Church. Carey & Son Funeral Home is in charge of arrangements.",
    "Patricia Carol “Patti” Spalding, age 70 of the Fredericktown Community of Washington County, passed away at 1:50 p.m., Friday, September 3, 2021 at Baptist Health in Lexington. She was born in Lebanon, Kentucky on January 26, 1951 to the late William Wheeler and Mary Roberta Curtsinger Spalding. She was a member of the Holy Trinity Catholic Church, a 1969 graduate of Washington County High School and a homemaker. Preceding her in death was a brother, William W. “Bill” Spalding on November 2, 2009 and two sisters, Elizabeth Ann “Betty” Cheatham on August 22, 2019 and Norma Jean Burkhead on November 23, 2018. Survivors include her husband, Marlin Spalding; two sons, Tim (Sherie) Spalding of Scottsville and Kevin (Corey) Spalding of Louisville; two daughters, Dawn (Chuck) Hagan and Amanda (Bobby Joe) Mattingly of Springfield; twelve grandchildren, Steven Hagan, Halie Ward, Nathan Spalding, Nick Hagan, Madison Snellen, Isaiah Mattingly, Aidyn Snellen, Anna Kate Mattingly, Jackson Spalding, Emma Grace Mattingly, Emily Spalding and Noah Mattingly; one great grandchild, Kaylee Mae Hagan; a sister, Mary Sue (Gary) Goff of Bardstown and two brothers, Jimmy (Rose) Spalding and Jerry Spalding of Louisville. A Mass of Christian Burial will be held at 10:00 a.m., Monday, September 6th at the Holy Trinity Catholic Church with the Most Reverend J. Mark Spalding, officiating, Burial will be in the Holy Trinity Cemetery, Serving as pallbearers will be Madison Snellen, Halie Ward, Nathan Spalding, Isaiah Mattingly, John Spalding, Martin Spalding, David Spalding and Jim Spalding. Serving as honorary pallbearers will be Aidyn Snellen, Emma Grace Mattingly, Anna Kate Mattingly, Noah Mattingly, Jackson Spalding, Emily Spalding, Nick Hagan and Steven Hagan. A prayer service will be held at 6:00 Sunday evening at the Carey & Son Funeral Home Visitation will be from 4 until 8 Sunday at the funeral home. In lieu of flowers contributions may be made to, St. Jude Children’s Research Hospital, 501 St. Jude Place, Memphis, TN 38105. Carey & Son Funeral Home is in charge of arrangements.",
    "Cam Baker 75 years old of Georgetown, Ohio, passed away on September 3, 2021, at her home in Georgetown, Ohio.  Wife of the late: Dr. Robert Baker.  She is survived by her Daughter: April Baker. 1  Granddaughter: Abby Baker.  2 Sisters:  Penny (Ross) Owens and Millie (Larry) Unger. 2 Brothers: Monte (Barbara) Williford and the late Mike Williford.  Sister-in-law: Joan Williford.    Numerous, Nieces, Nephews, Other Family and Friends.  Graveside Funeral Services will be at the Confidence Cemetery, Georgetown, Ohio on Tuesday, September 7, 2021 at 10:00 AM.  Arrangements by the Charles H McIntyre Funeral Home, 323 Union Street, Felicity, Ohio, 45120.  Please visit our webpage to leave condolences www.charleshmcintyrefuneralhome.com.",
    "Claree Blakeney Griffin 65,  was born September 24, 1955 to the late James McKinley Blakeney and Gracie Rorie in Union County, NC, and departed her life on earth on August 31, 2021 at Novant Health Matthews Medical Center in Matthews, NC. She leaves to cherish her fond memories to her sons: Maceo Griffin of Georgia, Christopher Griffin (Candice) of Charlotte, NC; daughter, Victoria Griffin of Monroe, NC; brothers: Ronald Blakeney of Advance, NC and Roger Blakeney (Tiny) of Raleigh, NC; sisters: Gladys Blakeney of Marshville, NC, Peggy Baker and Catherine Taylor both of Monroe, NC; (14) grandchildren and (1) great-grand child and a host of nieces, nephews, cousins, relatives and other friends.",
]

task = "Extract the below attributes if they exist from the above obituary details: name, age, \
death city, death state, death date, birth date, birth city, parents, grandparents, children, \
spouse, sons or daughters in law, college, occupations, funeral home name."

In [5]:
results = []

for i in range(len(obits)):
    prompt_copy = prompt.format(obituary=obits[i], task=task)
    prompt_copy = preprocess_prompt(prompt_copy)
    output = generate_output(prompt_copy)
    results.append(process_output(output))
    
with open("output.json", "w") as jf:
    output = json.dumps(results, indent=2)
    jf.write(output)
    print(output)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attentio

[
  {
    "name": "Yulonda 'Lonnie' Bascia Hollis",
    "age": 49,
    "death city": "Packard Ave.",
    "death state": "Kannapolis",
    "death date": "August 31, 2021",
    "birth city": null,
    "birth state": null,
    "birth date": "August 18, 1972",
    "parents": [
      {
        "father": "Kenneth Crawford"
      },
      {
        "mother": "Lorene Hollis Russell"
      }
    ],
    "grandparents": [
      {
        "grandmother": "Essie Hollis"
      },
      {
        "grandfather": "Cletus Gill",
        "step-grandfather": "Cleatus Gill"
      }
    ],
    "children": [
      {
        "daughter": "Kiana Hollis"
      },
      {
        "daughter": "Jasmine Hollis"
      },
      {
        "son": "Clinton Thompson"
      }
    ],
    "spouse": null,
    "sons in law": [],
    "daughters in law": [],
    "college": null,
    "occupation": null,
    "funeral home": "Living Water Church of God"
  },
  {
    "name": "Marvin Coulter, Jr.",
    "age": 86,
    "death_city": "Sp