In [1]:
import google.generativeai as genai
import os

# Read the Gemini key from the GOOGLE_API_KEY environment variable so the real
# secret never has to be written into the notebook; 'APIKEY' stays as the
# fallback placeholder when the variable is not set.
genai.configure(api_key=os.environ.get('GOOGLE_API_KEY', 'APIKEY'))

# Model handle used by generate_response_gemini.
model = genai.GenerativeModel('gemini-pro')

In [2]:
import pandas as pd
# Load the tab-separated test file (no header row) and take the first column
# as the list of input sentences. Direct column access replaces the
# row-by-row iterrows() loop, which builds a Series for every row.
test_df = pd.read_csv('INPUTPATH', sep="\t", header=None)
test_sentences = test_df[0].tolist()


In [3]:
#zero shot

def generate_response_gemini(text):
  """Ask the module-level Gemini ``model`` to complete ``text``.

  Args:
    text: Prompt passed to ``model.generate_content``.

  Returns:
    The ``.text`` of the completion. If anything raises along the way, the
    exception is printed and an empty string is returned instead.
  """
  try:
    return model.generate_content(text).text
  except Exception as err:
    # Best effort: report the failure and fall back to an empty answer.
    print(err)
    return ""


In [4]:
def create_prompt(text):
    """Build the zero-shot triple-extraction prompt for one input text.

    The ontology concepts and relations are embedded verbatim in the template
    and ``text`` is interpolated after ``Test Text:``. Only the leading and
    trailing whitespace of the rendered template is stripped; the indentation
    inside the template is kept as written.

    Args:
        text: The text to insert into the prompt.

    Returns:
        The prompt string, which ends with ``Test Output:``.
    """
    prompt = f"""
        Given the following ontology, please extract the triples from the text according to the relations in the ontology. In the output, only include the triples in the given output format.
        CONTEXT: Ontology Concepts:
        PSCategories, InterAgentRelationshipTypes, Place, EntityWithProvenance, TemporalExtent, Organization, OriginRecord, RaceRecord, Person, PlaceCV, NameVariant, Occupations, AgeRecord, TimeSpan, MatchType, ParticipantRoleRecord, AgentRecord, Agent, AgeCategory, Event, ParticipantRoleTypes, PersonStatusRecord, ResearchProject, Researcher, SpatialExtent, LicenseInformation, Match, ProvenanceActivity, OccupationRecord, Coordinates, ECVO, SexRecord, DocumentTypes, InterAgentRelationshipRecord, SexTypes, ExternalReference, ExternalReferent, EventTypes, PlaceTypeCV, ResearchProjectContributorRole, ResearchProjectPIRole, PersonRecord, Description, NameRecord
        Ontology Relations:
        Is-a(Match,EntityWithProvenance), Is-a(OccupationRecord,AgentRecord), Is-a(ResearchProject,Event), Is-a(ResearchProjectPIRole,Researcher), Is-a(Place,EntityWithProvenance), Is-a(AgeRecord,AgentRecord), Is-a(SexRecord,AgentRecord), Is-a(Person,Agent), Is-a(RaceRecord,AgentRecord), Is-a(AgentRecord,EntityWithProvenance), Is-a(TimeSpan,TemporalExtent), Is-a(Coordinates,SpatialExtent), Is-a(ParticipantRoleRecord,AgentRecord), Is-a(NameRecord,AgentRecord), Is-a(PersonRecord,AgentRecord), Is-a(ResearchProjectContributorRole,Researcher), Is-a(ResearchProject,ProvenanceActivity), Is-a(PersonStatusRecord,AgentRecord), Is-a(Organization,Agent), Is-a(OriginRecord,AgentRecord), Is-a(InterAgentRelationshipRecord,AgentRecord), Is-a(Description,EntityWithProvenance), isRelationshipFrom(InterAgentRelationshipRecord,Agent), fallsWithin(TemporalExtent,TimeSpan), occursBefore(TemporalExtent,xsd:date), hasOriginalSourceType(EntityWithProvenance,DocumentTypes), refersToPlaceOfOrigin(OriginRecord,Place), hasLicenseInformation(EntityWithProvenance,LicenseInformation), startsAt(TemporalExtent,xsd:date), occursAfter(TemporalExtent,xsd:date), isDirectlyBasedOn(EntityWithProvenance,EntityWithProvenance), generatedBy(EntityWithProvenance,ProvenanceActivity), performedBy(ProvenanceActivity,Agent), attributedTo(EntityWithProvenance,Agent), availableFrom(EntityWithProvenance,xsd:string), hasEventType(Event,EventTypes), hasParticipantRoleType(ParticipantRoleRecord,ParticipantRoleTypes), hasInterAgentRelationshipType(InterAgentRelationshipRecord,InterAgentRelationshipTypes), hasStatusGeneratedEvent(PersonStatusRecord,Event), providesParticipantRole(ResearchProjectPIRole,ResearchProject), used(EntityWithProvenance,EntityWithProvenance), withID(ExternalReference,xsd:string), contains(TemporalExtent,xsd:date), providesParticipantRole(ResearchProjectContributorRole,ResearchProject), refersTo(ExternalReference,ExternalReferent), isRelationshipTo(InterAgentRelationshipRecord,Agent), 
used(ProvenanceActivity,EntityWithProvenance), endsAt(TemporalExtent,xsd:date), hasECVO(OriginRecord,ECVO)

        Test Text: {text}
        Test Output: """
    # Drop the leading newline and the trailing space of the rendered template.
    return prompt.strip()

In [9]:
#GPT

In [10]:
import os
from openai import OpenAI

# Take the OpenAI key from the OPENAI_API_KEY environment variable instead of
# hardcoding it in the notebook; falls back to the original empty placeholder.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", ""))

import time


def ChatGPT_conversation(conversation, model_id, retries=5, delay=5):
    """Send ``conversation`` to the chat-completions API and append the reply.

    Args:
        conversation: List of ``{'role': ..., 'content': ...}`` message dicts.
            It is mutated in place: the reply (or a failure marker) is appended.
        model_id: Model name passed to ``client.chat.completions.create``.
        retries: Maximum number of attempts before giving up (default 5).
        delay: Seconds to sleep between failed attempts (default 5).

    Returns:
        The same ``conversation`` list. On success its last entry carries the
        role and content of the first returned choice (a ``None`` content is
        stored as ``""`` so callers can safely call ``.strip()``). If every
        attempt raised, the last entry is
        ``{"role": "user", "content": "No Response"}``.
    """
    for attempt in range(1, retries + 1):
        try:
            response = client.chat.completions.create(
                model=model_id,
                temperature=1,
                messages=conversation
            )
            reply = response.choices[0].message
            conversation.append({'role': reply.role, 'content': reply.content or ""})
            return conversation

        except Exception as e:
            # Any failure (not only timeouts) is reported and retried.
            print(e)
            if attempt < retries:
                print('Timeout error, retrying...')
                # Only wait when another attempt will actually follow.
                time.sleep(delay)

    print(f"API Not responding after {retries} tries")
    conversation.append({"role": "user", "content": "No Response"})
    return conversation


def _single_turn_predict(prompt, model_id):
    """Send ``prompt`` as a one-message conversation and return the stripped last message content."""
    conversation = ChatGPT_conversation([{'role': 'user', 'content': prompt}], model_id)
    return conversation[-1]['content'].strip()


def gpt_3_predict(prompt):
    """Return the stripped reply of ``gpt-3.5-turbo`` to a single user ``prompt``.

    The result is whatever ``ChatGPT_conversation`` appended last to the
    conversation, with surrounding whitespace removed.
    """
    return _single_turn_predict(prompt, 'gpt-3.5-turbo')


def gpt_4_predict(prompt):
    """Return the stripped reply of ``gpt-4-1106-preview`` to a single user ``prompt``.

    The result is whatever ``ChatGPT_conversation`` appended last to the
    conversation, with surrounding whitespace removed.
    """
    return _single_turn_predict(prompt, 'gpt-4-1106-preview')



In [11]:

# Single-column frame holding the input sentences under "sentences1".
df = pd.DataFrame({"sentences1": test_sentences})

In [12]:
# Build one prompt per sentence; create_prompt is passed directly to apply.
df["gpt_pmpt"] = df["sentences1"].apply(create_prompt)

In [None]:

# For every sentence, query GPT-3.5 and GPT-4 with its prompt and append one
# tab-separated row (sentence, GPT-3.5 output, GPT-4 output) to the output file.
# Writing per iteration keeps finished rows if the run is interrupted; because
# the file is opened in append mode, re-running this cell adds the rows again.
# The Gemini helper (generate_response_gemini) is not called here.
for sentence, prompt in zip(test_sentences, df["gpt_pmpt"].tolist()):
  preds2 = gpt_3_predict(prompt)
  preds3 = gpt_4_predict(prompt)

  out_df = pd.DataFrame([[sentence, preds2, preds3]])

  # index/header take booleans; False writes neither row labels nor a header.
  out_df.to_csv("OUTPATH", sep="\t", index=False, mode="a", header=False)
