In [1]:
model_name = "meta-llama/Meta-Llama-3-70B-Instruct" # meta-llama/Llama-3.1-8B-Instruct

In [3]:
import os
import sys

# 'src' is given as a relative path, so it is resolved against the current
# working directory: start the notebook from the folder that contains 'src'.
path_to_src = os.path.join('src')  # with a single argument this is simply 'src'

# Make the modules inside 'src' importable by their bare name
sys.path.append(path_to_src)

# API_key is looked up via the path added above; it is referenced below as `key`
import API_key as key

In [5]:
from langchain_core.prompts import ChatPromptTemplate

# Simplified system template
system_template = """
<Context>
You are a knowledgeable assistant whose task is to provide two arrays: one for the names of the capitals and another for the primary languages spoken in the provided list of countries. 
Respond solely with the information requested, formatted as specified.
</Context>

<Data Structure>
The provided countries are simply an array of country names.
</Data Structure>

<Task>
Write a JSON object containing two arrays: "capital_names" for the names of the requested capitals and "languages" for the primary languages spoken in those capitals. 
Ensure that the information is structured as follows:

{{
  "capitals": [
   {{
      "name": "name1",
      "language": "language1"
    }},
    {{
      "name": "name2",
      "language": "language2"
    }},
    {{
      "name": "name3",
      "language": "language3"
    }}
  ]
}}

Please respond with the entire JSON structure as a dictionary called "capitals", exactly as shown above, without any additional formatting or text.
</Task>
"""


# Simplified user template
user_template = """
What are the capitals of {userinput}, along with the languages spoken there?
"""

# Set up the ChatPromptTemplate
prompt_template = ChatPromptTemplate.from_messages(
    [("system", system_template), ("user", user_template)]
)

# Test the invoke with simplified templates
result = prompt_template.invoke({"userinput": "Germany, USA, France, China, Australia, and South Africa"})
print(result)

messages=[SystemMessage(content='\n<Context>\nYou are a knowledgeable assistant whose task is to provide two arrays: one for the names of the capitals and another for the primary languages spoken in the provided list of countries. \nRespond solely with the information requested, formatted as specified.\n</Context>\n\n<Data Structure>\nThe provided countries are simply an array of country names.\n</Data Structure>\n\n<Task>\nWrite a JSON object containing two arrays: "capital_names" for the names of the requested capitals and "languages" for the primary languages spoken in those capitals. \nEnsure that the information is structured as follows:\n\n{\n  "capitals": [\n   {\n      "name": "name1",\n      "language": "language1"\n    },\n    {\n      "name": "name2",\n      "language": "language2"\n    },\n    {\n      "name": "name3",\n      "language": "language3"\n    }\n  ]\n}\n\nPlease respond with the entire JSON structure as a dictionary called "capitals", exactly as shown above, wit

In [6]:
from langchain_openai import ChatOpenAI
from langchain.callbacks import get_openai_callback


# Initialize the chat model. The ChatOpenAI client is pointed at the Hugging Face
# inference endpoint through openai_api_base, and the Hugging Face key is passed
# in place of an OpenAI key; `model_name` is the model selected in the first cell.
model = ChatOpenAI(model=model_name, openai_api_key=key.hugging_api_key, openai_api_base="https://api-inference.huggingface.co/v1/", max_tokens=500, temperature=0.2)

In [7]:
# Structured-output variant, currently disabled (kept for reference):
#structured_llm = model.with_structured_output(json_schema, include_raw=True)
#chain = prompt_template | structured_llm

# Plain chain: the rendered prompt is piped straight into the chat model
chain = prompt_template | model

# Run the chain inside the callback context so that token usage is recorded in `cb`
with get_openai_callback() as cb:
    response = chain.invoke(
        {"userinput": "Germany, USA, France, China, Australia, Angola, Egypt"}
    )
    # Full usage summary first, then the individual counters
    print(cb)
    print(f"Total Tokens: {cb.total_tokens}")
    print(f"Prompt Tokens: {cb.prompt_tokens}")
    print(f"Completion Tokens: {cb.completion_tokens}")
    print(f"Total Cost (USD): ${cb.total_cost}")

Tokens Used: 405
	Prompt Tokens: 255
	Completion Tokens: 150
Successful Requests: 1
Total Cost (USD): $0.0
Total Tokens: 405
Prompt Tokens: 255
Completion Tokens: 150
Total Cost (USD): $0.0


In [8]:
import json
import pandas as pd

# Show the raw text of the model reply
print(response.content)


# Parse the reply as JSON; json.loads raises if the reply is not valid JSON
# (e.g. if the model added any text around the object)
data = json.loads(response.content)

# One row per entry of the "capitals" list
df = pd.DataFrame(data['capitals'])

# Display the DataFrame
print(df)

{
  "capitals": [
    {
      "name": "Berlin",
      "language": "German"
    },
    {
      "name": "Washington, D.C.",
      "language": "English"
    },
    {
      "name": "Paris",
      "language": "French"
    },
    {
      "name": "Beijing",
      "language": "Mandarin Chinese"
    },
    {
      "name": "Canberra",
      "language": "English"
    },
    {
      "name": "Luanda",
      "language": "Portuguese"
    },
    {
      "name": "Cairo",
      "language": "Arabic"
    }
  ]
}
               name          language
0            Berlin            German
1  Washington, D.C.           English
2             Paris            French
3           Beijing  Mandarin Chinese
4          Canberra           English
5            Luanda        Portuguese
6             Cairo            Arabic


In [None]:
# NOTE(review): CHROMA_PATH and openAI_key are not defined anywhere in the visible
# notebook, and Chroma / OpenAIEmbeddings are only imported in later cells, so this
# unexecuted cell presumably fails on a fresh kernel — confirm whether it is still needed.
db = Chroma(persist_directory=CHROMA_PATH, embedding_function=OpenAIEmbeddings(api_key=openAI_key))


In [None]:
from langchain_community.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings

from langchain.vectorstores import utils as chromautils

# ChromaDB doesn't support complex metadata, e.g. lists.
# NOTE(review): nothing in this cell actually drops metadata (the imported
# `chromautils` is not used here) — confirm whether filtering is still required.

# Embedding model loaded by name through HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5")
# Generic variant, currently disabled (kept for reference):
#vectorstore = Chroma.from_documents(documents, embeddings)
#retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 3})

# NOTE(review): `splits_RR` is only created in a later cell of this notebook,
# so this cell depends on out-of-order execution — confirm the intended order.
vectorstore_RR = Chroma.from_documents(documents=splits_RR, embedding=embeddings)
# Similarity search retriever with k=3
retriever_RR = vectorstore_RR.as_retriever(search_type="similarity", search_kwargs={"k": 3})

# Load needed modules

In [11]:
# modules:
import pandas as pd
import os
from openai import OpenAI
import numpy as np

import re

import math

from langchain_openai import ChatOpenAI

# to read pdf files
from langchain_community.document_loaders import PyPDFLoader

# to read text of pdf files
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate


from langchain import PromptTemplate, LLMChain
from langchain.callbacks import get_openai_callback


# get external variables or functions:
import src.API_key as key

# Load data to feed ChatGPT

In [9]:
## Working environment
# Remember the current working directory; later cells build their data paths from it
directory = os.getcwd()
print(directory)

# Show what is available in the working directory
files = os.listdir('.')
print(files)

c:\DATEN\PHD\Article_SoftRobotIntervention\Analyses\main study - ChatGPT
['.venv', 'data', 'output', 'runChatGPT.ipynb', 'runChatGPT_new.ipynb', 'src', 'testChatGPT.ipynb', 'v01', 'v02']


## Load Scenario Texts
These should be the final English scenario texts for the two robots:

* rescue robot
* socially assistive robot


source: https://python.langchain.com/v0.2/docs/tutorials/pdf_qa/

In [12]:
# Rescue robot: read the scenario PDF and report how many documents were loaded
file_path = f"{directory}/data/scenario texts/rescue robot.pdf"
loader = PyPDFLoader(file_path)
doc_RR = loader.load()
print("length PDF rescue robot:", len(doc_RR))

# Socially assistive robot: same steps for the second scenario PDF
file_path = f"{directory}/data/scenario texts/socially assistive robot.pdf"
loader = PyPDFLoader(file_path)
doc_SAR = loader.load()
print("length PDF socially assistive robot:", len(doc_SAR))

length PDF rescue robot: 6
length PDF socially assistive robot: 6


Using a text splitter, the loaded documents are split into smaller chunks that fit more easily into an LLM's context window. The chunks are then loaded into a vector store, from which a retriever is created for use in our RAG chain.

In [48]:
# Split the rescue-robot documents into overlapping chunks
# (chunk_size=800, chunk_overlap=200, measured by the splitter's length function)
text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=200)
splits_RR = text_splitter.split_documents(doc_RR)
# Embed the chunks with OpenAI embeddings and store them in a Chroma vector store
vectorstore_RR = Chroma.from_documents(documents=splits_RR, embedding=OpenAIEmbeddings(openai_api_key=key.openai_api_key))

# Similarity search retriever with k=4
retriever_RR = vectorstore_RR.as_retriever(search_type="similarity", search_kwargs={"k": 4})

In [39]:
vectorstore_RR

<langchain_community.vectorstores.chroma.Chroma at 0x1ea4bba4550>

In [40]:
llm = ChatOpenAI(model="gpt-4o", openai_api_key=key.openai_api_key)

In [49]:
# System message for the question-answering chain; the adjacent string literals
# are concatenated into one prompt that ends with the {context} placeholder.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

# The human message carries the question passed in as "input"
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)


# Combine the LLM + prompt with the rescue-robot retriever into one RAG chain
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever_RR, question_answer_chain)

results = rag_chain.invoke({"input": "What are the advantages of soft robots?"})

# Displayed as the cell output; the recorded output shows the keys 'input' and 'context'
results

{'input': 'What are the advantages of soft robots?',
 'context': [Document(metadata={'page': 4, 'source': 'c:\\DATEN\\PHD\\Article_SoftRobotIntervention\\Analyses\\main study - ChatGPT/data/scenario texts/rescue robot.pdf'}, page_content='––––––––––––  Second  Page in Experiment (Intervention) ––––––––––––   \nSoft Robots for Search and Rescue Missions  \nBenefits of soft robots for search and rescue missions might be:  \n●  Access to areas unreachable or too dangerous for human rescuers  \n●  Delivery  of essential supplies (water, food, medicine) until victims are safely  \nextracted  \n●  Reduced risk of injury to victims due to their flexibility and adaptability  \nPossible risks of soft robots for search and rescue missions might be:  \n●  Algorithms  guiding soft robots may be biased, leading to unfair or \ndiscriminatory outcomes, regarding (i) where to concentrate rescue efforts, (ii) \nwhom to search for first, (iii) who should be given priority treatment, (iv) who \nmust be l

In [32]:
print(results["context"][0].page_content)
print(results["context"][0].metadata)

essential supplies, and autonomously assisting in the rescue of victims, these robots 
can enhance the efficiency and effectiveness of rescue operations.  
While robots f or search and rescue are still in the development phase, it is important 
to consider the ethical aspects (risks and benefits) of these technologies.
{'page': 0, 'source': 'c:\\DATEN\\PHD\\Article_SoftRobotIntervention\\Analyses\\main study - ChatGPT/data/scenario texts/rescue robot.pdf'}


In [None]:
# NOTE(review): `ERROR` is not defined in the visible notebook, so this cell raises
# NameError; presumably a deliberate stop marker so that "Run All" halts here — confirm.
ERROR

## Load .xlsx files (lists of words)
These should be the final word lists of the two robots, combined and separated:

* rescue robot_multipleSheets
* socially assistive robot_multipleSheets
* rescue robot_socially assistive robot_multipleSheets

sources:
* https://python.langchain.com/v0.2/docs/tutorials/llm_chain/ (Build a Simple LLM Application with LCEL)
* https://python.langchain.com/v0.2/docs/how_to/structured_output/ (How to return structured data from a model)
* https://python.langchain.com/v0.2/docs/how_to/llm_token_usage_tracking/ (How to track token usage for LLMs)


In [None]:
def _load_all_sheets(file_path, label):
    """
    Read every sheet of one Excel workbook into a dictionary of DataFrames.

    Parameters:
    file_path (str): Path to the .xlsx file.
    label (str): Short name used in the printed "Sheet names <label>:" line.

    Returns:
    dict: Maps each sheet name to the DataFrame parsed from that sheet.
    """
    # The with-block closes the workbook handle as soon as all sheets are parsed
    with pd.ExcelFile(file_path) as excel_data:
        print(f"Sheet names {label}:", excel_data.sheet_names)
        return {sheet_name: excel_data.parse(sheet_name) for sheet_name in excel_data.sheet_names}


## Load the xlsx file of the rescue robot and the socially assistive robot combined
file_path = directory + "/data/" + "rescue robot_socially assistive robot_multipleSheets" + ".xlsx"
all_sheets_Combined = _load_all_sheets(file_path, "combined")

## Load the xlsx file of the rescue robot only
file_path = directory + "/data/" + "rescue robot_multipleSheets" + ".xlsx"
all_sheets_RR = _load_all_sheets(file_path, "RR")

## Load the xlsx file of the socially assistive robot only
file_path = directory + "/data/" + "socially assistive robot_multipleSheets" + ".xlsx"
all_sheets_SAR = _load_all_sheets(file_path, "SAR")


In [None]:
# Maps each category abbreviation to the wording that is inserted into the prompts
# as the topic category. Suffix P / N marks the positive / negative variant of a
# category (e.g. SIP vs. SIN, HRIP vs. HRIN, AP vs. AN).
abbreviations_dict = {
    'RCPP': 'perceived positive usefulness (rest category, refers to a classification of arguments that do not fit into any of the predefined categories)',
    'LC': 'perceived low costs',
    'T': 'perceived trust',
    'SIP': 'perceived positive social impact',
    'HRIP': 'perceived positive Human-Robot-Interaction',
    'AN': 'perceived negative anthropomorphism',
    # Fixed: SIN previously repeated the SIP text ("positive") by copy-paste
    'SIN': 'perceived negative social impact',
    'R': 'perceived risks',
    'HC': 'perceived high costs',
    'RCN': 'neutral rest category (rest category refers to a classification of arguments that do not fit into any of the predefined categories)',
    'SA': 'perceived safety',
    'TP': 'perceived technological possibilities',
    'TL': 'perceived technological limitations',
    'RCPN': 'perceived negative usefulness (rest category, refers to a classification of arguments that do not fit into any of the predefined categories)',
    'HRIN': 'perceived negative Human-Robot-Interaction',
    'MT': 'perceived mistrust',
    'RCA': 'ambivalent rest category (rest category refers to a classification of arguments that do not fit into any of the predefined categories)',
    'AP': 'perceived positive anthropomorphism'
}

# Quick visual check of the mapping, its keys and one example entry
print(abbreviations_dict)
print(abbreviations_dict.keys())
print(abbreviations_dict['AN'])

In [None]:
# Example: compare the dimensions of one category sheet across the three workbooks
sheet_name = 'T'
for _workbook in (all_sheets_Combined, all_sheets_RR, all_sheets_SAR):
    print(_workbook[sheet_name].shape)

Use dictionaries to map each word to its comment.

In [None]:
def create_multivalue_dict(df, key_col, value_col):
    """
    Group the values of one column by the entries of another column.

    Parameters:
    df (pd.DataFrame): The input DataFrame (left unchanged).
    key_col (str): Name of the column whose entries become the dictionary keys.
    value_col (str): Name of the column whose entries are collected per key.

    Returns:
    dict: Each distinct key mapped to the list of its values, in row order.
          Rows whose key is NaN are ignored; NaN values are kept.
    """
    # Rows without a key cannot be grouped, so they are discarded first
    rows_with_key = df.dropna(subset=[key_col])

    grouped = {}
    for entry, comment in zip(rows_with_key[key_col], rows_with_key[value_col]):
        # setdefault creates the list the first time a key is seen
        grouped.setdefault(entry, []).append(comment)

    return grouped

# Example usage: keys 'a' and 'b' occur twice, and the last row has no key (NaN)
data = {
    'constant': ['a', 'b', 'a', 'c', 'b', np.nan],
    'constant_comments': ['comment1', 'comment2', 'comment3', 'comment4', 'comment5', 'comment6']
}
df = pd.DataFrame(data)

# Expected: 'a' -> ['comment1', 'comment3'], 'b' -> ['comment2', 'comment5'],
# 'c' -> ['comment4']; 'comment6' is dropped together with its NaN key
df_mapping = create_multivalue_dict(df, 'constant', 'constant_comments')
print(df_mapping)

In [12]:
def combine_dicts(dict1, dict2):
    """
    Combine two dictionaries where each key maps to a list of values.

    Neither input is modified: the result holds its own lists, so appending to
    them later does not change dict1 or dict2.

    Parameters:
    dict1 (dict): The first dictionary.
    dict2 (dict): The second dictionary.

    Returns:
    dict: A combined dictionary where each key maps to a concatenated list of values
          (values of dict1 first, then those of dict2).
    """
    # dict.copy() would be shallow and share the lists with dict1, so a later
    # extend() would silently grow dict1's lists as well; copy each list instead.
    combined_dict = {key: list(values) for key, values in dict1.items()}
    for key, values in dict2.items():
        # extend() copies the items, so lists of dict2 are never aliased either
        combined_dict.setdefault(key, []).extend(values)
    return combined_dict

In [None]:
# Build the word -> comments mappings for the selected sheet of the combined workbook.
# create_multivalue_dict is used instead of dict(zip(...)) because a plain dict
# would keep only one comment when the same word occurs several times:
#> not sensitive to multiple identical keys: dict(zip(df['constant'], df['constant_comments']))
df = all_sheets_Combined[sheet_name]

# Columns 'constant' / 'constant_comments' (passed to the prompt as the "rigid" list)
constant_comments_mapping = create_multivalue_dict(df, 'constant', 'constant_comments')
print("mapping constant x comments:", constant_comments_mapping)
print(len(constant_comments_mapping))

# Columns 'new' / 'new_comments' (passed to the prompt as the "soft" list)
new_comments_mapping = create_multivalue_dict(df, 'new', 'new_comments')
print("mapping new x comments:", new_comments_mapping)
print(len(new_comments_mapping))

# Set up ChatGPT

## Provide prompt template

### for task to get main findings, differences and summary

In [None]:
system_template = """<Context>You are a researcher tasked with summarizing two wordlists that highlight people's assessments of rigid robots compared to soft robots. Laypersons were informed about the potential risks and benefits of {robots} through scenario texts. Initially, they listed their perceived risks and benefits of rigid robots in a list titled "rigid." Subsequently, they learned about the trend towards soft robots, which are made of flexible, soft materials and are electronic-free. They then created a list titled "soft" to highlight the differences between rigid and soft robots. The overarching topic of the two lists is the {topicCategory}.</Context>

<Data Structure>The lists "rigid" and "soft" are dictionaries where the keys are written arguments, and their corresponding values are one or more comments related to those arguments. The value [nan] indicates that no specific comment was provided for the respective entry. If there are multiple comments or missing entries ([nan]), it signifies that the respective argument was mentioned as many times as there are entries.</Data Structure>

<Task>Write two concise bullet points: one highlighting the main findings of the provided "rigid" and "soft" lists combined, and the other detailing the differences between the provided "rigid" and "soft" lists. Each set of bullet points should contain a maximum of five items, focusing on the overarching argument structures. Additionally, provide a summary paragraph of no more than four sentences that encapsulates the main findings and the found differences. Do not use the term list, instead refer to the {robots}. Be scientific and neutral in your wording. Consider all provided information carefully. Check if you have provided the two lists of bullet points (called mainFindings and differences), and the summary paragraph (called summary).</Task>"""

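# !!! The assignment below overrides the template above. This version additionally needs {topicCategoryDetails} and asks for mainFindingsRigid, mainFindingsSoft and summary.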
system_template = """
<Context>You are a researcher tasked with summarizing two wordlists that highlight people's assessments of rigid robots compared to soft robots. Laypersons were informed about the potential risks and benefits of {robots} through scenario texts. Initially, they listed their perceived risks and benefits of rigid robots in a list titled "rigid." Subsequently, they learned about the trend towards soft robots, which are made of flexible, soft materials and are electronic-free. They then created a list titled "soft" to highlight the differences between rigid and soft robots. The overarching topic of the two lists is the {topicCategory}, whereby the topic involved {topicCategoryDetails}.</Context>

<Data Structure>The lists "rigid" and "soft" are dictionaries where the keys are written arguments, and their corresponding values are one or more comments related to those arguments. The value [nan] indicates that no specific comment was provided for the respective entry. If there are multiple comments or missing entries ([nan]), it signifies that the respective argument was mentioned as many times as there are entries.</Data Structure>

<Task>Write two concise bullet points: one highlighting the main findings of the provided "rigid" list, and the highlighting the main findings of the provided "soft" list. Each set of bullet points should contain a maximum of five items, focusing on the overarching argument structures. Additionally, provide a summary paragraph of no more than four sentences that encapsulates the main findings of both lists. Do not use the term list, instead refer to the {robots}. Be scientific and neutral in your wording. Consider all provided information carefully. Check if you have provided the two lists of bullet points (called mainFindingsRigid and mainFindingsSoft), and the summary paragraph (called summary).</Task>
"""


user_template = """List "rigid": 
{rigid}

List "soft": 
{soft}"""

# Build the chat prompt (system + user message) and render it once as a check,
# here for rescue robots and socially assistive robots combined
prompt_template = ChatPromptTemplate.from_messages(
    [("system", system_template), ("user", user_template)]
)

# NOTE(review): the system_template assigned last in this cell contains the
# placeholder {topicCategoryDetails}, but no such key is passed below —
# presumably invoke fails with a missing-variable error; confirm and supply it.
result = prompt_template.invoke({"robots": "rescue robots and socially assistive robots", "topicCategory": abbreviations_dict[sheet_name], "rigid": constant_comments_mapping, "soft": new_comments_mapping})
print(result)

# The same rendered prompt again with a label, plus its list-of-messages form
print("result:", result)
print("result.to_messages():", result.to_messages())

### for task to get superordinate categories within single categories

In [None]:
system_template = """
<Context>
You are a researcher tasked with summarizing a list of words into generic/superordinate categories. Based on these categories, create a dictionary that assigns the respective subordinate terms (keys from the provided "overallList") to the generic terms. Laypersons were informed about the potential risks and benefits of rigid and soft {robots} through scenario texts. They then listed their perceived risks and benefits of rigid and soft robots in the "overallList" wordlist. The overarching topic of the list is {topicCategory}.
</Context>

<Data Structure>
The list "overallList" is a dictionary where the keys are written arguments, and the corresponding values are one or more comments related to those arguments. The value [nan] indicates that no specific comment was provided for the respective entry. If there are multiple comments or missing entries ([nan]), it signifies that the respective argument was mentioned multiple times, emphasizing its importance.
</Data Structure>

<Task>
Your task is to create two outputs:
1. A list called "listGeneric" that contains the generic/superordinate categories. You may use no more than six different categories.
2. A dictionary called "dictionary" that contains:
   - Keys: The generic/superordinate categories.
   - Values: The corresponding words (keys) from the "overallList" that have been summarized under each category.
The dictionary must contain all corresponding words (keys) from the "overallList". If it is not possible to assign a specific word, please place it in a category called "rest category".
</Task>
"""

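# !!! The assignment below overrides the template above. This version additionally needs {topicCategoryDetails}.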
system_template = """
<Context> You are a researcher tasked with summarizing a list of words into generic/superordinate categories. Based on these categories, create a dictionary that assigns the respective subordinate terms (keys from the provided "overallList") to the generic terms. Laypersons were informed about the potential risks and benefits of rigid and soft {robots} through scenario texts. They then listed their perceived risks and benefits of rigid and soft robots in the "overallList" wordlist. The overarching topic of the list is {topicCategory}, whereby the topic involved {topicCategoryDetails}.</Context>

<Data Structure> The list "overallList" is a dictionary where the keys are written arguments, and the corresponding values are one or more comments related to those arguments. The value [nan] indicates that no specific comment was provided for the respective entry. If there are multiple comments or missing entries ([nan]), it signifies that the respective argument was mentioned multiple times, emphasizing its importance. </Data Structure>

<Task> Your task is to create two outputs:
1. A list called "listGeneric" that contains the generic/superordinate categories. You may use no more than six different categories.
2. A dictionary called "dictionary" that contains: Keys (the generic/superordinate categories) and values (the corresponding words - keys - from the "overallList" that have been summarized under each category).
The dictionary must contain all corresponding words (keys) from the "overallList". If it is not possible to assign a specific word, please place it in a category called "rest category".</Task>
"""




user_template = """
List "overallList": 
{overallList}
"""

# Build the chat prompt for the superordinate-category task and render it once
# as a check, here for rescue robots and socially assistive robots combined
prompt_template_SC = ChatPromptTemplate.from_messages(
    [("system", system_template), ("user", user_template)]
)

# Merge the two word -> comments mappings into the single "overallList" input
constant_new_comments_mapping = combine_dicts(constant_comments_mapping, new_comments_mapping)

# NOTE(review): the system_template assigned last in this cell contains the
# placeholder {topicCategoryDetails}, but no such key is passed below —
# presumably invoke fails with a missing-variable error; confirm and supply it.
result = prompt_template_SC.invoke({"robots": "rescue robots and socially assistive robots", "topicCategory": abbreviations_dict[sheet_name], "overallList": constant_new_comments_mapping})
print(result)

# The same rendered prompt again with a label, plus its list-of-messages form
print("result:", result)
print("result.to_messages():", result.to_messages())

## Provide schemas for structured outputs

### for task to get main findings, differences and summary

In [None]:
# JSON schema for the structured output of the "main findings / differences /
# summary" task. Every name listed under "required" must also be declared under
# "properties"; the previous entry "similarities" did not exist as a property.
json_schema = {
    "title": "Outputs",
    "description": "Bullet lists detailing the similarities and differences between the rigid and soft lists and a summary paragraph.",
    "type": "object",
    "properties": {
        "mainFindings": {
            "type": "string",
            "description": "Bullet lists highlighting the main findings of the provided rigid and soft lists",
        },
        "differences": {
            "type": "string",
            "description": "Bullet lists detailing the differences between the rigid and soft lists",
        },
        "summary": {
            "type": "string",
            "description": "Summary paragraph that provides a summary of the main findings and the found differences",
        },
    },
    "required": ["mainFindings", "differences", "summary"],
}

### for task to get superordinate categories within single categories

In [None]:
# Schema variant for the superordinate-category task in which both outputs are
# declared as plain strings; not referenced elsewhere in the visible notebook.
json_schema_notUsed = dict(
    title="Outputs",
    description="List that contains the generic / superordinate categories and a dictionary, which assigns the respective subordinate terms to the generic terms.",
    type="object",
    properties=dict(
        listGeneric=dict(
            type="string",
            description="List that contains the generic / superordinate categories",
        ),
        dictionary=dict(
            type="string",
            description="Dictionary that contains the keys, the generic / superordinate categories and the corresponding words that have been summarised under the respective category",
        ),
    ),
    required=["listGeneric", "dictionary"],
)

In [27]:
# Schema for the superordinate-category task: "listGeneric" is an array of
# strings and "dictionary" is an object whose values are arrays of strings.
json_schema_SC = dict(
    title="Outputs",
    description="List that contains the generic/superordinate categories and a dictionary that assigns the respective subordinate terms to the generic terms.",
    type="object",
    properties=dict(
        listGeneric=dict(
            type="array",
            description="List that contains the generic/superordinate categories.",
            items=dict(type="string"),
        ),
        dictionary=dict(
            type="object",
            description="Dictionary that contains the generic/superordinate categories as keys and the corresponding words from the 'overallList' as values.",
            additionalProperties=dict(type="array", items=dict(type="string")),
        ),
    ),
    required=["listGeneric", "dictionary"],
)

## Define basic API call

### for task to get main findings, differences and summary

In [21]:
def basic_API_call(
    prompt,
    robots,
    topicCategory,
    openai_api_key,
    dictonaryRigid,
    dictonarySoft,
    json_schema,
    model_name="gpt-4o",
    max_tokens=1000,
    topicCategoryDetails=None,
):
    """
    Run one structured-output request for the "rigid" vs. "soft" word lists.

    Parameters:
    prompt: Chat prompt template that is piped into the model.
    robots (str): Text inserted for the {robots} placeholder.
    topicCategory (str): Text inserted for the {topicCategory} placeholder.
    openai_api_key (str): API key handed to ChatOpenAI.
    dictonaryRigid (dict): Mapping inserted for the {rigid} placeholder.
    dictonarySoft (dict): Mapping inserted for the {soft} placeholder.
    json_schema (dict): Schema passed to with_structured_output.
    model_name (str): Name of the chat model.
    max_tokens (int): Completion token limit passed to ChatOpenAI.
    topicCategoryDetails (str | None): Text for the {topicCategoryDetails}
        placeholder. Only forwarded when given, so prompts without that
        placeholder keep receiving exactly the same inputs as before.

    Returns:
    The value returned by chain.invoke. Because include_raw=True is used, callers
    in this notebook read the parsed output from its 'parsed' entry.

    Side effects:
    Prints the token usage and cost collected by get_openai_callback.
    """
    # Fixed seed together with temperature 0.0 to keep repeated runs as similar as possible
    seed = 123

    model = ChatOpenAI(model=model_name, openai_api_key=openai_api_key, max_tokens=max_tokens, model_kwargs={"seed": seed}, temperature=0.0)

    structured_llm = model.with_structured_output(json_schema, include_raw=True)
    chain = prompt | structured_llm

    prompt_inputs = {"robots": robots, "topicCategory": topicCategory, "rigid": dictonaryRigid, "soft": dictonarySoft}
    # Templates that use {topicCategoryDetails} cannot be rendered without this value
    if topicCategoryDetails is not None:
        prompt_inputs["topicCategoryDetails"] = topicCategoryDetails

    # The callback context collects token counts and cost of the request
    with get_openai_callback() as cb:
        response = chain.invoke(prompt_inputs)
        print(cb)

    print(f"Total Tokens: {cb.total_tokens}")
    print(f"Prompt Tokens: {cb.prompt_tokens}")
    print(f"Completion Tokens: {cb.completion_tokens}")
    print(f"Total Cost (USD): ${cb.total_cost}")

    return response

### for task to get superordinate categories within single categories

In [22]:
def basic_API_call_SC(
    prompt,
    robots,
    topicCategory,
    openai_api_key,
    dictonaryCombined,
    json_schema,
    model_name="gpt-4o",
    max_tokens=1000,
    topicCategoryDetails=None,
):
    """
    Run one structured-output request for the superordinate-category task.

    Parameters:
    prompt: Chat prompt template that is piped into the model.
    robots (str): Text inserted for the {robots} placeholder.
    topicCategory (str): Text inserted for the {topicCategory} placeholder.
    openai_api_key (str): API key handed to ChatOpenAI.
    dictonaryCombined (dict): Mapping inserted for the {overallList} placeholder.
    json_schema (dict): Schema passed to with_structured_output.
    model_name (str): Name of the chat model.
    max_tokens (int): Completion token limit passed to ChatOpenAI.
    topicCategoryDetails (str | None): Text for the {topicCategoryDetails}
        placeholder. Only forwarded when given, so prompts without that
        placeholder keep receiving exactly the same inputs as before.

    Returns:
    The value returned by chain.invoke (structured output requested with
    include_raw=True).

    Side effects:
    Prints the token usage and cost collected by get_openai_callback.
    """
    # Fixed seed together with temperature 0.0 to keep repeated runs as similar as possible
    seed = 123

    model = ChatOpenAI(model=model_name, openai_api_key=openai_api_key, max_tokens=max_tokens, model_kwargs={"seed": seed}, temperature=0.0)

    structured_llm = model.with_structured_output(json_schema, include_raw=True)
    chain = prompt | structured_llm

    prompt_inputs = {"robots": robots, "topicCategory": topicCategory, "overallList": dictonaryCombined}
    # Templates that use {topicCategoryDetails} cannot be rendered without this value
    if topicCategoryDetails is not None:
        prompt_inputs["topicCategoryDetails"] = topicCategoryDetails

    # The callback context collects token counts and cost of the request
    with get_openai_callback() as cb:
        response = chain.invoke(prompt_inputs)
        print(cb)

    print(f"Total Tokens: {cb.total_tokens}")
    print(f"Prompt Tokens: {cb.prompt_tokens}")
    print(f"Completion Tokens: {cb.completion_tokens}")
    print(f"Total Cost (USD): ${cb.total_cost}")

    return response

# Run ChatGPT

## Example (overall)

Only for main findings, differences, and summary.

> Remark: The argument structures between the two types of robots differ significantly. Therefore, the robots are qualitatively summarized separately to ensure a clear and accurate comparison.

In [None]:
# Show which category sheet is processed and the wording used for it in the prompt
print(f"sheet_name: {sheet_name}")
print(f"abbreviations_dict[sheet_name]: {abbreviations_dict[sheet_name]}")


# Single example request on the combined (both robots) mappings.
# NOTE(review): json_schema declares mainFindings / differences / summary, while the
# system_template assigned last asks for mainFindingsRigid / mainFindingsSoft /
# summary — confirm that prompt and schema are meant to be used together.
result = basic_API_call(prompt=prompt_template,
    robots="rescue robots and socially assistive robots",
    topicCategory=abbreviations_dict[sheet_name],
    openai_api_key=key.openai_api_key,
    dictonaryRigid=constant_comments_mapping, # overall
    dictonarySoft=new_comments_mapping,
    json_schema=json_schema,
    model_name="gpt-4o",
    max_tokens=1000,
)

In [None]:
# Extract the 'parsed' section of the structured-output result.
# With include_raw=True the 'parsed' key is present even when parsing failed, but
# then holds None, so a .get() default would not apply; `or {}` covers that case
# and keeps the lookups below from failing on None.
parsed_section = result.get('parsed') or {}
#print(parsed_section)
# Extract the three outputs declared in the schema (None if missing)
mainFindings = parsed_section.get('mainFindings')
differences = parsed_section.get('differences')
summary = parsed_section.get('summary')

print(f"result (raw): {result}")
print(f"mainFindings: {mainFindings}")
print(f"differences: {differences}")
print(f"summary: {summary}")

## Separately for robots (rescue robot and socially assistive robot)

### for rescue robots (main findings, difference, summary)

In [None]:
# Accumulators; every processed category appends exactly one entry to each
# list, so they stay index-aligned for the DataFrame below.
categories = []
mainFindings = []
differences = []
summary = []
rawResults = []


for category in abbreviations_dict.keys():
    print(f"category: {category}")

    # do not process the rest categories
    if category in ['RCPP', 'RCPN', 'RCA', 'RCN']:
        continue

    df = all_sheets_RR[category]
    constant_comments_mapping = create_multivalue_dict(df, 'constant', 'constant_comments')
    new_comments_mapping = create_multivalue_dict(df, 'new', 'new_comments')

    result = basic_API_call(
        prompt=prompt_template,
        robots="rescue robots",
        topicCategory=abbreviations_dict[category],
        openai_api_key=key.openai_api_key,
        dictonaryRigid=constant_comments_mapping,
        dictonarySoft=new_comments_mapping,
        json_schema=json_schema,
        model_name="gpt-4o",
        max_tokens=1600,  # increase limit
    )

    # append raw results
    categories.append(category)
    rawResults.append(result)

    # append parsed results
    # A present-but-None 'parsed' value is not replaced by the default of
    # dict.get(); without the explicit fallback one such response would raise
    # AttributeError and abort the loop before anything is saved.
    # NOTE(review): presumably 'parsed' is None when structured-output parsing
    # fails (include_raw=True behaviour) - confirm against basic_API_call.
    parsed_section = result.get('parsed') or {}
    if not parsed_section:
        print(f"WARNING: no parsed output for category {category}: {result.get('parsing_error')}")
    mainFindings.append(parsed_section.get('mainFindings'))
    differences.append(parsed_section.get('differences'))
    summary.append(parsed_section.get('summary'))

# save file
df_RR = pd.DataFrame({
    'Category': categories,
    'mainFindings': mainFindings,
    'differences': differences,
    'summary': summary,
    'rawResults' : rawResults
})

# Path to your Excel file
file_path = directory + "/output/" + "rescue robot_ChatGPT" + ".xlsx"
# save the dataframe to an Excel file
df_RR.to_excel(file_path, index=False)

### for rescue robots (get superordinate categories within single categories: listGeneric, dictionary)

In [None]:
# Result accumulators. This cell only fills `categories` and `rawResults`;
# the remaining three lists are reset to empty and left untouched here.
categories, rawResults = [], []
mainFindings, differences, summary = [], [], []

for category in abbreviations_dict.keys():
    print(f"category: {category}")

    # This run is restricted to the 'MT' category; everything else is skipped.
    if category not in ['MT']:
        continue

    df = all_sheets_RR[category]
    constant_comments_mapping = create_multivalue_dict(df, 'constant', 'constant_comments')
    new_comments_mapping = create_multivalue_dict(df, 'new', 'new_comments')

    # Both mappings are merged by combine_dicts (defined elsewhere) and passed
    # to the API wrapper as `dictonaryCombined`.
    constant_new_comments_mapping = combine_dicts(constant_comments_mapping, new_comments_mapping)

    result = basic_API_call_SC(
        prompt=prompt_template_SC,
        json_schema=json_schema_SC,
        robots="rescue robots",
        topicCategory=abbreviations_dict[category],
        dictonaryCombined=constant_new_comments_mapping,
        openai_api_key=key.openai_api_key,
        model_name="gpt-4o",
        max_tokens=2000,  # increase limit
    )

    # Keep the untouched response; the parsed payload is inspected separately.
    categories.append(category)
    rawResults.append(result)
        


In [None]:
# Look at the structured payload of the most recent API response.
parsed_section = result.get("parsed", {})
print(parsed_section)

In [None]:
# save file
# The superordinate-category run above fills only `categories` and
# `rawResults`; `mainFindings`, `differences` and `summary` stay empty, so a
# frame built from all five lists fails with "All arrays must be of the same
# length". Build the frame from the two populated lists plus the parsed
# payload taken from each raw response.
parsed_SC = [(raw.get('parsed') or {}) for raw in rawResults]

df_RR_SC = pd.DataFrame({
    'Category': categories,
    'parsed': parsed_SC,
    'rawResults' : rawResults
})

# Path to your Excel file. A distinct name ("_SC") is used so this export does
# not overwrite the main-findings workbook "rescue robot_ChatGPT.xlsx".
file_path = directory + "/output/" + "rescue robot_ChatGPT_SC" + ".xlsx"
# save the dataframe to an Excel file
df_RR_SC.to_excel(file_path, index=False)

### for socially assistive robots (main findings, difference, summary)

In [None]:
# Accumulators; every processed category appends exactly one entry to each
# list, so they stay index-aligned for the DataFrame below.
categories = []
mainFindings = []
differences = []
summary = []
rawResults = []


for category in abbreviations_dict.keys():
    print(f"category: {category}")

    # do not process the rest categories
    if category in ['RCPP', 'RCPN', 'RCA', 'RCN']:
        continue

    df = all_sheets_SAR[category]
    constant_comments_mapping = create_multivalue_dict(df, 'constant', 'constant_comments')
    new_comments_mapping = create_multivalue_dict(df, 'new', 'new_comments')

    result = basic_API_call(
        prompt=prompt_template,
        robots="socially assistive robots",
        topicCategory=abbreviations_dict[category],
        openai_api_key=key.openai_api_key,
        dictonaryRigid=constant_comments_mapping,
        dictonarySoft=new_comments_mapping,
        json_schema=json_schema,
        model_name="gpt-4o",
        max_tokens=1000,
    )

    # append raw results
    categories.append(category)
    rawResults.append(result)

    # append parsed results
    # A present-but-None 'parsed' value is not replaced by the default of
    # dict.get(); without the explicit fallback one such response would raise
    # AttributeError and abort the loop before anything is saved.
    # NOTE(review): presumably 'parsed' is None when structured-output parsing
    # fails (include_raw=True behaviour) - confirm against basic_API_call.
    parsed_section = result.get('parsed') or {}
    if not parsed_section:
        print(f"WARNING: no parsed output for category {category}: {result.get('parsing_error')}")
    mainFindings.append(parsed_section.get('mainFindings'))
    differences.append(parsed_section.get('differences'))
    summary.append(parsed_section.get('summary'))

# save file
df_SAR = pd.DataFrame({
    'Category': categories,
    'mainFindings': mainFindings,
    'differences': differences,
    'summary': summary,
    'rawResults' : rawResults
})

# Path to your Excel file
file_path = directory + "/output/" + "socially assistive robot_ChatGPT" + ".xlsx"
# save the dataframe to an Excel file
df_SAR.to_excel(file_path, index=False)