In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
from langchain_openai import ChatOpenAI
from io import StringIO
import pandas as pd

import sys
import json
# Add the parent directory to the path
sys.path.append('..')

# Now you can import the config module
from config import groqkey, OPENAI_Key

In [None]:
# Read the selected survey statements (one row per Attribute, with a question
# description and an aggregated response for the aim and for each condition).
statement_questions = pd.read_csv("SCALAR_tree_selected_statements.csv")

# Build the model input in one step: keep 'Attribute' as-is and, for every
# component, fuse "<description> Response: <response>" into a single text column.
# NOTE(review): with pandas string concatenation a missing description or
# response presumably leaves the fused cell null - confirm against the data.
edited_statement_questions = pd.DataFrame({
    'Attribute': statement_questions['Attribute'].copy(),
    **{
        component: (
            statement_questions[f'{component}_description']
            + " Response: "
            + statement_questions[f'{component}_resp']
        )
        for component in ('Aim', 'Condition1', 'Condition2')
    },
})

# Serialise the frame as JSON text and echo it for inspection.
json_text = edited_statement_questions.to_json()
print(json_text)

In [None]:
# CSV rendering of the same frame; the index is written out as a leading
# column whose header is 'Index'.
csv_text = (
    edited_statement_questions
    .rename_axis('Index')
    .to_csv()
)
csv_text

In [None]:
## CSV approach

# Collects the raw model replies produced by this section.
statlist = []

# Candidate model names for ChatGroq; index 3 (the last entry) is the one used.
models = ["gemma-7b-it", "llama3-8b-8192", "mixtral-8x7b-32768", "llama3-70b-8192"]
llm = ChatGroq(temperature=0, model=models[3], api_key=groqkey)

# Alternative backend: uncomment both lines below to use ChatOpenAI instead.
# models = ["gpt-3.5-turbo-0125", "gpt-4o"]
# llm = ChatOpenAI(temperature=0, model=models[1], api_key=OPENAI_Key)

# Example of the target shape for one rewritten row (reference only, not sent
# to the model):
#   "Attribute": "People", "Aim": "do this specific action",
#   "Condition1": "if this condition is met", "Condition2": "and this condition is met"

def ExampleCompletionFunction(request):
    """Ask the chat model to rewrite survey rows, given as CSV text, into statements.

    Parameters
    ----------
    request : str
        CSV text with the columns 'Attribute', 'Aim', 'Condition1' and
        'Condition2'. It is passed as the ``text`` variable of the human message.

    Returns
    -------
    The ``content`` of the model's response. The instructions ask the model
    for ':'-separated output with one row per input row.

    Notes
    -----
    Uses the notebook-level ``llm`` object, which must be defined before this
    function is called.
    """
    # Plain literal (no f-prefix): nothing is interpolated into these instructions.
    system = """You are a CSV interpreter that converts survey questions and responses into structured statements depicting group actions influenced by specific conditions.

  The input CSV will have the following columns:
  - 'Attribute': The demographic of the survey responses.
  - 'Aim': The action question the responders were asked and the aggregated response to the aim.
  - 'Condition1': The first related condition question the responders were asked and the aggregated response to the first condition question.
  - 'Condition2': The second related condition question the responders were asked and the aggregated response to the second condition question.

  Your task is to rewrite each row of the provided CSV and generate the following columns for the output CSV:
  - 'Attribute': Retain the value of 'Attribute'.
  - 'Aim': Combine and rewrite 'Aim' as an action starting with a base form/infinitive verb, be as specific as possible.
  - 'Condition1': Combine and rewrite 'Condition1' as a condition that should be met, starting with 'if' and using third-person/they.
  - 'Condition2': Combine and rewrite 'Condition2' as a condition that should be met, starting with 'and if' and using third-person/they.

  If a column value is null, leave that column value blank. Do not remove the entire row and do not replace it with another column value.

  The output should allow combining 'Attribute' + 'Aim' + 'Condition1' + 'Condition2' to form a complete third-person sentence describing the behavior of the demographic.

  Be concise but do not oversimplify or generalize the actions and conditions. Maintain any negative language.
  Use ':' as the separator, include the row numbers and the column names excluding index.
  Treat each row separately and produce the same number of rows as provided in the input. The output should contain the same number of rows as the input.
                """
    # The human message is a template with a single variable, filled from `request`.
    human = "{text}"
    prompt = ChatPromptTemplate.from_messages([("system", system), ("human", human)])

    chain = prompt | llm
    response = chain.invoke({"text": request})
    return response.content

# Run the CSV-based rewrite, keep the raw reply in `statlist`, and echo it so
# the reply format can be checked before it is parsed.
IG_Statement = ExampleCompletionFunction(csv_text)
statlist.append(IG_Statement)
print(IG_Statement)

In [None]:
def _extract_csv_block(reply, sep=":"):
    """Return the ``sep``-separated table embedded in a model reply.

    The header row is the first line whose fields (split on ``sep``) include
    both 'Attribute' and 'Aim'; everything from that line on is kept, minus
    Markdown code-fence lines. If no such header is found, the first two lines
    are treated as preamble and skipped.

    Raises
    ------
    ValueError
        If no header is found and the reply has fewer than three lines.
    """
    lines = reply.splitlines()

    def _is_header(line):
        fields = {field.strip().strip("'\"") for field in line.split(sep)}
        return {"Attribute", "Aim"} <= fields

    start = next((i for i, line in enumerate(lines) if _is_header(line)), None)
    if start is None:
        # No recognisable header: fall back to skipping two preamble lines.
        if len(lines) < 3:
            raise ValueError(
                "Model reply is too short to contain a CSV table after the preamble."
            )
        start = 2

    # Fence lines (```), if the model added any, are not part of the table.
    table_lines = [line for line in lines[start:] if not line.strip().startswith("```")]
    return "\n".join(table_lines)


# Extract the CSV part from the model reply
csv_data = _extract_csv_block(IG_Statement)

# Convert the CSV data to a pandas DataFrame; the first column holds the row numbers
IG_component_df = pd.read_csv(StringIO(csv_data), sep=":", index_col=0)

# Display the DataFrame
IG_component_df

In [None]:
## JSON approach

# Collects the raw model replies produced by this section (re-created empty here).
statlist = []

# Candidate model names for ChatGroq; index 3 (the last entry) is the one used.
models = ["gemma-7b-it", "llama3-8b-8192", "mixtral-8x7b-32768", "llama3-70b-8192"]
llm = ChatGroq(temperature=0, model=models[3], api_key=groqkey)

# Alternative backend: uncomment both lines below to use ChatOpenAI instead.
# models = ["gpt-3.5-turbo-0125", "gpt-4o"]
# llm = ChatOpenAI(temperature=0, model=models[1], api_key=OPENAI_Key)

# Example of the target shape for one rewritten object (reference only, not
# sent to the model):
#   "Attribute": "People", "Aim": "do this specific action",
#   "Condition1": "if this condition is met", "Condition2": "and this condition is met"

def ExampleCompletionFunction(request):
    """Ask the chat model to rewrite survey records, given as JSON text, into statements.

    Parameters
    ----------
    request : str
        JSON text with the keys 'Attribute', 'Aim', 'Condition1' and
        'Condition2'. It is passed as the ``text`` variable of the human message.

    Returns
    -------
    The ``content`` of the model's response.

    Notes
    -----
    Reads two notebook-level names: ``llm`` (the chat model) and
    ``edited_statement_questions`` (its length is written into the
    instructions as the expected number of output items). This definition
    reuses the name of the CSV-based function defined earlier in the notebook
    and replaces it once this cell has run.
    """
    # Number of items the model is told to return.
    expected_items = len(edited_statement_questions)

    system_message = f"""You are a JSON interpreter that converts survey questions and responses into structured statements depicting group actions influenced by specific conditions.

The input JSON will have the following variables:
  - 'Attribute': The demographic of the survey responses.
  - 'Aim': The action question the responders were asked and the aggregated response to the aim.
  - 'Condition1': The first related condition question the responders were asked and the aggregated response to the first condition question.
  - 'Condition2': The second related condition question the responders were asked and the aggregated response to the second condition question.

  Your task is to rewrite the provided JSON and generate the following keys for the output JSON:
  - 'Attribute': Retain the value of 'Attribute'.
  - 'Aim': Combine and rewrite 'Aim' as an action starting with a base form/infinitive verb, clearly stating what they are implementing.
  - 'Condition1': Combine and rewrite 'Condition1' as a condition that should be met, starting with 'if' and using third-person/they.
  - 'Condition2': Combine and rewrite 'Condition2' as a condition that should be met, starting with 'and if' and using third-person/they.

  If a key value is null, leave that key value blank. Do not remove the entire object and do not replace it with another key value.

  The output should allow combining 'Attribute' + 'Aim' + 'Condition1' + 'Condition2' to form a complete third-person sentence describing the behavior of the demographic.

  Be concise but do not oversimplify or generalize the actions and conditions. Maintain any negative language.

  Treat each object separately and produce the same number of objects as provided in the input. The output should contain {expected_items} items.
                """

    # System instructions plus a human message whose only variable is `text`.
    template = ChatPromptTemplate.from_messages(
        [("system", system_message), ("human", "{text}")]
    )

    reply = (template | llm).invoke({"text": request})
    return reply.content

# Run the JSON-based rewrite, keep the raw reply in `statlist`, and echo it so
# the reply format can be checked before it is parsed.
IG_Statement = ExampleCompletionFunction(json_text)
statlist.append(IG_Statement)
print(IG_Statement)

In [None]:
def _parse_json_reply(reply):
    """Parse a model reply as JSON, tolerating text around a JSON array.

    The whole reply is tried first. If that fails, each '[' in the reply is
    tried in turn as the start of a JSON array, and the first one that decodes
    is returned; any text after that array is ignored.

    Raises
    ------
    ValueError
        If the reply is not valid JSON and contains no decodable JSON array.
    """
    try:
        return json.loads(reply)
    except json.JSONDecodeError:
        pass  # Reply has extra text around the JSON; scan for the array below.

    decoder = json.JSONDecoder()
    start = reply.find("[")
    while start != -1:
        try:
            # raw_decode reads one JSON value and ignores whatever follows it.
            parsed, _ = decoder.raw_decode(reply[start:])
        except json.JSONDecodeError:
            start = reply.find("[", start + 1)
        else:
            return parsed
    raise ValueError("Could not find a JSON array in the model reply.")


# Empty frame with the expected columns; not referenced again in this cell.
df = pd.DataFrame(columns=["Attribute","Aim","Condition1", "Condition2"])

# Parse the reply into Python data (a list of records when the model returns an array)
data_dict = _parse_json_reply(IG_Statement)

# Build the DataFrame from the parsed data
IG_component_df = pd.DataFrame(data_dict)

display(IG_component_df)