In [8]:
import ollama
import sys
import csv
import pandas as pd

In [9]:
# Define a function to replace "not numbered" with the value in "Variable Label"
def replace_not_numbered(row):
    if row["Question number\n (Questionnaire file)"] == "not numbered":
        return row["Variable Label\n (Data files)"]
    else:
        return row["Question number\n (Questionnaire file)"]
    
survey_questions  = pd.read_excel('Survey Questions Overview.xlsx', sheet_name='Wave 1', engine='openpyxl')
# Clean the survey questions dataframe to make it more usable for mapping
# Fill forward non-null ADICO Category values to apply them to all relevant rows, Specify the columns to forward fill excluding "Values" and "Value labels"
columns_to_ffill = [col for col in survey_questions.columns if col not in ["Values", "Value labels"]]

# Forward fill the specified columns
survey_questions[columns_to_ffill] = survey_questions[columns_to_ffill].ffill()


# Apply the function to replace "not numbered" with the value in "Variable Label"
survey_questions["Question number\n (Questionnaire file)"] = survey_questions.apply(replace_not_numbered, axis=1)


# Set the index to {value of "Question number\n (Questionnaire file)"} + "_" + {str(value of "Values")}
survey_questions.set_index(survey_questions["Variable Label\n (Data files)"] + "_" + survey_questions["Values"].astype(str), inplace=True)
survey_questions.drop("ID_nan", inplace=True)

question_answers_list = []
# Define a function to create the combined string
def combine_description_and_labels(group, question_answers_list, qnum):
    question_subset = survey_questions[survey_questions["Question number\n (Questionnaire file)"] == group["Question number\n (Questionnaire file)"].iloc[0]]
    # Check if it's the first row instance with the current "Question number\n (Questionnaire file)" column value
    first_instance_index = question_subset[question_subset.duplicated(subset=["Question number\n (Questionnaire file)"], keep="first")].index
    combined_string = ""
    if first_instance_index.size != 0:
        first_description = question_subset["Description"].iloc[0]
        if first_description != group["Description"].iloc[0]:
         # If not the first instance, start with the first instance's "Description" column value
            combined_string += first_description
    # Concatenate the current row's "Description" and all "Value labels" values
    combined_string += str(group["Description"].iloc[0]) #+ " " + "; ".join(group["Value labels"].astype(str))
    question_answers_list = question_answers_list + [combined_string] * group.shape[0]  # Extend the list with the combined strings
    return question_answers_list

# Group by "Variable Label\n (Data files)" and apply the function to create the combined string
for group in survey_questions.groupby("Variable Label\n (Data files)",sort=False):
    question_answers_list = combine_description_and_labels(group[1], question_answers_list, group[1]["Question number\n (Questionnaire file)"].iloc[0])
survey_questions["question_answers_combined"] = question_answers_list


In [10]:
survey_questions['Variable Label\n (Data files)'].iat[330]

'Q31a_media_trust'

In [53]:
selected_statements = pd.read_csv('tree_selected _statements.csv')
selected_statements['Aim'] = [survey_questions[survey_questions['Variable Label\n (Data files)'] == aim]['question_answers_combined'].iat[0] for aim in list(selected_statements['Aim'])]
selected_statements['Condition_resp'] = [survey_questions[survey_questions['Variable Label\n (Data files)'] == condition]['Value labels'].iat[0] for condition in list(selected_statements['Condition'])]
selected_statements['Condition'] = [survey_questions[survey_questions['Variable Label\n (Data files)'] == condition]['question_answers_combined'].iat[0] for condition in list(selected_statements['Condition'])]

selected_statements

# selected_statements.at[0,'Condition']

Unnamed: 0,Aim,Aim_resp,filter1,Condition,Threshold,filter2,Condition_resp
0,How frequently do you read information about f...,1 - Very infrequently,16.0,How often do you think a flood occurs on the p...,2.5,13.0,My house is completely safe
1,Please indicate if you have already implemente...,I have already implemented this non-structural...,15.0,Please indicate if you have already implemente...,3.5,3.0,I have already implemented this structural mea...
2,Please indicate if you have already implemente...,I have already implemented this non-structural...,15.0,Please indicate if you have already implemente...,3.5,3.0,I have already implemented this structural mea...
3,Please indicate if you have already implemente...,I have already implemented this non-structural...,15.0,Please indicate if you have already implemente...,3.5,3.0,I have already implemented this structural mea...
4,Please indicate if you have already implemente...,I have already implemented this non-structural...,15.0,Please indicate if you have already implemente...,3.5,3.0,I have already implemented this structural mea...
5,Please indicate if you have already implemente...,I do not intend to implement this non-structur...,11.0,Please indicate if you have already implemente...,3.5,9.0,I have already implemented this non-structural...
6,Please indicate if you have already implemente...,I have already implemented this structural mea...,17.0,Please indicate if you have already implemente...,3.5,2.0,I have already implemented this structural mea...


In [54]:
def IGMaker(row):
    aim = row['Aim']
    aim_resp = row['Aim_resp']
    condition = row['Condition']
    condition_resp = row['Condition_resp']

    prompt = f"""The answer to the (action) question '{aim}' is '{aim_resp}', if the answer to the (condition) question '{condition}' is '{condition_resp}'. 
                Rewrite this information in a statement in the form of: 
                'Households will do x action if y condition'
                """

    response = ollama.chat(model='mistral', messages=[{
        'role':'user',
        'content': prompt    
        },
        ])
    return response['message']['content']

selected_statements["IG_statements"] = selected_statements.apply(lambda row: IGMaker(row), axis=1)

In [57]:
[print(statement) for statement in list(selected_statements['IG_statements'])]

 Households will infrequently read information about flooding and other hazards from general media if they believe that their property is completely safe.
 Households will indicate having implemented community safety involvement if they have already installed anti-backflow valves on pipes.
 Households will indicate having implemented the non-structural measure of being an active community group member for safety if they have already implemented structural measures such as installing a pump and/or one or more drainage systems.
 Households will indicate having implemented community safety involvement if they have already fixed water barriers (structural measure) is not applicable. If they have already implemented being an active member in a community group aimed at making the community safer (non-structural measure), then the statement is: "Households will indicate 'I have already implemented this non-structural measure' if they have engaged in community safety initiatives."
 Households 

[None, None, None, None, None, None, None]

{'model': 'mistral',
 'created_at': '2024-04-24T13:02:03.9408155Z',
 'message': {'role': 'assistant',
  'content': ' Households will infrequently read information about flooding and other hazards from general media if they believe that a flood occurring on their property is a very infrequent occurrence.'},
 'done': True,
 'total_duration': 24020756800,
 'load_duration': 1560100,
 'prompt_eval_count': 134,
 'prompt_eval_duration': 16795941000,
 'eval_count': 38,
 'eval_duration': 7219703000}

{'model': 'mistral',
 'created_at': '2024-04-24T12:52:13.6623948Z',
 'message': {'role': 'assistant',
  'content': ' He or she seldom engages with media content concerning flooding and other hazards.'},
 'done': True,
 'total_duration': 4839265900,
 'load_duration': 2057000,
 'prompt_eval_count': 11,
 'prompt_eval_duration': 1349573000,
 'eval_count': 19,
 'eval_duration': 3485178000}