In [1]:
import pandas as pd
import os

In [2]:
# Columns derived from the character name: column -> value -> names receiving it.
_NAME_ATTRIBUTES = {
    'sense': {
        'see': ['Cindy', 'John', 'Sarah', 'David', 'Mary', 'Michael', 'Steven', 'Stephanie', 'Olivia', 'Lucas', 'Trever', 'Emma'],
        'hear': ['Harry', 'Michelle', 'Sandra', 'Ben', 'Robert', 'Emily', 'Allie', 'Ken', 'Mia', 'William', 'Penelope', 'James'],
    },
    'distance': {
        'next': ['Cindy', 'John', 'Sarah', 'David', 'Harry', 'Michelle', 'Sandra', 'Ben'],
        'street': ['Mary', 'Michael', 'Steven', 'Stephanie', 'Robert', 'Emily', 'Allie', 'Ken'],
        'block': ['Olivia', 'Lucas', 'Trever', 'Emma', 'Mia', 'William', 'Penelope', 'James'],
    },
    'duration': {
        'short': ['Sarah', 'David', 'Steven', 'Stephanie', 'Trever', 'Emma', 'Sandra', 'Ben', 'Allie', 'Ken', 'Penelope', 'James'],
        'long': ['Cindy', 'John', 'Mary', 'Michael', 'Olivia', 'Lucas', 'Harry', 'Michelle', 'Robert', 'Emily', 'Mia', 'William'],
    },
    'charID': {
        'sighted': ['Cindy', 'John', 'Sarah', 'David', 'Mary', 'Michael', 'Steven', 'Stephanie', 'Olivia', 'Lucas', 'Trever', 'Emma', 'Harry', 'Sandra', 'Robert', 'Allie', 'Mia', 'Penelope'],
        'blind': ['Michelle', 'Ben', 'Emily', 'Ken', 'William', 'James'],
    },
    'type': {
        'ns': ['Sarah', 'David', 'Sandra', 'Ben'],
        'nl': ['Cindy', 'John', 'Harry', 'Michelle'],
        'ms': ['Steven', 'Stephanie', 'Allie', 'Ken'],
        'ml': ['Mary', 'Michael', 'Robert', 'Emily'],
        'fs': ['Trever', 'Emma', 'Penelope', 'James'],
        'fl': ['Olivia', 'Lucas', 'Mia', 'William'],
    },
}

# (qs label, substring searched for in the prompt), in output order.
_QS_PATTERNS = [
    ("male/female", "male"),
    ("30yo/60yo", "30"),
    ("happy/sad", "happy sad"),
    ("crying", "crying"),
    ("smile/frown", "frown"),
    ("hair color", "hair color"),
    ("pendant", "pendant"),
    ("tshirt", "t-shirt"),
    ("siblings", "siblings"),
    ("hat", "wearing a hat"),
    ("eye color", "eye"),
    ("pea mole", "a pea"),
    ("dime mole", "dime"),
    ("hear say", "heard the speaker"),
    ("understand", "understand"),
    ("who to", "was talking to"),
    ("Brit/American accent", "British"),
    ("US accent", "United States"),
    ("eating", "eating"),
    ("polite/flirt", "polite"),
    ("met before", "met this person"),
    ("recognize", "recognize"),
]


def _find_qs(prompt):
    """Return the comma-separated qs labels whose search string occurs in `prompt`.

    A prompt containing "recognize" is labelled "recognize" only, regardless of
    any other search strings it contains. A missing / non-string prompt (e.g.
    NaN from an empty CSV cell) yields an empty string instead of raising.
    """
    if not isinstance(prompt, str):
        return ''
    if "recognize" in prompt:
        return "recognize"
    return ', '.join(label for label, needle in _QS_PATTERNS if needle in prompt)


def _ctrlq(qs, sense):
    """Return 1 when the (qs, sense) pair is one of the flagged combinations, else 0."""
    if "siblings" in qs and sense in ("see", "hear"):
        return 1
    if "accent" in qs and sense == "see":
        return 1
    if "hair color" in qs and sense == "hear":
        return 1
    return 0


def process_csv(file_path):
    """Annotate a CSV file in place with name- and prompt-derived columns.

    The file is read with pandas, the columns 'qs', 'sense', 'distance',
    'duration', 'type', 'ctrlq' and 'charID' are (re)computed, and the result
    is written back to the same path without the index.

    Parameters
    ----------
    file_path : str
        Path of the CSV file to read and overwrite. It must contain the
        columns 'name' and 'prompt'.

    Returns
    -------
    None

    Raises
    ------
    KeyError
        If the 'name' or 'prompt' column is missing.
    """
    ldata = pd.read_csv(file_path)

    # Reset every derived column so that re-processing a file never keeps
    # stale values. 'charID' comes last to keep the original column order.
    for column in ['qs', 'sense', 'distance', 'duration', 'type', 'ctrlq', 'charID']:
        ldata[column] = None

    # Name-driven attributes; rows whose name is in no list stay empty.
    for column, groups in _NAME_ATTRIBUTES.items():
        for value, names in groups.items():
            ldata.loc[ldata['name'].isin(names), column] = value

    # Prompt-driven question labels.
    ldata['qs'] = ldata['prompt'].map(_find_qs)

    # Flag column computed from the question labels and the sense.
    ldata['ctrlq'] = [_ctrlq(qs, sense) for qs, sense in zip(ldata['qs'], ldata['sense'])]

    # Save the modified DataFrame to the same file
    ldata.to_csv(file_path, index=False)
   

# Folder whose "Modified_*.csv" files are annotated in place by process_csv.
directory = 'c:/Users/shara/OneDrive/Desktop/GPT Script/GPT4/'

# Visit every entry of the folder once and skip anything that is not a target file.
for filename in os.listdir(directory):
    if not (filename.startswith("Modified_") and filename.endswith(".csv")):
        continue
    file_path = os.path.join(directory, filename)
    process_csv(file_path)
    print(f"The modified CSV file has been saved to {file_path}")


The modified CSV file has been saved to c:/Users/shara/OneDrive/Desktop/GPT Script/GPT4/Modified_GPT4Vision_OP_T0.5_R1.csv
The modified CSV file has been saved to c:/Users/shara/OneDrive/Desktop/GPT Script/GPT4/Modified_GPT4Vision_OP_T0.5_R2.csv
The modified CSV file has been saved to c:/Users/shara/OneDrive/Desktop/GPT Script/GPT4/Modified_GPT4Vision_OP_T0.5_R3.csv
The modified CSV file has been saved to c:/Users/shara/OneDrive/Desktop/GPT Script/GPT4/Modified_GPT4Vision_OP_T0.5_R4.csv
The modified CSV file has been saved to c:/Users/shara/OneDrive/Desktop/GPT Script/GPT4/Modified_GPT4Vision_OP_T0.5_R5.csv
The modified CSV file has been saved to c:/Users/shara/OneDrive/Desktop/GPT Script/GPT4/Modified_GPT4Vision_OP_T0_R1.csv
The modified CSV file has been saved to c:/Users/shara/OneDrive/Desktop/GPT Script/GPT4/Modified_GPT4Vision_OP_T0_R2.csv
The modified CSV file has been saved to c:/Users/shara/OneDrive/Desktop/GPT Script/GPT4/Modified_GPT4Vision_OP_T0_R3.csv
The modified CSV file 

In [None]:
# `ldata` only exists inside process_csv, so a bare `ldata` here raises
# NameError on a fresh kernel. Reload the most recently processed file
# (the last `file_path` set by the loop above) and preview it instead.
pd.read_csv(file_path).head()