# Condition Distribution by experiments & Id

In [None]:
import pandas as pd

In [None]:
# Load the interview metadata workbook into a DataFrame. Later cells read its
# 'Experiment', 'Id', 'File Name', 'Condition' and 'Order Condition' columns.
condition_info = pd.read_excel("../../interviews_corrected/structured_data_manual.xlsx")

In [None]:
# Per-participant overview: one row per (Experiment, Id) holding the number of
# non-null 'File Name' entries, every 'Condition' value in row order, and the
# distinct 'Order Condition' values.
id_summary = condition_info.groupby(['Experiment', "Id"]).agg({
    'File Name': 'count',
    'Condition': lambda x: list(x),
    # dict.fromkeys de-duplicates while keeping first-seen order. list(set(x))
    # ordered string labels arbitrarily, so the CSV could differ between runs.
    'Order Condition': lambda x: list(dict.fromkeys(x)),
}).rename(columns={'File Name': 'Interview_Count'})

# The (Experiment, Id) index is written out as the first two CSV columns.
id_summary.to_csv("summary_interview.csv")

In [None]:
# 1. Per-experiment totals: number of interview files and number of distinct
#    participant Ids.
experiment_summary = (
    condition_info
    .groupby(['Experiment'])
    .agg({
        'File Name': 'count',
        'Id': lambda ids: len(set(ids)),
    })
    .rename(columns={'File Name': 'Interview_Count', 'Id': 'ID_Count'})
    .reset_index()
)

# 2. Distinct participant Ids for each (Experiment, Order Condition) pair.
order_condition_summary = (
    condition_info
    .groupby(['Experiment', 'Order Condition'])
    .agg({'Id': lambda ids: len(set(ids))})
    .rename(columns={'Id': 'ID_Count_Per_Order'})
    .reset_index()
)

# 3. Count of participants with at least one 'C' and one 'I' per Order Condition
def has_C_and_I(conditions):
    """Return True when both 'C' and 'I' occur among ``conditions``.

    Args:
        conditions: Iterable of hashable condition labels.

    Returns:
        bool: True if the labels include 'C' as well as 'I', otherwise False.
    """
    # Subset test: both required labels must be present in the observed set.
    return {'C', 'I'} <= set(conditions)

# Key columns shared by every per-order table below.
_order_keys = ['Experiment', 'Order Condition']

# One boolean per (Experiment, Order Condition, Id): does this participant have
# both a 'C' and an 'I' interview?
_both_flag_per_participant = (
    condition_info
    .groupby(['Experiment', 'Order Condition', 'Id'])['Condition']
    .apply(has_C_and_I)
    .reset_index()
)

# Keep only participants flagged True and count them per Order Condition
participants_with_C_and_I_per_order = (
    _both_flag_per_participant[_both_flag_per_participant['Condition'] == True]
    .groupby(_order_keys)
    .size()
    .rename("Participants_with_C_and_I")
    .reset_index()
)

# 4. Distinct Ids whose Condition equals 1, per Experiment and Order Condition
condition_1_count_per_order = (
    condition_info[condition_info['Condition'] == 1]
    .groupby(_order_keys)['Id']
    .nunique()
    .rename("ID with only 1 interview")
    .reset_index()
)

# Index by 'Experiment' so the joins below can match on it
experiment_summary = experiment_summary.set_index('Experiment')

# Attach the per-order participant counts (one row per Experiment/Order Condition)
experiment_summary = experiment_summary.join(
    order_condition_summary.set_index('Experiment'),
    on='Experiment',
)

# Attach the two remaining per-order tables, matched on both key columns
for _per_order_table in (participants_with_C_and_I_per_order, condition_1_count_per_order):
    experiment_summary = experiment_summary.join(
        _per_order_table.set_index(_order_keys),
        on=_order_keys,
    )

# Combinations missing from a joined table come back as NaN; store them as 0
for _count_column in ('Participants_with_C_and_I', 'ID with only 1 interview'):
    experiment_summary[_count_column] = experiment_summary[_count_column].fillna(0).astype(int)

# Turn 'Experiment' back into a regular column for display and export
experiment_summary = experiment_summary.reset_index()

# Save to CSV
experiment_summary.to_csv("experiment_summary.csv", index=False)

experiment_summary


Unnamed: 0,Experiment,Interview_Count,ID_Count,Order Condition,ID_Count_Per_Order,Participants_with_C_and_I,ID with only 1 interview
0,Compassion,10,8,CI,3,0,2
1,Compassion,10,8,IC,4,2,2
2,Compassion,10,8,Unknown,1,0,1
3,OBE1,18,14,CI,6,1,5
4,OBE1,18,14,IC,7,3,4
5,OBE1,18,14,Unknown,1,0,1
6,OBE2,54,26,CI,14,11,2
7,OBE2,54,26,IC,12,9,3


# (Useful) Problems with Excel format

In [None]:
def convert_csv_semicolon_to_comma(input_file, output_file):
    """
    Converts a CSV file with semicolons as delimiters to a comma-separated CSV.

    Args:
        input_file: Path of the UTF-8 encoded, semicolon-delimited file to read.
        output_file: Path of the comma-delimited file to write; an existing
            file at that path is overwritten.
    """
    # Imported locally: no cell in this notebook imports csv, so the function
    # would otherwise fail with a NameError.
    import csv

    # newline='' on both handles leaves line-ending handling to the csv module,
    # so quoted fields that contain line breaks are read and written intact.
    with open(input_file, 'r', newline='', encoding='utf-8') as infile, \
            open(output_file, 'w', newline='', encoding='utf-8') as outfile:
        reader = csv.reader(infile, delimiter=';')
        writer = csv.writer(outfile, delimiter=',')
        # Fields containing commas are quoted by the writer as needed.
        writer.writerows(reader)

def find_problematic_line(input_file):
    """
    Report the first line of a file that does not decode cleanly as UTF-8.

    The file is decoded with ``errors='replace'``, so undecodable bytes become
    U+FFFD. The first line containing that character is printed together with
    its 1-based line number. A line that genuinely contains U+FFFD is reported
    too. When no line matches, a "not found" message is printed instead.
    """
    replacement_char = '\ufffd'  # what errors='replace' substitutes for bad bytes

    with open(input_file, 'r', encoding='utf-8', errors='replace') as file:
        # Stop scanning at the first offending line; None means the file is clean.
        first_hit = next(
            (
                (number, text)
                for number, text in enumerate(file, start=1)
                if replacement_char in text
            ),
            None,
        )

    if first_hit is None:
        print("No problematic lines found.")
        return

    line_number, line = first_hit
    print(f"Problematic line at line {line_number}: {line}")

def remove_bom_from_file(file_path):
    """
    Strip a leading UTF-8 Byte Order Mark (BOM) from a file, in place.

    The file is read as raw bytes. If it starts with the UTF-8 BOM, the file
    is rewritten without those bytes; otherwise it is left untouched. A status
    message is printed in either case.
    """
    utf8_bom = b'\xef\xbb\xbf'

    with open(file_path, 'rb') as file:
        raw = file.read()

    # Nothing to do when the file does not begin with the BOM.
    if not raw.startswith(utf8_bom):
        print(f"No BOM found in {file_path}.")
        return

    print(f"BOM found in {file_path}, removing it...")
    with open(file_path, 'wb') as file:
        # Write back everything after the BOM bytes.
        file.write(raw[len(utf8_bom):])
    print(f"BOM successfully removed from {file_path}.")

# Llama

In [None]:
import ollama

In [None]:
def read_input_file(file_path):
    """Return the entire contents of ``file_path``, decoded as UTF-8 text."""
    with open(file_path, mode='r', encoding='utf-8') as handle:
        return handle.read()

# Transcript whose filler words and repetitions the model is asked to remove
input_txt_path = "../interviews_corrected/raw/Compassion/S301final.txt"

# Prompt = instruction line followed by the raw transcript
text = "".join((
    "Clean the following transcription, don't add extra information, only remove fillers words and repetitions : \n",
    read_input_file(input_txt_path),
))

text

"Clean the following transcription, don't add extra information, only remove fillers words and repetitions : \nSpeaker 1: And you can just a little bit tell about your experience about this two-section, how you feel, could you relax, could you really follow everything she said, helping you and so on?\n\nSpeaker 2: There is no wrong or right to just saying your experience, what was nice, what was, if you had problems, whatever.\n\nSpeaker 1: to be anonym anyway. so just you to understand.\n\nSpeaker 0: uh the points i think are like uh meditating. i could feel i have. i had some uh the gear on me and that was a little bit disturbing because it's i couldn't uh completely dissociate from uh from outside and because i could feel the gear uh. and also while i had my eyes closed I could disassociate from this room and from this place more than when I had the virtual reality and so I saw myself in front of me. but on the other side while I was watching the virtual reality it was much easier f

In [None]:
# Send the prompt to the local 'llama3.1' model as a single user message
response = ollama.chat(
    model='llama3.1',
    messages=[{'role': 'user', 'content': text}],
)

# The reply text is stored under message -> content
print(response['message']['content'])

Here is the cleaned transcription:

Speaker 2: There's no right or wrong, just say your experience, what was nice, what wasn't, if you had problems.

Speaker 0: The points I think are like meditating. I could feel that. I had some gear on me and that was a little bit disturbing because I couldn't completely dissociate from outside and because I could feel the gear. And also while I had my eyes closed, I could disassociate from this room and from this place more than when I had the virtual reality. So I saw myself in front of me.

But on the other side, while I was watching the virtual reality, it was much easier for me to focus on part of me and part of my body and to put my focus on me and feel more. But it was still staying in this room and it was less inner than with eyes closed. I don't know if that's clear.

I felt like I was doing a kind of meditation. I could relate more with the person in front of me because I could see him, but I still felt like I was here. While in the previo

In [None]:
# Write the model's reply to a UTF-8 text file
output_txt_path = "S301final_Olama.txt"
cleaned_transcript = response['message']['content']

with open(output_txt_path, mode='w', encoding='utf-8') as file:
    file.write(cleaned_transcript)