In [1]:
import torch

# Query CUDA availability once and reuse the answer for the device-name lookup.
gpu_available = torch.cuda.is_available()

print(gpu_available)
print(torch.cuda.device_count())
# Only ask for the name of device 0 when a GPU is actually present.
print(torch.cuda.get_device_name(0) if gpu_available else "No GPU found")

False
0
No GPU found


## Retransform CSV files from .str files

In [1]:
import os
from src.utils.format_helpers import convert_str_to_csv

def batch_convert_str_to_csv(directory, base_input_directory='results'):
    """Convert all .str files under a directory to CSV without modifying the original .str files.

    The directory tree is walked recursively. For every file whose name ends
    with '.str', the experiment name is taken as the first path component of
    the file's path relative to ``base_input_directory`` and passed, together
    with the file path, to ``convert_str_to_csv``.

    Parameters:
        directory (str): Root directory that is searched for .str files.
        base_input_directory (str): Directory the experiment name is derived
            relative to. Defaults to 'results', because the transcription has
            already been performed there (the .str files live in 'results').

    Note:
        A .str file located directly inside ``base_input_directory`` has no
        experiment sub-folder, so its own file name is used as the experiment
        name.
    """
    for root, _, files in os.walk(directory):
        for file in files:
            if not file.endswith('.str'):
                continue

            str_file = os.path.join(root, file)
            str_dir = os.path.dirname(str_file)

            # First component of the path relative to the base directory,
            # e.g. 'results/Compassion/S304.str' -> 'Compassion'.
            relative_path = os.path.relpath(str_file, base_input_directory)
            experiment_name = relative_path.split(os.sep)[0]

            print("Directory: ", str_dir)
            print("Experiment: ", experiment_name)
            convert_str_to_csv(str_file, experiment_name)
            print(f"Finished converting {str_file}.")

In [2]:
# Root directory that is searched recursively for .str files; each one found
# is converted to CSV by batch_convert_str_to_csv.
directory = 'results'
batch_convert_str_to_csv(directory)

Directory:  results\Compassion
Experiment:  Compassion
Finished converting results\Compassion\S301final.str.
Directory:  results\Compassion
Experiment:  Compassion
Finished converting results\Compassion\S302con.str.
Directory:  results\Compassion
Experiment:  Compassion
Finished converting results\Compassion\S302man.str.
Directory:  results\Compassion
Experiment:  Compassion
Finished converting results\Compassion\S303con.str.
Directory:  results\Compassion
Experiment:  Compassion
Finished converting results\Compassion\S303man.str.
Directory:  results\Compassion
Experiment:  Compassion
Finished converting results\Compassion\S304.str.
Directory:  results\Compassion
Experiment:  Compassion
Finished converting results\Compassion\S305con.str.
Directory:  results\Compassion
Experiment:  Compassion
Finished converting results\Compassion\S306.str.
Directory:  results\Compassion
Experiment:  Compassion
Finished converting results\Compassion\S307.str.
Directory:  results\Compassion
Experiment:  

In [None]:
import os
import pandas as pd

def add_missing_column_recursive(folder_path, new_column_name, new_column_value, insert_position):
    """
    Recursively adds a missing column to all CSV files in a folder and its subfolders.

    Each CSV file is read, and if it does not yet contain ``new_column_name``
    the column is inserted at ``insert_position`` with every row set to
    ``new_column_value``; the file is then overwritten in place (without the
    index). Failures while reading, inserting, or saving a file are printed
    and that file is skipped, so one bad file does not abort the batch.

    Parameters:
        folder_path (str): Path to the folder containing the CSV files.
        new_column_name (str): Name of the new column to be added.
        new_column_value (str): Default value for the new column.
        insert_position (int): Index where the new column should be inserted.
            Files with fewer columns than this position are reported and left
            unchanged.
    """
    for root, _, files in os.walk(folder_path):
        for file_name in files:
            if not file_name.endswith(".csv"):
                continue
            file_path = os.path.join(root, file_name)

            # Read the CSV file
            try:
                df = pd.read_csv(file_path)
            except Exception as e:
                print(f"Error reading file {file_path}: {e}")
                continue

            # Nothing to do when the column is already present
            if new_column_name in df.columns:
                print(f"Column '{new_column_name}' already exists in {file_path}")
                continue

            # Insert the new column; an out-of-range or invalid position makes
            # pandas raise, which must not stop the remaining files from being
            # processed.
            try:
                df.insert(insert_position, new_column_name, new_column_value)
            except (IndexError, ValueError, TypeError) as e:
                print(f"Error inserting column into {file_path}: {e}")
                continue

            # Save the updated CSV back to the file
            try:
                df.to_csv(file_path, index=False)
                print(f"Updated file: {file_path}")
            except Exception as e:
                print(f"Error saving file {file_path}: {e}")

# Example Usage
#folder_path = "Reports/meditations_transcripts"  # Replace with your folder path
#add_missing_column_recursive(folder_path, new_column_name="Content Type", new_column_value="Audio", insert_position=3)


Updated file: Reports/meditations_transcripts\overview_interviews.csv
Updated file: Reports/meditations_transcripts\transcripts_merged.csv
Updated file: Reports/meditations_transcripts\raw\Compassion\S301final.csv
Updated file: Reports/meditations_transcripts\raw\Compassion\S302con.csv
Updated file: Reports/meditations_transcripts\raw\Compassion\S302man.csv
Updated file: Reports/meditations_transcripts\raw\Compassion\S303con.csv
Updated file: Reports/meditations_transcripts\raw\Compassion\S303man.csv
Updated file: Reports/meditations_transcripts\raw\Compassion\S304.csv
Updated file: Reports/meditations_transcripts\raw\Compassion\S305con.csv
Updated file: Reports/meditations_transcripts\raw\Compassion\S306.csv
Updated file: Reports/meditations_transcripts\raw\Compassion\S307.csv
Updated file: Reports/meditations_transcripts\raw\Compassion\S313.csv
Updated file: Reports/meditations_transcripts\raw\OBE1\ID 05.csv
Updated file: Reports/meditations_transcripts\raw\OBE1\Id 08.csv
Updated fil

In [None]:
import os
import pandas as pd

def replace_brackets_in_csv(directory, column_name):
    """
    Recursively looks into a directory for CSV files, replaces square brackets [ ]
    with parentheses ( ) in a specific column, and prints the content every time a change is made.

    Only string cells are touched: missing values and numeric cells are left
    as they are, so they are neither reported as changes nor converted to text.
    A file is written back only when at least one cell actually changed.
    Any error while processing a file is printed and the file is skipped.

    :param directory: The root directory to start searching for CSV files.
    :param column_name: The name of the column to modify.
    """
    # Character-level mapping: '[' -> '(' and ']' -> ')'.
    bracket_to_paren = str.maketrans('[]', '()')

    for root, _, files in os.walk(directory):
        for file in files:
            if not file.endswith('.csv'):
                continue
            file_path = os.path.join(root, file)
            try:
                # Read the CSV file
                df = pd.read_csv(file_path)

                # Skip files that do not contain the requested column
                if column_name not in df.columns:
                    continue

                original_values = df[column_name].tolist()
                # Translate strings only; NaN and numbers pass through unchanged.
                modified_values = [
                    value.translate(bracket_to_paren) if isinstance(value, str) else value
                    for value in original_values
                ]

                # Print the content every time a change is made
                changed = False
                for original, modified in zip(original_values, modified_values):
                    # The isinstance guard avoids the NaN != NaN false positive.
                    if isinstance(original, str) and original != modified:
                        changed = True
                        print(f"File: {file_path}")
                        print(f"Original: {original}")
                        print(f"Modified: {modified}")
                        print("-" * 50)

                # Save the modified CSV back to the file, but only if needed
                if changed:
                    df[column_name] = modified_values
                    df.to_csv(file_path, index=False)
            except Exception as e:
                print(f"Error processing file {file_path}: {e}")

# Example usage
#replace_brackets_in_csv("evaluation/references", 'Content')

File: evaluation/references\ID 05.csv
Original: Yes. [END INTERVIEW]
Modified: Yes. (END INTERVIEW)
--------------------------------------------------
File: evaluation/references\Id 08.csv
Original: [BEGIN INTERVIEW] how was uh the second part?
Modified: (BEGIN INTERVIEW) how was uh the second part?
--------------------------------------------------
File: evaluation/references\Id 7a.csv
Original: Time-Stamps are shfited & mostly in french...[START INTERVIEW] 00:04:02
Modified: Time-Stamps are shfited & mostly in french...(START INTERVIEW) 00:04:02
--------------------------------------------------
File: evaluation/references\S201-2.csv
Original: [SMALL TALK overlapping] And if you want coffee after, I also can.
Modified: (SMALL TALK overlapping) And if you want coffee after, I also can.
--------------------------------------------------
File: evaluation/references\S201-2.csv
Original: I mean, yes, there is always a bit of a risk because when we put this headset, it's not that easy to p

## Excel to csv

In [None]:
import pandas as pd

# Load the manually corrected workbook and drop its last column in a single chain.
df = pd.read_excel('./interviews_corrected/structured_data_manual.xlsx').iloc[:, :-1]

# Write the remaining columns to CSV (no index column), then display the frame
# as the cell output.
df.to_csv('./structured_data_manual.csv', index=False)
df

  warn(msg)


Unnamed: 0,Experiment,File Name,Id,Duration,Order Condition,Condition
0,OBE1,ID 05,5,00:10:15,IC,1
1,OBE1,Id 7a,7,00:09:38,IC,I
2,OBE1,Id 7b,7,00:04:01,IC,C
3,OBE1,Id 08,8,00:04:34,CI,1
4,OBE1,Id 13,13,00:07:19,IC,I
...,...,...,...,...,...,...
77,Compassion,S304,304,00:07:05,CI,1
78,Compassion,S305con,305,00:01:41,CI,C
79,Compassion,S306,306,00:08:50,IC,1
80,Compassion,S307,307,00:06:15,CI,1


In [None]:
import requests

# Source archive and the local file name it is saved under.
url = "https://storage.googleapis.com/bleurt-oss-21/BLEURT-20.zip"
output = "BLEURT-20.zip"

# Stream the archive to disk in 8 KiB chunks instead of loading it into memory.
# The (connect, read) timeout makes a stalled connection raise instead of
# blocking the cell forever; requests applies no timeout by default.
with requests.get(url, stream=True, timeout=(10, 60)) as r:
    # Fail early on HTTP errors so an error page is not saved as the zip file.
    r.raise_for_status()
    with open(output, 'wb') as f:
        for chunk in r.iter_content(chunk_size=8192):
            f.write(chunk)
print("Download completed.")


In [1]:
from src.utils.preprocessing_helpers import preprocessing_csv

# Single .str transcript that is passed to preprocessing_csv.
str_file = "src/test_erro/ADE_pilot_012_interview_21.11.24_part1.str"

preprocessing_csv(str_file=str_file)

Saved preprocessed file to results\processed\..\src\test_erro\ADE_pilot_012_interview_21.11.24_part1.csv
