In [1]:
import pandas as pd  # Tabular data handling (DataFrames)
from google.colab import drive
# Mount Google Drive at /content/drive so the data folder used later in the notebook is reachable.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
def processar_arquivo(path):
    """
    Load one CSV with a stacked header and return it as an all-float DataFrame.

    The file's first line is consumed by pandas as the header and then
    discarded. The next line supplies the first half of every column name and
    must contain a 'bodyparts' cell; the column(s) under that cell are dropped.
    The line after that supplies the second half of every column name. The
    final column names have the form '<first half>_<second half>'.

    Args:
    - path: Path of the CSV file to load.

    Returns:
    - DataFrame with a fresh 0-based index and every column cast to float.
    """
    import pandas as pd

    # Read everything as text first: the leading rows hold labels, not numbers.
    raw_table = pd.read_csv(path, dtype=str)

    # Promote row 0 to the header, then remove the label column and that row.
    raw_table.columns = raw_table.iloc[0].values
    labelled = raw_table.drop(columns='bodyparts').drop(index=0)

    # The first remaining row carries the second half of each column name.
    suffixes = labelled.iloc[0].values
    labelled.columns = [prefix + '_' + suffix for prefix, suffix in zip(labelled.columns, suffixes)]

    # Discard the suffix row, renumber from zero and convert the text to floats.
    return labelled.drop(index=1).reset_index(drop=True).astype('float')

def extrair_informacoes_nome_arquivo(file_name, space):
    """
    Extract experiment metadata from a file name containing a 'DLC' marker.

    The name is split at 'DLC'. The part before the marker is split on `space`
    into tokens: animal id, session kind and, optionally, an experimental
    context. The part after the marker is split on '_' and its third piece is
    taken as the experimental type.

    Parsing is best-effort: on a malformed name a message is printed and every
    field that had not been parsed yet stays None.

    Args:
    - file_name: Name of the file to parse.
    - space: Separator between the tokens that precede 'DLC'.

    Returns:
    - Tuple (id_part, id, day, sex, session_kind, experimental_type,
      experimental_context, phase):
        id_part: everything before 'DLC'.
        id: first token of id_part.
        day: 5 for 'tt*' sessions, 4 for 'tr*' sessions; for 'hab*' sessions
          the first number in the session token, or 1 if it has none.
        sex: third character of id when id starts with 'CC', else None.
        session_kind: second token of id_part.
        experimental_type: third '_'-separated piece of the text after 'DLC'.
        experimental_context: first number in the third token, 0 if that token
          has no digits, None if the token is absent or empty.
        phase: 'teste', 'treino', 'habituacao' or None.
    """
    import re

    # Every returned name is bound up-front so the final return can never hit
    # an unbound variable, even when the very first split fails.
    id_part = None
    rat_id = None  # returned in the `id` position; renamed to avoid shadowing the builtin
    day = None
    sex = None
    session_kind = None
    experimental_type = None
    experimental_context = None
    phase = None

    try:
        # Exactly one 'DLC' marker is expected; zero or several markers make
        # the unpacking raise ValueError before anything is assigned.
        id_part, NN_part = file_name.split('DLC')
        id_parts = id_part.split(space)

        rat_id = id_parts[0]
        session_kind = id_parts[1]

        # The context token is optional; only search it for digits when present.
        context_token = id_parts[2] if len(id_parts) > 2 and id_parts[2] != "" else None
        if context_token is not None:
            match = re.search(r'\d+', context_token)
            experimental_context = int(match.group()) if match else 0

        experimental_type = NN_part.split("_")[2]

        # Guard the length so an id of exactly "CC" yields None instead of an IndexError.
        sex = rat_id[2] if rat_id.startswith("CC") and len(rat_id) > 2 else None

        session_lower = session_kind.lower()
        if session_lower.startswith("tt"):
            phase = "teste"
            day = 5
        elif session_lower.startswith("tr"):
            phase = "treino"
            day = 4
        elif session_lower.startswith("hab"):
            phase = "habituacao"
            match = re.search(r'\d+', session_kind)
            day = int(match.group()) if match else 1

    except ValueError:
        print("Formato inválido do nome do arquivo.")
    except IndexError:
        print("Índice inválido ao acessar partes do nome do arquivo.")
    except Exception as e:
        print("Erro:", e)

    return id_part, rat_id, day, sex, session_kind, experimental_type, experimental_context, phase

def processar_diretorio(directory, space = "-"):
    """
    Merge every CSV file in a directory into one CSV annotated with file-name metadata.

    Each file directly inside `directory` whose name ends with '.csv' is loaded
    with processar_arquivo. The metadata parsed from its name by
    extrair_informacoes_nome_arquivo is prepended as the columns 'id_amostra',
    'rat', 'day', 'sex', 'session_kind' and 'experimental_context'. All tables
    are then concatenated and written to
    '<directory>/result_artemis/all_data.csv'; the folder is created if needed.

    Args:
    - directory: Path of the directory holding the CSV files to process.
    - space: Separator between the tokens of each file name (default "-").

    Returns:
    - Path (str) of the merged CSV file that was written.
    """
    import os
    import pandas as pd

    # os.listdir returns names in arbitrary order; sorting makes the row order
    # of the merged file reproducible between runs.
    csv_list = sorted(file_name for file_name in os.listdir(directory) if file_name.endswith(".csv"))

    # Collect the per-file tables and concatenate once at the end, instead of
    # re-copying the growing result on every iteration.
    frames = []

    for file_name in csv_list:
        current_df = processar_arquivo(os.path.join(directory, file_name))

        # The parsed experimental type and phase are not stored in the output.
        id_amostra, rat, day, sex, session_kind, _experimental_type, experimental_context, _phase = extrair_informacoes_nome_arquivo(file_name, space)

        # Each insert goes to position 0, so the final column order is the
        # reverse of the order of these calls.
        current_df.insert(0, 'experimental_context', experimental_context)
        current_df.insert(0, 'session_kind', session_kind)
        current_df.insert(0, 'sex', sex)
        current_df.insert(0, 'day', day)
        current_df.insert(0, 'rat', rat)
        current_df.insert(0, 'id_amostra', id_amostra)

        frames.append(current_df)

    # pd.concat rejects an empty list, so fall back to an empty table when the
    # directory holds no CSV files.
    all_data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

    dest_directory = f'{directory}/result_artemis'
    os.makedirs(dest_directory, exist_ok=True)

    csv_file_path = f'{dest_directory}/all_data.csv'
    all_data.to_csv(csv_file_path, index=False)

    # Confirm the save
    print(f'DataFrame mesclado salvo como CSV em {csv_file_path}.')

    return csv_file_path

In [None]:
# Folder holding the CSV files to merge (hard-coded Google Drive location).
directory = "/content/drive/MyDrive/PPGNeuro/area_Exp35/Exp 35 - rodada 2 - analisados"
# processar_diretorio writes the merged CSV and returns its path; "-" is the
# separator between the tokens of each file name.
path = processar_diretorio(directory, space = "-")

In [None]:
# Read the CSV at `path` (set by the previous cell) into a DataFrame.
data = pd.read_csv(path)

CCM247-HAB-Contexto2DLC_resnet50_exp35-campo-cinzaJun5shuffle1_300000_labeled_completed

CCM247-HAB-Contexto2DLC_resnet50_exp35-campo-cinzaJun5shuffle1_300000

In [None]:
# Display the loaded DataFrame.
data

In [None]:
# Order the rows by the 'rat' column and then by 'day'; sort_values returns a
# new DataFrame, so `data` keeps its original order.
data_sorted = data.sort_values(by=['rat', 'day'])
data_sorted

In [None]:
# Print the distinct values of each metadata column, one array per line.
for column_name in ("day", "sex", "session_kind", "experimental_context"):
    print(data_sorted[column_name].unique())

In [None]:
# Keep only the rows whose 'rat' value contains "CCM247", ignoring case;
# rows with a missing 'rat' value are excluded.
filtered_data = data_sorted.loc[data_sorted["rat"].str.contains("CCM247", case=False, na=False)]
filtered_data