In [10]:
def filter_features(dataframe, id=None, phase=None, day=None, session_kind=None, experimental_context=None, context_type=None, sex=None, bodypart=None, selected_columns=None, output_directory=None):
    """
    Filter the rows of a DataFrame by metadata values, renumber the frames per
    animal/day, and optionally keep only a subset of columns.

    Parameters:
    - dataframe (pd.DataFrame): The target DataFrame. It is not modified. It must
      contain the 'rat' and 'day' columns, which are used to renumber the frames.
    - id (str, list, or None): Value or list of values matched against the
      'id_amostra' and 'rat' columns; a row is kept when either column matches.
    - phase (str, list, or None): Value or list of values for the 'phase' column.
    - day (int, list, or None): Value or list of values for the 'day' column.
    - session_kind (str, list, or None): Value or list of values for the 'session_kind' column.
    - experimental_context (str, list, or None): Value or list of values for the 'experimental_context' column.
    - context_type (str, list, or None): Value or list of values for the 'context_type' column.
    - sex (str, list, or None): Value or list of values for the 'sex' column.
    - bodypart (str or None): Currently unused; kept so existing calls keep working.
    - selected_columns (list or None): Columns to keep in the returned DataFrame.
      Names that do not exist in the result are silently ignored.
    - output_directory (str or None): Where to save "dataset_select.csv". May be an
      existing directory, or the path of a file whose parent directory is used.

    Filtering rules:
    - A filter whose column is absent from the DataFrame is skipped.
    - A list/tuple/set keeps the rows whose value is in the collection (possibly none).
    - A single value that does not occur in the original DataFrame is ignored,
      i.e. no rows are removed for that filter.

    Returns:
    - pd.DataFrame: Filtered DataFrame with a leading 'frame' column counting rows
      from zero inside every ('rat', 'day') group.

    Raises:
    - KeyError: if the 'rat' or 'day' column is missing.
    """

    import os  # Local import: the function must not depend on a later notebook cell

    # Columns each argument is matched against. `id` may name either a sample
    # ('id_amostra') or an animal ('rat'), so both columns are tried.
    filters = {
        ('id_amostra', 'rat'): id,
        ('phase',): phase,
        ('day',): day,
        ('session_kind',): session_kind,
        ('experimental_context',): experimental_context,
        ('context_type',): context_type,
        ('sex',): sex,
    }

    result = dataframe.copy()

    for columns, values in filters.items():
        if values is None:
            continue

        # Not every dataset carries every metadata column; skip what is missing
        # instead of failing with a KeyError.
        present = [col for col in columns if col in dataframe.columns]
        if not present:
            continue

        is_collection = isinstance(values, (list, tuple, set))
        wanted = list(values) if is_collection else [values]

        if not is_collection:
            # A single value unknown to the original data leaves the rows untouched.
            known = any(dataframe[col].isin(wanted).any() for col in present)
            if not known:
                continue

        mask = None
        for col in present:
            hit = result[col].isin(wanted)
            mask = hit if mask is None else (mask | hit)
        result = result[mask]

    # Number the frames from zero inside each ('rat', 'day') group.
    result = result.assign(frame=result.groupby(['rat', 'day']).cumcount())

    # Move 'frame' to the first position.
    result = result[['frame'] + [col for col in result.columns if col != 'frame']]

    # Keep only the requested columns that actually exist.
    if selected_columns:
        valid_columns = [col for col in selected_columns if col in result.columns]
        selected_dataframe = result[valid_columns]
    else:
        selected_dataframe = result

    # Save next to the given file path, or inside the given directory.
    if output_directory is not None:
        if os.path.isdir(output_directory):
            target_dir = output_directory
        else:
            target_dir = os.path.dirname(output_directory)
        output_file = os.path.join(target_dir, "dataset_select.csv")
        selected_dataframe.to_csv(output_file, index=False)
        print("the file is saved")

    return selected_dataframe

In [2]:
def filter_likelihood(df, region, min_limit=0.8, output_directory=None):
    """
    Blank out low-confidence coordinates of one region and fill them by linear interpolation.

    Parameters:
    - df (pd.DataFrame): Input DataFrame containing the '<region>_x', '<region>_y'
      and '<region>_likelihood' columns. It is not modified.
    - region (str): Name of the region used as the prefix of those column names.
    - min_limit (float): Rows whose likelihood is strictly below this threshold are
      treated as uncertain. Default is 0.8.
    - output_directory (str or None): Where to save "dataset_filter_likelihood.csv".
      May be an existing directory, or the path of a file whose parent directory is used.

    Returns:
    - tuple (uncertain_data, df_filtered):
      - uncertain_data (pd.DataFrame): The uncertain rows with their original coordinates.
      - df_filtered (pd.DataFrame): Copy of `df` in which the uncertain coordinates of
        `region` were replaced by linearly interpolated values.

    Notes:
    - Only the x/y columns of `region` are interpolated; every other column is left as is.
    - Interpolation runs over the rows in their current order and does not
      distinguish animals or sessions, so gaps at a boundary between two
      recordings are bridged across it.
    - Uncertain rows at the very start have no earlier value and stay NaN.
    """

    import os  # Local import: the function must not depend on a later notebook cell
    import numpy as np

    likelihood_col = f'{region}_likelihood'
    coord_cols = [f'{region}_x', f'{region}_y']

    # Work on a copy so the caller's DataFrame stays untouched.
    df_filtered = df.copy()

    # Coordinates must be float to hold NaN.
    df_filtered[coord_cols] = df_filtered[coord_cols].astype(float)

    is_uncertain = df_filtered[likelihood_col] < min_limit

    # Snapshot of the uncertain rows taken before their coordinates are blanked.
    uncertain_data = df_filtered[is_uncertain]

    df_filtered.loc[is_uncertain, coord_cols] = np.nan

    # Interpolate only this region's coordinates; text columns and other
    # measurements must not be altered as a side effect.
    df_filtered[coord_cols] = df_filtered[coord_cols].interpolate(method='linear')

    # Save next to the given file path, or inside the given directory.
    if output_directory is not None:
        if os.path.isdir(output_directory):
            target_dir = output_directory
        else:
            target_dir = os.path.dirname(output_directory)
        output_file = os.path.join(target_dir, "dataset_filter_likelihood.csv")
        df_filtered.to_csv(output_file, index=False)
        print("the file is saved")

    return uncertain_data, df_filtered

In [3]:
import pandas as pd  # Tabular data manipulation (DataFrames)
import numpy as np  # Efficient array handling and math operations
import os  # File-system manipulation

# NOTE(review): `os` is imported a second time here; `glob` is not used in the cells visible in this notebook.
import os
import glob

# Colab-specific: mount Google Drive at /content/drive so the data files can be read.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# Path of the CSV file that is read into `df_coords`.
# NOTE: despite its name, `directory` holds a file path; later cells pass it as
# `output_directory`, and the functions derive the folder from it with os.path.dirname.
directory = "/content/drive/MyDrive/PPGNeuro/area_Exp35/Exp 35 - rodada 2 - analisados/result_artemis/all_data.csv"
df_coords = pd.read_csv(directory)
# Last expression of the cell: displays the loaded table.
df_coords

Unnamed: 0,id_amostra,rat,day,sex,session_kind,experimental_context,nose_x,nose_y,nose_likelihood,ear_l_x,...,ear_r_likelihood,center_n_x,center_n_y,center_n_likelihood,center_b_x,center_b_y,center_b_likelihood,tail_base_x,tail_base_y,tail_base_likelihood
0,CCM247-TR-Contexto2,CCM247,4,M,TR,2,1046.042236,71.806679,0.998112,971.938049,...,0.999178,945.375793,68.480537,0.983135,886.874023,96.242256,0.990015,813.453674,140.484955,0.997932
1,CCM247-TR-Contexto2,CCM247,4,M,TR,2,1046.063232,71.804070,0.998103,971.969910,...,0.999164,945.410339,68.417175,0.983462,887.161743,96.214500,0.990392,813.438049,140.427338,0.997933
2,CCM247-TR-Contexto2,CCM247,4,M,TR,2,1046.063232,71.804070,0.998103,971.969971,...,0.999164,945.414001,68.417107,0.983467,887.154846,96.205910,0.990359,813.435059,140.424377,0.997931
3,CCM247-TR-Contexto2,CCM247,4,M,TR,2,1046.063232,71.804070,0.998103,971.969971,...,0.999164,945.414001,68.417107,0.983467,887.154846,96.205910,0.990359,813.434998,140.424377,0.997931
4,CCM247-TR-Contexto2,CCM247,4,M,TR,2,1046.063232,71.804070,0.998103,971.969971,...,0.999164,945.414001,68.417107,0.983467,887.154846,96.205910,0.990359,813.434998,140.424377,0.997931
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1197015,CCM271-HAB2-Contexto1,CCM271,2,M,HAB2,1,528.796631,533.792542,0.999856,481.587616,...,0.999702,483.049103,594.929443,0.998933,403.220032,643.497559,0.960405,352.809113,614.836060,0.999270
1197016,CCM271-HAB2-Contexto1,CCM271,2,M,HAB2,1,528.539062,534.250671,0.999848,481.667633,...,0.999757,483.005585,594.597595,0.998903,404.952240,643.319763,0.960552,352.520569,616.174438,0.998699
1197017,CCM271-HAB2-Contexto1,CCM271,2,M,HAB2,1,529.096802,533.778870,0.999866,481.335693,...,0.999738,482.468811,595.128113,0.998632,404.614166,641.280334,0.955707,353.078094,615.785828,0.999123
1197018,CCM271-HAB2-Contexto1,CCM271,2,M,HAB2,1,528.790833,533.874207,0.999857,481.654633,...,0.999742,482.896423,594.649353,0.998944,404.647156,641.285034,0.944732,351.819092,616.934631,0.997847


In [9]:
# Parameters for the filtering cells below.
# NOTE(review): `id` shadows the Python builtin of the same name, and neither `id`
# nor `day` is passed to the filter_features call visible in this notebook — confirm
# whether filtering by animal/day was intended.
id = "CCM247"
region = "center_b"
day = 1
# Columns kept in the filtered table: frame counter, identifiers, and the x/y/likelihood of `region`.
selected_columns =  ['frame', 'id_amostra', "rat", 'experimental_context',  'day', f'{region}_x', f'{region}_y', f'{region}_likelihood']

In [11]:
# Select the columns of `region` from the full table; no row filter is passed here.
# Because `directory` is a file path, "dataset_select.csv" is written to its parent folder.
df = filter_features(df_coords, bodypart=region, selected_columns=selected_columns, output_directory=directory)
# Last expression of the cell: displays the selected table.
df

the file is saved


Unnamed: 0,frame,id_amostra,rat,experimental_context,day,center_b_x,center_b_y,center_b_likelihood
0,0,CCM247-TR-Contexto2,CCM247,2,4,886.874023,96.242256,0.990015
1,1,CCM247-TR-Contexto2,CCM247,2,4,887.161743,96.214500,0.990392
2,2,CCM247-TR-Contexto2,CCM247,2,4,887.154846,96.205910,0.990359
3,3,CCM247-TR-Contexto2,CCM247,2,4,887.154846,96.205910,0.990359
4,4,CCM247-TR-Contexto2,CCM247,2,4,887.154846,96.205910,0.990359
...,...,...,...,...,...,...,...,...
1197015,17997,CCM271-HAB2-Contexto1,CCM271,1,2,403.220032,643.497559,0.960405
1197016,17998,CCM271-HAB2-Contexto1,CCM271,1,2,404.952240,643.319763,0.960552
1197017,17999,CCM271-HAB2-Contexto1,CCM271,1,2,404.614166,641.280334,0.955707
1197018,18000,CCM271-HAB2-Contexto1,CCM271,1,2,404.647156,641.285034,0.944732


In [12]:
# Split off the rows whose `region` likelihood is below the default threshold (0.8);
# `df_end` is the table after interpolation. "dataset_filter_likelihood.csv" is written
# to the parent folder of `directory`.
uncertain_data, df_end = filter_likelihood(df, region=region, output_directory=directory)
# Last expression of the cell: displays the uncertain rows.
uncertain_data

the file is saved


Unnamed: 0,frame,id_amostra,rat,experimental_context,day,center_b_x,center_b_y,center_b_likelihood
717,717,CCM247-TR-Contexto2,CCM247,2,4,404.817322,251.651108,0.753839
736,736,CCM247-TR-Contexto2,CCM247,2,4,409.227356,249.015518,0.766713
1238,1238,CCM247-TR-Contexto2,CCM247,2,4,408.670746,585.576233,0.757010
1239,1239,CCM247-TR-Contexto2,CCM247,2,4,406.976837,599.687683,0.699707
1242,1242,CCM247-TR-Contexto2,CCM247,2,4,402.002228,599.525024,0.576802
...,...,...,...,...,...,...,...,...
1196683,17665,CCM271-HAB2-Contexto1,CCM271,1,2,442.919769,645.509033,0.050281
1196684,17666,CCM271-HAB2-Contexto1,CCM271,1,2,397.012817,624.553589,0.138374
1196685,17667,CCM271-HAB2-Contexto1,CCM271,1,2,389.137634,641.432190,0.682456
1196686,17668,CCM271-HAB2-Contexto1,CCM271,1,2,391.155029,647.517883,0.454690
