In [1]:
import models
import torch
import numpy as np
import importlib 
from torch_geometric.data import Data, DataLoader
import tqdm
import pandas as pd
import torch.nn as nn
import utils
import os


In [14]:
import models
import torch
import numpy as np
import importlib 
from torch_geometric.data import Data, DataLoader
import tqdm
import pandas as pd
import torch.nn as nn
import utils
import os
import matplotlib.pyplot as plt
import joblib
import dataloader

# Per-behaviour switch read by inference_all_behaviors: True selects the GAT
# checkpoint (first entry of MODELS_PATH), False selects the baseline model
# (second entry of MODELS_PATH).
GAT_MASK = {
    'General_Contacts': True,
    'Sniffing': True,
    'Sniffing_head': False,
    'Sniffing_body': False,
    'Sniffing_anogenital': False,
    'Following': True,
    'Dominance': True,
    'Grooming': True,
}

# Legacy registry of per-behaviour model objects. Every entry is disabled, so
# the dict is empty; the name is kept because an older scratch cell further
# down still iterates over MODELS.keys(). The GAT architecture that used to be
# listed here is the one built by get_model().
MODELS = {}

# behaviour -> [GAT checkpoint path or None, baseline .pkl path]
MODELS_PATH = {
    'General_Contacts': [
        'models/GATmodels/GeneralContact_checkpoint_epoch_610',
        'models/baseline_models/new_dataset/model_gencont.pkl',
    ],
    'Sniffing': [
        'models/GATmodels/Sniffing_R_checkpoint_epoch_570',
        'models/baseline_models/new_dataset/model_sniffR.pkl',
    ],
    'Sniffing_head': [
        None,
        'models/baseline_models/new_dataset/model_Shead.pkl',
    ],
    'Sniffing_body': [
        None,
        'models/baseline_models/new_dataset/model_Sbody.pkl',
    ],
    'Sniffing_anogenital': [
        None,
        'models/baseline_models/new_dataset/model_Sanus.pkl',
    ],
    'Following': [
        'models/GATmodels/Following_checkpoint_epoch_442',
        'models/baseline_models/new_dataset/model_poursuitR.pkl',
    ],
    'Dominance': [
        'models/GATmodels/Dominance_epoch_750',
        'models/baseline_models/new_dataset/model_domR.pkl',
    ],
    'Grooming': [
        'models/GATmodels/Grooming_checkpoint_epoch_960',
        'models/baseline_models/new_dataset/model_groomR.pkl',
    ],
}

# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

### Function that returns the model based on the behavior
def get_model(behavior, gat = True) -> nn.Module:
    ''' Build an untrained model skeleton for a behaviour.

    The same GAT architecture is built for every behaviour; `behavior` is
    accepted for interface symmetry but is not consulted here.

    Parameters:
        - behavior: str, the behavior the model is meant for (currently unused)
        - gat: bool, True to build the GAT graph classifier, False when the
          caller will load a non-GAT model itself
    Returns:
        - model: models.GraphClassifier when gat is True, otherwise None
    '''
    if not gat:
        # No torch skeleton for the baseline models; the caller loads them from disk.
        return None

    encoder = models.GATEncoder(nout = 64, nhid=32, attention_heads = 2, n_in = 4, n_layers=4, dropout=0.2)
    head = models.ClassificationHead(n_latent=64, nhid = 32, nout = 2)
    return models.GraphClassifier(encoder=encoder, classifier=head, readout='mean')

def load_model(model_path, device, behaviour = 'General_Contact', gat = True):
    ''' Load a trained model from disk.
    Args:
        model_path: path to the GAT checkpoint (gat=True) or to the joblib file (gat=False)
        device: device on which a GAT model should be placed
        behaviour: behaviour of the model, forwarded to get_model
        gat: whether the model is a GAT model or not
    Returns:
        model: the GAT model in evaluation mode on `device`, or the object
               stored in the joblib file when gat is False
    '''
    skeleton = get_model(behaviour, gat)
    if skeleton is not None:
        # GAT path: restore the weights saved under 'model_state_dict'.
        state = torch.load(model_path, map_location=device)
        skeleton.load_state_dict(state['model_state_dict'])
        skeleton.to(device)
        skeleton.eval()
        return skeleton

    # get_model returned no skeleton: the whole estimator is stored on disk.
    return joblib.load(model_path)

def create_csv_with_output_behaviour(output, behaviour, path):
    ''' Write per-frame model output to a two-column csv file.
    Args:
        output: rows of (frame, prediction) accepted by pd.DataFrame
        behaviour: the behaviour analyzed, used as the name of the second column
        path: the path where the csv file should be saved
    '''
    header = ['Frame', behaviour]
    pd.DataFrame(output, columns = header).to_csv(path, index = False)

def _predict_graph_labels(model, graphs):
    ''' Run a GAT model over the graphs one at a time.
    Args:
        model: torch model called on each batch; its output is arg-maxed over dim 1
        graphs: sequence of graphs accepted by DataLoader, each exposing `frame_mask`
    Returns:
        frames: list of int, the median of `frame_mask` for each graph
        labels: list of int, the predicted class for each graph
    '''
    loader = DataLoader(graphs, batch_size=1, shuffle=False)
    frames, labels = [], []
    with torch.no_grad():
        for batch in tqdm.tqdm(loader):
            frames.append(int(batch.frame_mask.median().item()))
            # The model lives on DEVICE (see load_model), so the batch must too.
            scores = model(batch.to(DEVICE))
            # Softmax is monotonic, so the argmax of the raw scores is the same class.
            labels.append(int(scores.argmax(dim=1).item()))
    return frames, labels


def inference(behaviour, data, gat = True, save = False, path_to_save = None):
    ''' This function runs the inference on the specified behavior, and optionally
        saves the results in the specified path.

    'General_Contacts' produces one prediction column. Every other behaviour is
    evaluated twice, once as given (column behaviour + '_R') and once with the
    two individuals swapped (column behaviour + '_V').

    The caller's `data` is never modified: the swap is applied to a copy, so the
    same data can be passed again for the next behaviour.

    Args:
        behaviour: str, the behavior on which to run the inference (a key of MODELS_PATH)
        data: list of torch_geometric.data.Data (gat=True) or a 2-D numpy array
              whose second half of columns belongs to the second individual (gat=False)
        gat: bool, whether to use the GAT model or not (if False, the model is the baseline model)
        save: bool, whether to save the results or not
        path_to_save: str, the path where to save the results (required if save is True)
    Returns:
        outputs: pd.DataFrame with a 'Frame' column and the prediction column(s)
    Raises:
        ValueError: if save is True but path_to_save is None
    '''
    import copy  # local import: only needed to protect the caller's graphs below

    if save and path_to_save is None:
        # Fail before the (slow) inference instead of silently saving nothing.
        raise ValueError('path_to_save must be provided when save is True')

    model_path = MODELS_PATH[behaviour][0 if gat else 1]  # [GAT checkpoint, baseline model]
    model = load_model(model_path, DEVICE, behaviour, gat)

    if behaviour == 'General_Contacts':
        print('Running inference on General_Contacts')
        if gat:
            frames, labels = _predict_graph_labels(model, data)
            # dtype=float keeps the column types of the original zero-initialised frame.
            outputs = pd.DataFrame({'Frame': frames, behaviour: labels}, dtype=float)
        else:
            outputs = pd.DataFrame({'Frame': range(len(data)), behaviour: model.predict(data)})
    else:
        column_R, column_V = behaviour + '_R', behaviour + '_V'
        if gat:
            print('Running inference on', column_R)
            frames, labels_R = _predict_graph_labels(model, data)

            # Swap identities on a private copy. The return value of
            # utils.swap_identities was never used, so it presumably works in
            # place; swapping the caller's list would flip R and V for the
            # next behaviour that reuses it.
            swapped = copy.deepcopy(data)
            utils.swap_identities(swapped)
            print('Running inference on', column_V)
            _, labels_V = _predict_graph_labels(model, swapped)

            outputs = pd.DataFrame({'Frame': frames, column_R: labels_R, column_V: labels_V}, dtype=float)
        else:
            # Build the swapped view as a new array: second half of the columns
            # first, then the first half. The caller's array is left untouched.
            half = data.shape[1] // 2
            coords_V = np.concatenate([data[:, half:], data[:, :half]], axis=1)

            print('Running inference on', column_R)
            labels_R = model.predict(data)

            print('Running inference on', column_V)
            labels_V = model.predict(coords_V)

            outputs = pd.DataFrame({'Frame': range(len(data)), column_R: labels_R, column_V: labels_V})

    if save:
        outputs.to_csv(path_to_save, index = False)
    return outputs
    

  

def inference_all_behaviors(path_to_data, path_to_save, gat_mask = GAT_MASK):
    ''' This function runs the inference on all behaviors of MODELS_PATH, video by
        video, and saves one '<video>_output.csv' per video in the specified path.
    Args:
        path_to_data: str, the path to the dataset to run the inference on, it should be a folder with a .pkl file and the .h5 files
        path_to_save: str, the folder where to save the results (created if missing)
        gat_mask: dict, behaviour -> bool, True to use the GAT model and False to use the baseline model
    Raises:
        ValueError: if the graph dataset and the coordinate dataset do not contain the same videos
    '''
    import copy  # local import: used to hand each behaviour its own copy of the graphs

    data_coords = dataloader.DLCDataLoader(path_to_data, build_graph=False) # raw coordinates
    data_graph = dataloader.DLCDataLoader(path_to_data, load_dataset=True) # graph dataset

    # Both datasets must describe exactly the same videos.
    videos_graph = sorted(np.unique([data.file for data in data_graph]))
    videos_coords = sorted(np.unique([data[2] for data in data_coords]))
    if videos_graph != videos_coords:
        raise ValueError('The videos are different in the two datasets: %s vs %s' % (videos_graph, videos_coords))
    videos = videos_graph

    # Create the output folder up front so the run cannot fail at save time.
    os.makedirs(path_to_save, exist_ok=True)

    for video in videos:
        graphs = [d for d in data_graph if d.file == video]
        coords = [d[0] for d in data_coords.data_list if d[2] == video]

        print('Running inference on video', video)
        per_behaviour = []
        for behaviour in MODELS_PATH.keys():
            # inference() swaps the two individuals to produce the '_V' column
            # (utils.swap_identities on the graphs, a column swap on the array).
            # Give every behaviour its own copy so one swap cannot leak into the next.
            if gat_mask[behaviour]:
                result = inference(behaviour, copy.deepcopy(graphs), save = False, gat = True)
            else:
                result = inference(behaviour, coords[0].copy(), save = False, gat = False)
            per_behaviour.append(result.set_index('Frame'))

        # Align all behaviours on the frame index, order by frame, and treat
        # frames that a model did not cover as "behaviour absent".
        output = pd.concat(per_behaviour, axis=1).sort_index().fillna(0)

        output.to_csv(os.path.join(path_to_save, video + '_output.csv'))
        
def get_number_of_occurrences(data):
    ''' This function returns the number of occurrences of a behavior in the data,
        i.e. the number of times a 0 is immediately followed by a 1.

    The comparison is positional, so a pandas Series works whatever its index is.
    A sequence that already starts with 1 does not count that first bout, because
    no 0 precedes it.

    Args:
        data: 1-D sequence (list, numpy array or pd.Series) of 0/1 values
    Returns:
        count: int, the number of 0 -> 1 transitions
    '''
    values = np.asarray(data)
    if values.size < 2:
        return 0
    onsets = (values[:-1] == 0) & (values[1:] == 1)
    return int(np.count_nonzero(onsets))

def distribution_of_ocurrencies(data, column, num_interv = 6):
    ''' This function sums a column over consecutive, equally long intervals.

    The rows are cut into `num_interv` slices of len(data) // num_interv rows
    each; the trailing len(data) % num_interv rows are not part of any slice.

    Args:

    data: pd.DataFrame, the data
    column: str, the column of the data to analyze
    num_interv: int, the number of intervals to consider

    Returns:

    distribution: list with the sum of the column inside each interval'''
    # Width of one interval, in rows
    chunk = len(data) // num_interv
    return [data[column][k * chunk:(k + 1) * chunk].sum() for k in range(num_interv)]

def plot_distribution(distribution, column, path, video_name):
    ''' This function saves a bar plot of a per-interval distribution.
    Args:
        distribution: sequence with one value per interval (any length)
        column: str, the behaviour the values belong to, used in the title
        path: str, the file where the figure is saved
        video_name: str, the video the values belong to, used in the title
    '''
    fig, ax = plt.subplots()
    # One bar per interval, however many intervals were requested.
    ax.bar(range(len(distribution)), distribution)
    ax.set_xlabel('Decil')
    ax.set_ylabel('Number of occurrences')
    ax.set_title('Distribution of the number of occurrences of ' + column + ' for video ' + video_name)
    fig.savefig(path)
    plt.close(fig)  # figures are created in a loop; release them explicitly


def get_statistics(path_to_files, num_intervals = 6, fps = 15):
    ''' This function computes the statistics of the model outputs per video and saves them in csv files.

    For every '<video>_output.csv' in `path_to_files` a folder '<video>' is created
    holding 'statistics.csv', 'distribution.csv' and one '<behaviour>_distribution.png'
    per behaviour. A global 'statistics.csv' is written in `path_to_files`.

    Args:
        path_to_files: str, the path to the folder containing the csv files
        num_intervals: int, the number of intervals to consider for the distribution plots
        fps: number of frames per second, used to convert durations from frames to seconds
    Returns:
        statistics: pd.DataFrame with one row per (video, behaviour)
    Raises:
        ValueError: if num_intervals is smaller than 1
    '''
    if num_intervals < 1:
        raise ValueError('num_intervals must be at least 1')

    stat_columns = ['video', 'behavior', 'latancy', 'duration (s)', 'duration (frames)', 'number_of_occurrences']
    interval_columns = ['interval %d' % k for k in range(num_intervals)]

    # Only the per-video outputs: the global 'statistics.csv' written at the end
    # of a previous run lives in the same folder and must not be read back as a video.
    suffix = '_output.csv'
    files = sorted(f for f in os.listdir(path_to_files) if f.endswith(suffix))

    all_records = []
    for file in files:
        video = file[:-len(suffix)]
        video_dir = os.path.join(path_to_files, video)
        os.makedirs(video_dir, exist_ok=True)

        data = pd.read_csv(os.path.join(path_to_files, file))

        video_records = []
        distribution_records = []
        # The first column is skipped: it is not treated as a behaviour.
        for column in data.columns[1:]:
            active = data[column] > 0
            # Row of the first active frame; NaN when the behaviour never occurs
            # (idxmax alone would report row 0 in that case).
            latancy = active.idxmax() if active.any() else np.nan
            duration = data[column].sum()
            number_of_occurrences = get_number_of_occurrences(data[column])

            video_records.append({
                'video': video,
                'behavior': column,
                'latancy': latancy,
                'duration (s)': duration / fps,
                'duration (frames)': duration,
                'number_of_occurrences': number_of_occurrences,
            })

            interval_sums = distribution_of_ocurrencies(data, column, num_intervals)
            row = {'Behavior': column}
            row.update(zip(interval_columns, interval_sums))
            distribution_records.append(row)

            # Build images with the distribution
            plot_distribution(interval_sums, column, os.path.join(video_dir, column + '_distribution.png'), video)

        # Build each table once from its records instead of growing it by concat.
        distribution = pd.DataFrame(distribution_records, columns = ['Behavior'] + interval_columns)
        distribution.to_csv(os.path.join(video_dir, 'distribution.csv'), index = False, sep=';')

        statistics_per_video = pd.DataFrame(video_records, columns = stat_columns)
        statistics_per_video.to_csv(os.path.join(video_dir, 'statistics.csv'), index = False, sep=';')

        all_records.extend(video_records)

    # Save the statistics of all videos together
    statistics = pd.DataFrame(all_records, columns = stat_columns)
    statistics.to_csv(os.path.join(path_to_files, 'statistics.csv'), index = False, sep=';')

    return statistics



In [15]:
import importlib

importlib.reload(dataloader)

<module 'dataloader' from 'c:\\Users\\jalvarez\\Documents\\Code\\GitHubCOde\\Behavioral_Tagging_of_Mice_in_multiple_Mice_dataset_using_Deep_Learning\\src\\dataloader.py'>

In [16]:
path_to_outputs = r'c:\Users\jalvarez\Documents\Data\DataLoadaerTESTTSTST\outputs'

stats = get_statistics(path_to_outputs)

FileNotFoundError: [WinError 3] Le chemin d’accès spécifié est introuvable: 'c:\\Users\\jalvarez\\Documents\\Data\\DataLoadaerTESTTSTST\\outputs'

In [17]:
# Disable pandas' chained-assignment (SettingWithCopy) check for this session.
# NOTE(review): the output of this cell still shows "chained assignment"
# messages coming from the dataloader, so this option does not silence them.
pd.options.mode.chained_assignment = None

# Folder with the tracking files to analyse and folder for the per-video csv outputs.
# NOTE(review): absolute, machine-specific paths; adjust before re-running elsewhere.
path_to_data = r'c:\Users\jalvarez\Documents\Data\TEST'
path_to_save = r'c:\Users\jalvarez\Documents\Data\TEST\out'

# Runs every behaviour of MODELS_PATH on every video found in path_to_data.
inference_all_behaviors(path_to_data, path_to_save)


['DMD_mal_Test 1DLC_dlcrnetms5_More_BodyPartsJul9shuffle1_740000_el_filtered.h5', 'MDX_fem_Test 10DLC_dlcrnetms5_More_BodyPartsJul9shuffle1_740000_el_filtered.h5']
Loading data from c:\Users\jalvarez\Documents\Data\TEST, where we have 2 files
We have 2 files
Loading file DMD_mal_Test 1DLC_dlcrnetms5_More_BodyPartsJul9shuffle1_740000_el_filtered.h5
No behaviour file for DMD_mal_Test 1


You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  self.coords[ind].loc[frames_to_set_nan, (body_part, 'x')] = np.nan
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to updat

Loading file MDX_fem_Test 10DLC_dlcrnetms5_More_BodyPartsJul9shuffle1_740000_el_filtered.h5
No behaviour file for MDX_fem_Test 10


You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  self.coords[ind].loc[frames_to_set_nan, (body_part, 'x')] = np.nan
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to updat

Number of files: 2
Dataset loaded from c:\Users\jalvarez\Documents\Data\TEST\ds.pkl
Running inference on video DMD_mal_Test 1




Running inference on General_Contacts


100%|██████████| 2628/2628 [00:36<00:00, 71.74it/s]


Running inference on Sniffing_R


100%|██████████| 2628/2628 [00:36<00:00, 72.11it/s]


Running inference on Sniffing_V


100%|██████████| 2628/2628 [00:35<00:00, 73.96it/s]


Running inference on Sniffing_head_R
Running inference on Sniffing_head_V
Running inference on Sniffing_body_R
Running inference on Sniffing_body_V
Running inference on Sniffing_anogenital_R
Running inference on Sniffing_anogenital_V




Running inference on Following_R


100%|██████████| 2628/2628 [00:36<00:00, 72.59it/s]


Running inference on Following_V


100%|██████████| 2628/2628 [00:36<00:00, 72.98it/s]


Running inference on Dominance_R


100%|██████████| 2628/2628 [00:36<00:00, 72.58it/s]


Running inference on Dominance_V


100%|██████████| 2628/2628 [00:36<00:00, 72.55it/s]


Running inference on Grooming_R


100%|██████████| 2628/2628 [00:36<00:00, 72.46it/s]


Running inference on Grooming_V


100%|██████████| 2628/2628 [00:35<00:00, 73.65it/s]


Running inference on video MDX_fem_Test 10
Running inference on General_Contacts


100%|██████████| 5402/5402 [01:18<00:00, 68.71it/s]


Running inference on Sniffing_R


100%|██████████| 5402/5402 [01:21<00:00, 66.52it/s]


Running inference on Sniffing_V


100%|██████████| 5402/5402 [01:13<00:00, 73.65it/s]


Running inference on Sniffing_head_R
Running inference on Sniffing_head_V
Running inference on Sniffing_body_R
Running inference on Sniffing_body_V




Running inference on Sniffing_anogenital_R
Running inference on Sniffing_anogenital_V
Running inference on Following_R


100%|██████████| 5402/5402 [01:05<00:00, 82.91it/s]


Running inference on Following_V


100%|██████████| 5402/5402 [01:03<00:00, 84.60it/s]


Running inference on Dominance_R


100%|██████████| 5402/5402 [01:15<00:00, 71.18it/s]


Running inference on Dominance_V


100%|██████████| 5402/5402 [01:08<00:00, 79.17it/s]


Running inference on Grooming_R


100%|██████████| 5402/5402 [01:12<00:00, 74.19it/s]


Running inference on Grooming_V


100%|██████████| 5402/5402 [01:18<00:00, 68.47it/s]


In [114]:
data_coords.data_list

[(array([[0.7063875 , 0.70647292, 1.        , ..., 0.62508732, 0.57094032,
          1.        ],
         [0.70960781, 0.70647292, 1.        , ..., 0.63012969, 0.57161189,
          0.94117647],
         [0.71200313, 0.70647292, 1.        , ..., 0.67104531, 0.60798945,
          1.        ],
         ...,
         [0.4944    , 0.19241875, 1.        , ..., 0.16781211, 0.42656998,
          1.        ],
         [0.4944    , 0.19241875, 1.        , ..., 0.14798346, 0.42604767,
          1.        ],
         [0.4944    , 0.19238125, 1.        , ..., 0.13327537, 0.39741924,
          0.94117647]]),
  None,
  'DMD_mal_Test 1'),
 (array([[0.82870312, 0.19608125, 1.        , ..., 0.82753309, 0.55641936,
          1.        ],
         [0.82908906, 0.19672708, 1.        , ..., 0.82855717, 0.55665196,
          1.        ],
         [0.84225625, 0.19744167, 1.        , ..., 0.82895524, 0.5578886 ,
          1.        ],
         ...,
         [0.238925  , 0.77146667, 1.        , ..., 0.910284

In [115]:

# Scratch version of the per-video loop that inference_all_behaviors implements.
# NOTE(review): relies on videos, data_graph, data_coords, data_per_video_graph,
# data_per_video_coords and path_to_save already existing in the kernel.
for video in videos:
    data_per_video_graph.append([d for d in data_graph if d.file == video])
    data_per_video_coords.append([d[0] for d in data_coords.data_list if d[2] == video])
for i, video in enumerate(videos):
    print('Running inference on video', video)
    outputs = []

    # NOTE(review): MODELS as defined in this notebook has every entry commented
    # out, so this loop only does something if MODELS was filled in interactively.
    for behaviour in MODELS.keys():
        if MODELS[behaviour][0]:
            outputs.append(inference(behaviour, data_per_video_graph[i], save = False))
        else:
            # NOTE(review): gat is not passed here, so inference uses its default (gat=True)
            # even though coordinate arrays are handed over; confirm this is intended.
            outputs.append(inference(behaviour, data_per_video_coords[i][0], save = False))
    # Concatenate the outputs using the column 'frame' as index
    # Set the column 'frame' as index
    # NOTE(review): inference as defined above names this column 'Frame', not 'frame'.
    outputs = [output.set_index('frame') for output in outputs]
    # discard the column 'frame' from all the outputs except the first one
    #outputs = [output.drop(columns = ['frame']) for output in outputs[1:]]
    # Concatenate the outputs
    outputs = pd.concat(outputs, axis=1)
    # Save the outputs
    outputs.to_csv(os.path.join(path_to_save, video + '_output.csv'))


Running inference on video DMD_mal_Test 1
Running inference on Following_R
Running inference on Following_V
Running inference on video MDX_fem_Test 10
Running inference on Following_R
Running inference on Following_V


In [14]:
videos_graph = np.unique([data.file for data in data_graph])
videos_coords = np.unique([data[2] for data in data_coords])
data_per_video = []

In [19]:
# Order both lists in the same way so they can be compared element by element
videos_graph = sorted(videos_graph)
videos_coords = sorted(videos_coords)

# Stop here when the two datasets disagree: assigning the result of print()
# would leave videos set to None and only fail later, far from the cause.
if videos_graph != videos_coords:
    raise ValueError('The videos are different in the two datasets')
videos = videos_graph
del videos_graph, videos_coords

In [157]:
videos = np.unique([data.file for data in data])
data_per_video = []
for video in videos:
    data_per_video.append([d for d in data if d.file == video])
            

In [165]:
# Scratch loop: run every behaviour of MODELS_PATH on each video and save one csv per video.
# NOTE(review): relies on data_per_video and videos built in an earlier cell, and
# on an absolute, machine-specific output folder.
path_to_save = r'c:\Users\jalvarez\Documents\Data\DataLoadaerTESTTSTST\outputs'
for i, data in enumerate(data_per_video):
    # Keep the part of the file name that precedes the first 'DLC'
    video_name = videos[i].split('DLC')[0]
    print('Running inference on video', video_name)
    outputs = []
    for behaviour in MODELS_PATH.keys():
        # gat is left at its default (True), so every behaviour uses MODELS_PATH[behaviour][0].
        # NOTE(review): that entry is None for the Sniffing_head/body/anogenital behaviours.
        outputs.append(inference(behaviour, data, save = False))
    # Concatenate the outputs using the column 'frame' as index
    # Set the column 'frame' as index
    # NOTE(review): inference as defined above names this column 'Frame', not 'frame'.
    outputs = [output.set_index('frame') for output in outputs]
    # discard the column 'frame' from all the outputs except the first one
    #outputs = [output.drop(columns = ['frame']) for output in outputs[1:]]
    # Concatenate the outputs
    outputs = pd.concat(outputs, axis=1)
    # Save the outputs
    outputs.to_csv(os.path.join(path_to_save, video_name + '_output.csv'))




Running inference on video DMD_fem_Test_1
Running inference on General_Contacts


  0%|          | 0/2627 [00:00<?, ?it/s]

100%|██████████| 2627/2627 [00:36<00:00, 71.53it/s]


Running inference on Sniffing_R


100%|██████████| 2627/2627 [00:36<00:00, 71.31it/s]


Running inference on Sniffing_V


100%|██████████| 2627/2627 [00:36<00:00, 71.52it/s]


Running inference on video DMD_mal_Test_1
Running inference on General_Contacts


100%|██████████| 2628/2628 [00:37<00:00, 70.77it/s]


Running inference on Sniffing_R


100%|██████████| 2628/2628 [00:38<00:00, 68.96it/s]


Running inference on Sniffing_V


100%|██████████| 2628/2628 [00:36<00:00, 72.22it/s]


In [161]:
len(data_per_video[1])

2628

In [3]:
importlib.reload(models)

<module 'models' from 'c:\\Users\\jalvarez\\Documents\\Code\\GitHubCOde\\Behavioral_Tagging_of_Mice_in_multiple_Mice_dataset_using_Deep_Learning\\src\\models.py'>

In [4]:
def load_model(model_path, device, behaviour = 'General_Contact', gat = True):
    ''' This function loads a model from a given path and returns it.

    Kept in step with the definition used by `inference`, which passes `gat`
    as a fourth argument: when this cell is run after that one, this definition
    is the one in effect and must accept the same call.

    Args:
        model_path: path to the model
        device: device on which the model should be loaded
        behaviour: behaviour of the model
        gat: whether the model is a GAT model or not
    Returns:
        model: the loaded model
    '''
    model = get_model(behaviour, gat) # get the model skeleton (None for non-GAT models)
    if model is None:
        # Non-GAT model: the whole estimator is stored in a joblib file.
        return joblib.load(model_path)
    checkpoint = torch.load(model_path, map_location=device) # load the checkpoint
    model.load_state_dict(checkpoint['model_state_dict'])
    model.to(device) # send the model to the device
    model.eval() # set the model to evaluation mode
    return model

In [40]:
model_path = r'd:\Backup_mantenimiento_ruche\Data\Checkpoints\new_encoder_no_linearResCon\General_Contacts\checkpoint_epoch_310.pth'

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [41]:
model = load_model(model_path, device)

In [42]:
# Load the data
data_path = r'c:\Users\jalvarez\Documents\Data\DataLoadaerTESTTSTST\dataset.pkl'

In [43]:
# Analyze the data
data = torch.load(data_path)

In [44]:
# Get the behaviour
loader = DataLoader(data, batch_size=1, shuffle=False)



In [45]:
a = next(iter(loader))
a.frame_mask.median().item()

2

In [46]:
behaviour = 'General_Contact'

In [47]:
# Scratch run of one model over the loader, one graph per batch.
# Only the '_R' column is filled in; the '_V' column keeps its initial zeros.
outputs = pd.DataFrame(np.zeros((len(loader), 3)), columns = ['frame', behaviour + '_R', behaviour + '_V']) # create the DataFrame to store the results
softmax = nn.Softmax(dim=1)
for i, batch in enumerate(tqdm.tqdm(loader)):
    # The median of frame_mask is stored as the frame of this graph
    outputs.loc[i, 'frame'] = batch.frame_mask.median().item()
    with torch.no_grad():
        out = model(batch)
        out = softmax(out)
        # Keep the index of the most probable class as the prediction
        outputs.loc[i, behaviour + '_R'] = out.argmax(dim=1).cpu().numpy()
    

100%|██████████| 2627/2627 [00:41<00:00, 63.80it/s]


In [89]:
def create_csv_with_output_behaviour(output, behaviour, path):
    ''' Write per-frame model output to a two-column csv file.
    Args:
        output: rows of (frame, prediction) accepted by pd.DataFrame
        behaviour: the behaviour analyzed, used as the name of the second column
        path: the path where the csv file should be saved
    '''
    header = ['frame', behaviour]
    pd.DataFrame(output, columns = header).to_csv(path, index = False)

In [90]:
path_to_save = r'c:\Users\jalvarez\Documents\Data\DataLoadaerTESTTSTST\output.csv'
create_csv_with_output_behaviour(output, 'General_Contact', path_to_save)

In [4]:
# ground truth file
path_to_ground_truth = r'c:\Users\jalvarez\Documents\Data\DataLoadaerTESTTSTST\GT\Test_1.csv'
ground_truth = pd.read_csv(path_to_ground_truth)

In [5]:
ground_truth

Unnamed: 0,Frame,General_Contacts,Sniffing_R,Sniffing_head_R,Sniffing_anogenital_R,Sniffing_body_R,Following_R,Dominance_R,Rearing_R,Grooming_R,Sniffing_head_V,Sniffing_anogenital_V,Sniffing_body_V,Sniffing_V,Rearing_V,Grooming_V,Following_V,Dominance_V
0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5402,5402,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5403,5403,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5404,5404,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5405,5405,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
path_to_output = r'c:\Users\jalvarez\Documents\Data\DataLoadaerTESTTSTST\outputs\MDX_fem_Test_1_output.csv'
output = pd.read_csv(path_to_output)
output

Unnamed: 0,frame,General_Contacts,Sniffing_R,Sniffing_V,Following_R,Following_V,Grooming_R,Grooming_V
0,2.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
1,3.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
2,4.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
3,5.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
4,6.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...
5398,5400.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5399,5401.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5400,5402.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5401,5403.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
output.loc[:,'Grooming_R'].value_counts()

Grooming_R
0.0    4984
1.0     419
Name: count, dtype: int64

In [8]:
ground_truth.loc[:,'Grooming_R'].value_counts()

Grooming_R
0.0    5407
Name: count, dtype: int64

In [9]:
output.loc[:,'Sniffing_R'].value_counts()

Sniffing_R
1.0    3525
0.0    1878
Name: count, dtype: int64

In [10]:
output.loc[:,'Sniffing_V'].value_counts()

Sniffing_V
0.0    4035
1.0    1368
Name: count, dtype: int64

In [11]:
# drop first and last rows of ground truth
ground_truth = ground_truth.iloc[2:-2, :]
ground_truth

Unnamed: 0,Frame,General_Contacts,Sniffing_R,Sniffing_head_R,Sniffing_anogenital_R,Sniffing_body_R,Following_R,Dominance_R,Rearing_R,Grooming_R,Sniffing_head_V,Sniffing_anogenital_V,Sniffing_body_V,Sniffing_V,Rearing_V,Grooming_V,Following_V,Dominance_V
2,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5400,5400,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5401,5401,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5402,5402,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5403,5403,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [12]:
ground_truth.loc[:,'Sniffing_R'].value_counts()

Sniffing_R
1.0    3521
0.0    1882
Name: count, dtype: int64

In [13]:
ground_truth.loc[:,'Sniffing_V'].value_counts()

Sniffing_V
0.0    5252
1.0     151
Name: count, dtype: int64

In [14]:
ground_truth['General_Contacts']

2       0.0
3       0.0
4       0.0
5       0.0
6       0.0
       ... 
5400    0.0
5401    0.0
5402    0.0
5403    0.0
5404    0.0
Name: General_Contacts, Length: 5403, dtype: float64

In [24]:
# ground truth file


# Get accuracy
def get_accuracy(ground_truth, output, behavior):
    ''' This function returns the accuracy of the model for one behavior.
    Args:
        ground_truth: the ground truth, indexable by `behavior` (e.g. a pd.DataFrame)
        output: the output of the model, indexable by `behavior`, aligned row by row with the ground truth
        behavior: str, the column to compare
    Returns:
        accuracy: fraction of frames where output and ground truth agree
        accuracy_active_class: fraction of the frames labelled 1 that are predicted 1
                               (nan when the ground truth has no frame labelled 1)
        accuracy_inactive_class: fraction of the frames labelled 0 that are predicted 0
                                 (nan when the ground truth has no frame labelled 0)
    Raises:
        ValueError: if the two columns do not have the same shape
    '''
    truth = np.asarray(ground_truth[behavior])
    predicted = np.asarray(output[behavior])
    if truth.shape != predicted.shape:
        raise ValueError('ground truth and output must have the same shape, got %s and %s'
                         % (truth.shape, predicted.shape))

    def _class_accuracy(label):
        # Taking the mean of an empty selection warns and yields nan; return nan
        # explicitly so a class that is absent from the ground truth stays quiet.
        selected = truth == label
        if not selected.any():
            return float('nan')
        return float((predicted[selected] == label).mean())

    accuracy = float((truth == predicted).mean()) if truth.size else float('nan')
    return accuracy, _class_accuracy(1), _class_accuracy(0)

get_accuracy(ground_truth, output, 'Grooming_V')

  accuracy_active_class = (output[behavior].values[ground_truth[behavior].values == 1] == 1).mean()


(0.7888210253562835, nan, 0.7888210253562835)

In [2]:
# Load the model
def load_checkpoint(model, optimizer, path, device):
    ''' Restore a model (and optionally its optimizer) from a training checkpoint.
    Args:
        model: module whose weights are restored from 'model_state_dict'
        optimizer: optimizer restored from 'optimizer_state_dict', or None to skip it
        path: path to the checkpoint file
        device: device the checkpoint tensors are mapped to
    Returns:
        model: the same model object, with the restored weights
        optimizer: the same optimizer object (None if None was given)
        epoch: the value stored under 'epoch' in the checkpoint
    '''
    checkpoint = torch.load(path, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    if optimizer is not None:
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    epoch = checkpoint['epoch']
    # The stored loss is not read: it was never used, and requiring the key
    # made checkpoints saved without it impossible to load.
    print(f"Checkpoint loaded from {path}, at epoch {epoch}")
    return model, optimizer, epoch

In [3]:
# Pick the GPU when torch can see one, otherwise the CPU, and show the choice.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

# Load the data
# NOTE(review): absolute, machine-specific path. torch.load unpickles the file,
# so it should only be used on files from a trusted source.
dataset = torch.load(r'c:\Users\jalvarez\Documents\Data\LargeDataset\entire_dataset.pkl', map_location=device)



cpu
