In [1]:
import os
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import ipynb
from scipy.spatial import distance_matrix
import random
import sys as sys
pd.options.mode.chained_assignment = None

In [None]:
def summarise_listening_history(triplets):
    """
    Aggregate per-track listening records into one summary row per user.

    Parameters:
        triplets (type : pd.DataFrame): a dataframe whose columns are

            user : a unique id of the user.

            track_id : the id of a track.

            listening_count : the number of times the user has listened to the track.

            ... : other columns describing the users, the tracks or the context of
            the interaction between the user and the track (ignored here).

    Return :
        a dataframe with one row per user (ordered by user id) whose columns are
            user : the unique id of the user

            listening_count : the total number of listenings of the user

            track_count : the number of different tracks the user has listened to
            (rows with a missing track_id are not counted)
    """
    # String aggregator names are used instead of the builtins `sum`/`len`:
    # pandas deprecates passing builtin callables to `agg`, and `nunique`
    # counts *distinct* tracks even if a (user, track) pair spans several rows.
    users_summary = (
        triplets.groupby('user')
        .agg({'listening_count': 'sum', 'track_id': 'nunique'})
        .reset_index()
    )
    users_summary.columns = ['user', 'listening_count', 'track_count']

    return users_summary

In [None]:
def user_filter(df, nlist_min = 1, nlist_max = None, ntracks_min = 1, ntracks_max = None):
    """
    Take a dataframe corresponding to the summary of the users listening history and
    select the users according to threshold values (see parameters description).

    Parameters:
        df : a dataframe whose columns are
            user : the unique id of the user

            listening_count : the total number of listenings of the user

            track_count : the number of different tracks the user has listened to

        nlist_min : the minimum number of listenings allowed for a user (inclusive).

        nlist_max : the maximum number of listenings allowed for a user (inclusive).
        None means no upper bound.

        ntracks_min : the minimum number of tracks allowed for a user (inclusive).

        ntracks_max : the maximum number of tracks allowed for a user (inclusive).
        None means no upper bound.

    Return :
        A pd.Series (the `user` column of df, original index preserved) holding
        the ids of the users that satisfy all the thresholds.
    """
    # Lower bounds always apply.
    selected = (df['listening_count'] >= nlist_min) & (df['track_count'] >= ntracks_min)

    # An upper bound of None is simply skipped rather than replaced by the
    # column maximum: this keeps the function working on an empty dataframe
    # and avoids a NaN maximum wiping out every row.
    if nlist_max is not None:
        selected = selected & (df['listening_count'] <= nlist_max)

    if ntracks_max is not None:
        selected = selected & (df['track_count'] <= ntracks_max)

    return df.loc[selected, 'user']