In [1]:
import pandas as pd
import numpy as np
# Show at most 50 rows when a DataFrame is displayed.
pd.options.display.max_rows = 50

# Value counts

In [2]:
def construct_value_counts_df(df_values_counts, name_frequency, name_values, name_variables):
    """Turn a long table of value counts into one ranked column per variable.

    This is meant as a sub-function of value_counts_df. Every row of the
    input describes one (variable, value) pair together with its count; the
    output has one column per variable whose cells read "value (count)".

    Args:
        df_values_counts: long-format DataFrame with one row per
            (variable, value) pair. Rows must already be in the order in
            which they should be ranked inside each variable; this function
            does not sort.
        name_frequency: name of the column holding the counts.
        name_values: name of the column holding the values.
        name_variables: name of the column holding the variable
            (original column) names.

    Returns:
        pd.DataFrame with one column per distinct entry of name_variables
        (in order of first appearance) and an index named 'Rank' that starts
        at 1. Columns with fewer distinct values than the longest one are
        padded with NaN at the bottom.
    """
    # Build every "value (count)" label in one vectorised pass. Casting the
    # values to str keeps the concatenation working for non-string values.
    labels = (
        df_values_counts[name_values].astype(str)
        + ' ('
        + df_values_counts[name_frequency].astype(str)
        + ')'
    )
    variables = df_values_counts[name_variables]

    # Each variable gets its own 0-based Series. The DataFrame constructor
    # aligns them on that index and pads the shorter ones with NaN, so the
    # result is only as long as the longest column rather than as long as
    # the whole input.
    columns = {
        col_name: labels[variables == col_name].reset_index(drop=True)
        for col_name in variables.unique()
    }

    result = pd.DataFrame(columns)
    result.index = pd.RangeIndex(1, len(result) + 1, name='Rank')

    # Drop rows that carry no label in any column.
    return result.dropna(axis=0, how='all')



def value_counts_df(df,null_value = '*Keine Angabe (Null)*',
                    construct_value_counts_df = construct_value_counts_df):
    """Return a DataFrame where each column lists its values by frequency.

    The result is similar to calling value_counts() on every column at once:
    each cell reads "value (count)" and the index is the rank, starting at 1
    for the most frequent value of each column. Values on the same row are
    not related to each other. Column labels are converted to strings and
    the result columns are ordered by those strings.

    Procedure:
        The DataFrame is melted into a long (column name, value) table, the
        occurrences of every pair are counted, the pairs are sorted by
        column name and descending count, and construct_value_counts_df
        reshapes that table into the final layout.

    Args:
        df: DataFrame for which you want to count values. It is not modified.
        null_value: how to display null values.
        construct_value_counts_df: function that rebuilds the wide result
            from the long table of counts. It is called with the long
            DataFrame followed by the names of its frequency, value and
            variable columns.

    Returns:
        pd.DataFrame
    """
    name_frequency = 'Frequenz'
    name_values = 'Wert'
    name_variables = 'Spalte'

    # pd.melt rejects (or, in older versions, warns about) a value_name that
    # equals an existing column label, so melt under a label that is
    # guaranteed to be free and rename afterwards.
    melt_value_name = name_values
    while melt_value_name in df.columns:
        melt_value_name = '_' + melt_value_name

    # Name both output columns explicitly: without var_name, melt names the
    # variable column after df.columns.name whenever that is set.
    long_df = pd.melt(df, var_name=name_variables, value_name=melt_value_name)
    long_df = long_df.rename(columns={melt_value_name: name_values})

    # Fill null values with the given parameter and use str everywhere to
    # avoid incompatibilities between the dtypes of the original columns.
    long_df = long_df.fillna(null_value).astype(str)

    # Count how often every (column, value) pair occurs.
    counts = (
        long_df.groupby([name_variables, name_values])
        .size()
        .reset_index(name=name_frequency)
    )

    # Order by column name, then most frequent value first, and renumber
    # the rows so that the index is contiguous again.
    counts = counts.sort_values(
        by=[name_variables, name_frequency], ascending=[True, False]
    ).reset_index(drop=True)

    return construct_value_counts_df(counts,
                                     name_frequency,
                                     name_values,
                                     name_variables)

# Usage
**<font color = "green">Turn the cell below to "Code" then execute the whole notebook.</font>**

