# `dskit` demo

In [9]:
import numpy as np
import pandas as pd

from dskit.demo import generate_demo_df
from dskit.pivots import check_empty_values, get_target_pivot
from dskit.preprocessing import merge_rare_values

In [2]:
# Display configuration: allow wide/long frames to render in full and
# show floats with three decimal places.
pd.options.display.max_rows = 500
pd.options.display.max_columns = 500
pd.options.display.width = 1000
pd.options.display.float_format = '{:.3f}'.format

In [3]:
# Build the demo frame that every example cell in this notebook reads from.
DEMO_SIZE = 1_000
df = generate_demo_df(size=DEMO_SIZE)

## 1. `dskit.pivots`
### 1.1. `dskit.pivots.check_empty_values`

In [4]:
# Frequency of each `children_count` value, keeping NaN as its own bucket,
# ordered by the value rather than by frequency.
children_counts = df['children_count'].value_counts(dropna=False)
children_counts.sort_index()

children_count
0.000    335
1.000    345
2.000    188
3.000     73
4.000     13
NaN       46
Name: count, dtype: int64

In [5]:
# Print how many `children_count` entries are NaN and how many are zero
# (absolute count and share of rows, as shown in the output below).
check_empty_values(df, col='children_count')

nans:  46 (4.6%)
zeros: 335 (33.5%)


### 1.2. `dskit.pivots.get_target_pivot`

In [6]:
# Pivot of `total_payments` per `client_class`, alongside each class's
# item count and share of rows. The displayed target value appears to be
# a per-class average — confirm against the get_target_pivot docstring.
get_target_pivot(
    df,
    col='client_class',
    target_col='total_payments',
)

Unnamed: 0_level_0,total_payments,n_items,%_items
client_class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A,19 377.67,45,4.5%
B,5 083.39,155,15.5%
C,1 008.81,800,80.0%


In [7]:
# Pivot of the `risk` target per `client_class`.
# NOTE(review): positive_target=False presumably marks `risk` as a
# "lower is better" target — confirm against the get_target_pivot docstring.
get_target_pivot(
    df,
    col='client_class',
    target_col='risk',
    positive_target=False,
)

Unnamed: 0_level_0,risk,n_items,%_items
client_class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A,6.7%,45,4.5%
B,9.0%,155,15.5%
C,20.9%,800,80.0%


In [8]:
# Pivot of `risk` per `client_class`, extended with a revenue column:
# passing revenue_col adds `total_payments` and its percentage share
# to the table (see the output below).
get_target_pivot(
    df,
    col='client_class',
    target_col='risk',
    revenue_col='total_payments',
    positive_target=False,
)

Unnamed: 0_level_0,risk,n_items,%_items,total_payments,%_total_payments
client_class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
A,6.7%,45,4.5%,871 994.93,35.3%
B,9.0%,155,15.5%,787 925.61,31.9%
C,20.9%,800,80.0%,807 045.60,32.7%


## 2. `dskit.preprocessing`
### 2.1. `dskit.preprocessing.merge_rare_values`

In [14]:
# Collapse infrequent `citizenship` values and store the result in a new
# column, leaving the original column untouched.
# NOTE(review): threshold=0.05 is presumably the minimum share of rows a
# value needs to stay separate — confirm against merge_rare_values.
merged_citizenship = merge_rare_values(df['citizenship'], threshold=0.05)
df['citizenship_merged'] = merged_citizenship

In [16]:
# Compare the number of distinct categories before and after merging rare
# values: first the raw column, then the merged one.
print(df['citizenship'].nunique())
print(df['citizenship_merged'].nunique())

10

In [15]:
# Pivot of `total_payments` per merged citizenship category, with the
# target explicitly flagged via positive_target=True.
get_target_pivot(
    df,
    col='citizenship_merged',
    target_col='total_payments',
    positive_target=True,
)

Unnamed: 0_level_0,total_payments,n_items,%_items
citizenship_merged,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
id,2 676.42,165,16.5%
us,2 647.60,192,19.2%
cn,2 518.93,259,25.9%
in,2 505.89,234,23.4%
OTHER,1 854.90,150,15.0%
