### PANDAS, PYTHON

# How to use Pandas Count and Value_Counts

[Website](https://kanoki.org/2020/03/09/how-to-use-pandas-count-and-value_counts/)

In [1]:
import numpy as np
import pandas as pd

In [34]:
idx = pd.MultiIndex.from_tuples([('Chris',48), ('Brian',np.nan), ('David',65),('Chris',34),('John',28)], names=['Name', 'Age'])
col = ['Salary']

df = pd.DataFrame([120000, 140000, 90000, 101000, 59000], idx, col)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Salary
Name,Age,Unnamed: 2_level_1
Chris,48.0,120000
Brian,,140000
David,65.0,90000
Chris,34.0,101000
John,28.0,59000


In [35]:
df.groupby(level='Name').count()

Unnamed: 0_level_0,Salary
Name,Unnamed: 1_level_1
Brian,1
Chris,2
David,1
John,1


In [36]:
df=df.reset_index()

In [37]:
df.groupby(by='Name').count()

Unnamed: 0_level_0,Age,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Brian,0,1
Chris,2,2
David,1,1
John,1,1


In [38]:
df.groupby(by='Name').agg('count')

Unnamed: 0_level_0,Age,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Brian,0,1
Chris,2,2
David,1,1
John,1,1


In [39]:
df['freq']=df.groupby(by='Name')['Name'].transform('count')
df

Unnamed: 0,Name,Age,Salary,freq
0,Chris,48.0,120000,2
1,Brian,,140000,1
2,David,65.0,90000,1
3,Chris,34.0,101000,2
4,John,28.0,59000,1


In [41]:
df['Name'].map(df['Name'].value_counts())

0    2
1    1
2    1
3    2
4    1
Name: Name, dtype: int64

In [17]:
df['Name'].value_counts()

Chris    2
Brian    1
David    1
John     1
Name: Name, dtype: int64

In [18]:
#sort by frequency
df['Name'].value_counts(sort=True)

Chris    2
Brian    1
David    1
John     1
Name: Name, dtype: int64

In [19]:
# sort by ascending
df['Name'].value_counts(sort=True, ascending=True)

Brian    1
David    1
John     1
Chris    2
Name: Name, dtype: int64

In [20]:
# Relative counts - find percentage
df['Name'].value_counts(normalize=True)

Chris    0.4
Brian    0.2
David    0.2
John     0.2
Name: Name, dtype: float64

In [21]:
df['Salary'].value_counts(bins=2)

(99500.0, 140000.0]     3
(58918.999, 99500.0]    2
Name: Salary, dtype: int64

In [23]:
df = pd.DataFrame(np.random.randint(0, 2, (5, 3)), columns=["A", "B","C"])
df

Unnamed: 0,A,B,C
0,0,0,0
1,1,0,0
2,0,0,1
3,1,1,1
4,0,0,0


In [24]:
df.apply(pd.Series.value_counts, axis=1)

Unnamed: 0,0,1
0,3.0,
1,2.0,1.0
2,2.0,1.0
3,,3.0
4,3.0,


In [25]:
df.apply(pd.Series.value_counts, axis=0)

Unnamed: 0,A,B,C
0,3,4,3
1,2,1,2


In [26]:
df1 = df.melt(var_name='columns', value_name='values')
pd.crosstab(index=df1['values'], columns=df1['columns'])

columns,A,B,C
values,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,3,4,3
1,2,1,2


In [29]:
# By column
df[df == 1].sum(axis=0)

A    2.0
B    1.0
C    2.0
dtype: float64

In [30]:
# By row
df[df == 1].sum(axis=1)

0    0.0
1    1.0
2    1.0
3    3.0
4    0.0
dtype: float64