# Pandas Tips and Tricks

## 01- How to find the version

In [1]:
import pandas as pd
pd.__version__

'1.3.4'

In [2]:
# Another way
pd.show_versions()


INSTALLED VERSIONS
------------------
commit           : 945c9ed766a61c7d2c0a7cbb251b6edebf9cb7d5
python           : 3.9.7.final.0
python-bits      : 64
OS               : Windows
OS-release       : 10
Version          : 10.0.19044
machine          : AMD64
processor        : Intel64 Family 6 Model 69 Stepping 1, GenuineIntel
byteorder        : little
LC_ALL           : None
LANG             : None
LOCALE           : English_Pakistan.1252

pandas           : 1.3.4
numpy            : 1.20.3
pytz             : 2021.3
dateutil         : 2.8.2
pip              : 21.2.4
setuptools       : 58.0.4
Cython           : 0.29.24
pytest           : 6.2.4
hypothesis       : None
sphinx           : 4.2.0
blosc            : None
feather          : None
xlsxwriter       : 3.0.1
lxml.etree       : 4.6.3
html5lib         : 1.1
pymysql          : None
psycopg2         : None
jinja2           : 2.11.3
IPython          : 7.29.0
pandas_datareader: None
bs4              : 4.10.0
bottleneck       : 1.3.2
fsspe

## 02- Make a DataFrame

In [3]:
# Method 1
df = pd.DataFrame({
    'A Col': [1,2,3,7,8],
    'B Col': [4,5,6,34,65]
})
df.head()

Unnamed: 0,A Col,B Col
0,1,4
1,2,5
2,3,6
3,7,34
4,8,65


In [4]:
# Method 2
import numpy as np
arr = np.array([[1,2,3], [4,5,6], [7,8,9]])
pd.DataFrame(arr)

Unnamed: 0,0,1,2
0,1,2,3
1,4,5,6
2,7,8,9


In [5]:
# Method 3
pd.DataFrame(np.random.rand(4,8))

Unnamed: 0,0,1,2,3,4,5,6,7
0,0.520696,0.281294,0.898231,0.762289,0.473464,0.246602,0.520594,0.742727
1,0.673608,0.609321,0.439136,0.965792,0.160379,0.292747,0.936239,0.368097
2,0.958488,0.500491,0.675518,0.007869,0.091839,0.216158,0.749521,0.341853
3,0.803434,0.723253,0.092252,0.787798,0.507607,0.783766,0.217691,0.2779


In [6]:
pd.DataFrame(np.random.rand(4,9), columns=list('ABCDEFGHI'))

Unnamed: 0,A,B,C,D,E,F,G,H,I
0,0.550634,0.405515,0.437939,0.106501,0.05519,0.344821,0.741935,0.544415,0.379058
1,0.548297,0.33131,0.210745,0.23013,0.675901,0.923943,0.480303,0.583198,0.749846
2,0.822629,0.532171,0.507738,0.670042,0.380661,0.044877,0.851625,0.135735,0.992915
3,0.032711,0.970907,0.048611,0.382642,0.111289,0.17922,0.725025,0.325443,0.204811


## 03- How to Rename Columns

In [7]:
df = pd.DataFrame({
    'A Col': [1,2,3,7,8],
    'B Col': [4,5,6,34,65]
})
df.head()

Unnamed: 0,A Col,B Col
0,1,4
1,2,5
2,3,6
3,7,34
4,8,65


In [8]:
# Method 1
df.rename(columns={'A Col': 'Col_A', 'B Col': 'Col_B'}, inplace=True)
df.head()

Unnamed: 0,Col_A,Col_B
0,1,4
1,2,5
2,3,6
3,7,34
4,8,65


In [9]:
# Method 2
df.columns = ['col_aa', 'col_bb']
df.head()

Unnamed: 0,col_aa,col_bb
0,1,4
1,2,5
2,3,6
3,7,34
4,8,65


In [10]:
# Rename any Specific character
df.columns = df.columns.str.replace('_', '*')
df.head()

Unnamed: 0,col*aa,col*bb
0,1,4
1,2,5
2,3,6
3,7,34
4,8,65


In [11]:
# Adding prefix to columns
df = df.add_prefix('baba_')
df.head()

Unnamed: 0,baba_col*aa,baba_col*bb
0,1,4
1,2,5
2,3,6
3,7,34
4,8,65


In [12]:
# Adding suffix to columns
df = df.add_suffix('haha')
df.head()

Unnamed: 0,baba_col*aahaha,baba_col*bbhaha
0,1,4
1,2,5
2,3,6
3,7,34
4,8,65


In [13]:
df.columns = ['col_a', 'col_b']
df.head()

Unnamed: 0,col_a,col_b
0,1,4
1,2,5
2,3,6
3,7,34
4,8,65


## 04- Using Template Data

In [14]:
# import libraries
import pandas as pd
import numpy as np
import seaborn as sns

# import dataset
df = sns.load_dataset('tips')
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [15]:
# Summary of Data
df.describe()

Unnamed: 0,total_bill,tip,size
count,244.0,244.0,244.0
mean,19.785943,2.998279,2.569672
std,8.902412,1.383638,0.9511
min,3.07,1.0,1.0
25%,13.3475,2.0,2.0
50%,17.795,2.9,2.0
75%,24.1275,3.5625,3.0
max,50.81,10.0,6.0


In [16]:
# Saving Dataset
df.to_csv('tips.csv')
df.to_excel('tips.xlsx')

## 05- Using Your own Data

In [17]:
# import dataset from local drive
df = pd.read_csv('tips.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,0,16.99,1.01,Female,No,Sun,Dinner,2
1,1,10.34,1.66,Male,No,Sun,Dinner,3
2,2,21.01,3.5,Male,No,Sun,Dinner,3
3,3,23.68,3.31,Male,No,Sun,Dinner,2
4,4,24.59,3.61,Female,No,Sun,Dinner,4


## 06- Reverse Row Order

In [1]:
import pandas as pd
import seaborn as sns

df = sns.load_dataset('titanic')
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [3]:
# Reversing Row wise
df.loc[::-1].head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
890,0,3,male,32.0,0,0,7.75,Q,Third,man,True,,Queenstown,no,True
889,1,1,male,26.0,0,0,30.0,C,First,man,True,C,Cherbourg,yes,True
888,0,3,female,,1,2,23.45,S,Third,woman,False,,Southampton,no,False
887,1,1,female,19.0,0,0,30.0,S,First,woman,False,B,Southampton,yes,True
886,0,2,male,27.0,0,0,13.0,S,Second,man,True,,Southampton,no,True


In [4]:
# Reset the index number
df.loc[::-1].reset_index(drop=True).head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,32.0,0,0,7.75,Q,Third,man,True,,Queenstown,no,True
1,1,1,male,26.0,0,0,30.0,C,First,man,True,C,Cherbourg,yes,True
2,0,3,female,,1,2,23.45,S,Third,woman,False,,Southampton,no,False
3,1,1,female,19.0,0,0,30.0,S,First,woman,False,B,Southampton,yes,True
4,0,2,male,27.0,0,0,13.0,S,Second,man,True,,Southampton,no,True


## 07- Reverse Column Order

In [8]:
import pandas as pd
import seaborn as sns

df = sns.load_dataset('titanic')
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [9]:
df.loc[:, ::-1].head()

Unnamed: 0,alone,alive,embark_town,deck,adult_male,who,class,embarked,fare,parch,sibsp,age,sex,pclass,survived
0,False,no,Southampton,,True,man,Third,S,7.25,0,1,22.0,male,3,0
1,False,yes,Cherbourg,C,False,woman,First,C,71.2833,0,1,38.0,female,1,1
2,True,yes,Southampton,,False,woman,Third,S,7.925,0,0,26.0,female,3,1
3,False,yes,Southampton,C,False,woman,First,S,53.1,0,1,35.0,female,1,1
4,True,no,Southampton,,True,man,Third,S,8.05,0,0,35.0,male,3,0


## 08- Select a Column by dtype

In [10]:
import pandas as pd
import seaborn as sns

df = sns.load_dataset('titanic')
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [11]:
df.dtypes

survived          int64
pclass            int64
sex              object
age             float64
sibsp             int64
parch             int64
fare            float64
embarked         object
class          category
who              object
adult_male         bool
deck           category
embark_town      object
alive            object
alone              bool
dtype: object

In [12]:
# Only select those have numeric dtype
df.select_dtypes(include=['number']).head()

Unnamed: 0,survived,pclass,age,sibsp,parch,fare
0,0,3,22.0,1,0,7.25
1,1,1,38.0,1,0,71.2833
2,1,3,26.0,0,0,7.925
3,1,1,35.0,1,0,53.1
4,0,3,35.0,0,0,8.05


In [13]:
# Only select those have object dtype
df.select_dtypes(include=['object']).head()

Unnamed: 0,sex,embarked,who,embark_town,alive
0,male,S,man,Southampton,no
1,female,C,woman,Cherbourg,yes
2,female,S,woman,Southampton,yes
3,female,S,woman,Southampton,yes
4,male,S,man,Southampton,no


In [14]:
# Only select those have multiple dtype
df.select_dtypes(include=['object', 'number']).head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,who,embark_town,alive
0,0,3,male,22.0,1,0,7.25,S,man,Southampton,no
1,1,1,female,38.0,1,0,71.2833,C,woman,Cherbourg,yes
2,1,3,female,26.0,0,0,7.925,S,woman,Southampton,yes
3,1,1,female,35.0,1,0,53.1,S,woman,Southampton,yes
4,0,3,male,35.0,0,0,8.05,S,man,Southampton,no


In [15]:
# exclude numeric dtype columns
df.select_dtypes(exclude=['number']).head()

Unnamed: 0,sex,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,male,S,Third,man,True,,Southampton,no,False
1,female,C,First,woman,False,C,Cherbourg,yes,False
2,female,S,Third,woman,False,,Southampton,yes,True
3,female,S,First,woman,False,C,Southampton,yes,False
4,male,S,Third,man,True,,Southampton,no,True


## 09- Convert String into Numeric

In [18]:
df = pd.DataFrame({
    'col_A': ['1','2','3','4','5'],
    'col_B': ['11','23','3','47','5']
})
df.head()

Unnamed: 0,col_A,col_B
0,1,11
1,2,23
2,3,3
3,4,47
4,5,5


In [19]:
df.dtypes

col_A    object
col_B    object
dtype: object

In [21]:
# Converting string into int
df.astype({'col_A': 'float64', 'col_B': 'int64'}).dtypes

col_A    float64
col_B      int64
dtype: object

In [22]:
pd.to_numeric(df['col_A'], errors='coerce')

0    1
1    2
2    3
3    4
4    5
Name: col_A, dtype: int64

## 10- Reduce DataFrame Size

In [23]:
df = sns.load_dataset('titanic')
df.shape

(891, 15)

In [24]:
df.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 15 columns):
 #   Column       Non-Null Count  Dtype   
---  ------       --------------  -----   
 0   survived     891 non-null    int64   
 1   pclass       891 non-null    int64   
 2   sex          891 non-null    object  
 3   age          714 non-null    float64 
 4   sibsp        891 non-null    int64   
 5   parch        891 non-null    int64   
 6   fare         891 non-null    float64 
 7   embarked     889 non-null    object  
 8   class        891 non-null    category
 9   who          891 non-null    object  
 10  adult_male   891 non-null    bool    
 11  deck         203 non-null    category
 12  embark_town  889 non-null    object  
 13  alive        891 non-null    object  
 14  alone        891 non-null    bool    
dtypes: bool(2), category(2), float64(2), int64(4), object(5)
memory usage: 313.7 KB


In [25]:
# Getting random 10% of whole data as Sample
df.sample(frac=0.1).shape

(89, 15)

In [27]:
df.sample(frac=0.1).info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
Int64Index: 89 entries, 387 to 647
Data columns (total 15 columns):
 #   Column       Non-Null Count  Dtype   
---  ------       --------------  -----   
 0   survived     89 non-null     int64   
 1   pclass       89 non-null     int64   
 2   sex          89 non-null     object  
 3   age          65 non-null     float64 
 4   sibsp        89 non-null     int64   
 5   parch        89 non-null     int64   
 6   fare         89 non-null     float64 
 7   embarked     89 non-null     object  
 8   class        89 non-null     category
 9   who          89 non-null     object  
 10  adult_male   89 non-null     bool    
 11  deck         17 non-null     category
 12  embark_town  89 non-null     object  
 13  alive        89 non-null     object  
 14  alone        89 non-null     bool    
dtypes: bool(2), category(2), float64(2), int64(4), object(5)
memory usage: 32.9 KB


## 11- Copy Data from clip board

In [1]:
# Download Dataset
import seaborn as sns
import pandas as pd

df = sns.load_dataset('titanic')
df.to_excel('titanic.xlsx')

In [4]:
# Read clipboard in python
df1 = pd.read_clipboard()
df1.head()

# Saving clipboard data
df1.to_csv('excel_ka_data.csv')

## 12- Split DataFrame into two subsets

In [5]:
import seaborn as sns
import pandas as pd

df = sns.load_dataset('titanic')
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [8]:
df.shape

(891, 15)

In [9]:
from random import random
kashti_1 = df.sample(frac=0.50, random_state=1)
kashti_1.shape

(446, 15)

In [10]:
kashti_2 = df.drop(kashti_1.index)
kashti_2.shape

(445, 15)

In [11]:
kashti_1.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
862,1,1,female,48.0,0,0,25.9292,S,First,woman,False,D,Southampton,yes,True
223,0,3,male,,0,0,7.8958,S,Third,man,True,,Southampton,no,True
84,1,2,female,17.0,0,0,10.5,S,Second,woman,False,,Southampton,yes,True
680,0,3,female,,0,0,8.1375,Q,Third,woman,False,,Queenstown,no,True
535,1,2,female,7.0,0,2,26.25,S,Second,child,False,,Southampton,yes,False


In [12]:
kashti_2.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
7,0,3,male,2.0,3,1,21.075,S,Third,child,False,,Southampton,no,False
10,1,3,female,4.0,1,1,16.7,S,Third,child,False,G,Southampton,yes,False
15,1,2,female,55.0,0,0,16.0,S,Second,woman,False,,Southampton,yes,True
18,0,3,female,31.0,1,0,18.0,S,Third,woman,False,,Southampton,no,False


## 13- Joining Two Datasets

In [13]:
# Appending both datasets
df1 = kashti_1.append(kashti_2)
df1.shape

(891, 15)

# 14- Filtering a Dataset

In [14]:
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [16]:
# finding unique values in sex column
df.sex.unique()

array(['male', 'female'], dtype=object)

In [18]:
# Filtering Female Data
df[(df.sex == 'female')]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
8,1,3,female,27.0,0,2,11.1333,S,Third,woman,False,,Southampton,yes,False
9,1,2,female,14.0,1,0,30.0708,C,Second,child,False,,Cherbourg,yes,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
880,1,2,female,25.0,0,1,26.0000,S,Second,woman,False,,Southampton,yes,False
882,0,3,female,22.0,0,0,10.5167,S,Third,woman,False,,Southampton,no,True
885,0,3,female,39.0,0,5,29.1250,Q,Third,woman,False,,Queenstown,no,False
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True


In [23]:
df.embark_town.unique()

array(['Southampton', 'Cherbourg', 'Queenstown', nan], dtype=object)

In [24]:
# Multiple filtering
df[(df.embark_town == 'Southampton') & (df.sex == 'female')]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
8,1,3,female,27.0,0,2,11.1333,S,Third,woman,False,,Southampton,yes,False
10,1,3,female,4.0,1,1,16.7000,S,Third,child,False,G,Southampton,yes,False
11,1,1,female,58.0,0,0,26.5500,S,First,woman,False,C,Southampton,yes,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
871,1,1,female,47.0,1,1,52.5542,S,First,woman,False,D,Southampton,yes,False
880,1,2,female,25.0,0,1,26.0000,S,Second,woman,False,,Southampton,yes,False
882,0,3,female,22.0,0,0,10.5167,S,Third,woman,False,,Southampton,no,True
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True


In [26]:
df[((df.embark_town == 'Southampton') | (df.embark_town == 'Queenstown')) & 
(df.sex == 'female')]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
8,1,3,female,27.0,0,2,11.1333,S,Third,woman,False,,Southampton,yes,False
10,1,3,female,4.0,1,1,16.7000,S,Third,child,False,G,Southampton,yes,False
11,1,1,female,58.0,0,0,26.5500,S,First,woman,False,C,Southampton,yes,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
880,1,2,female,25.0,0,1,26.0000,S,Second,woman,False,,Southampton,yes,False
882,0,3,female,22.0,0,0,10.5167,S,Third,woman,False,,Southampton,no,True
885,0,3,female,39.0,0,5,29.1250,Q,Third,woman,False,,Queenstown,no,False
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True


In [28]:
# Another way of filtering
df[df.embark_town.isin(['Queenstown', "Southampton"])].head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True
5,0,3,male,,0,0,8.4583,Q,Third,man,True,,Queenstown,no,True


In [29]:
df[df.age < 18].shape

(113, 15)

## 15- Filtering by Large Categories

In [31]:
df.shape

(891, 15)

In [32]:
# Getting values of each category
df.embark_town.value_counts()

Southampton    644
Cherbourg      168
Queenstown      77
Name: embark_town, dtype: int64

In [33]:
# How many male and females are there
df.sex.value_counts()

male      577
female    314
Name: sex, dtype: int64

In [35]:
# finding Largest categories i.e: age=24 are largest group
df.age.value_counts().nlargest(3)

24.0    30
22.0    27
18.0    26
Name: age, dtype: int64

In [36]:
# Top Three Age groups
counts = df.age.value_counts()
counts.nlargest(3).index

Float64Index([24.0, 22.0, 18.0], dtype='float64')

In [38]:
# Top Three Age groups
counts = df.who.value_counts()
counts.nlargest(3)

man      537
woman    271
child     83
Name: who, dtype: int64

In [39]:
# Filtering on based of 'who' column largst group i.e 'man'
df[df.who.isin(counts.nlargest(1).index)].head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True
5,0,3,male,,0,0,8.4583,Q,Third,man,True,,Queenstown,no,True
6,0,1,male,54.0,0,0,51.8625,S,First,man,True,E,Southampton,no,True
12,0,3,male,20.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


## 16-