# Groupby Module

The group by object is created from the dataframe and it creates essentially groupings or categories or chunks or segments from the dataframe. 

In [1]:
import numpy as np
import pandas as pd

In [2]:
pd.set_option("precision", 2)

In [3]:
df = pd.read_csv("employees.csv", parse_dates=["Start Date"])
df.sort_values("First Name", inplace=True, ignore_index=True)
df.tail(20)

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
980,,Male,1990-05-23,2:09 AM,136655,9.8,,Distribution
981,,Male,1995-04-07,4:32 AM,60411,12.05,,Human Resources
982,,Male,2009-06-17,1:00 PM,136681,3.65,,Business Development
983,,Female,1994-12-18,8:42 PM,138807,1.4,,Human Resources
984,,Male,1986-10-24,9:23 AM,47176,10.74,,Finance
985,,Female,2000-06-18,7:36 AM,106428,10.87,,
986,,Male,1986-05-06,4:41 PM,74104,17.68,,Client Services
987,,,1991-04-15,3:39 AM,132505,13.59,,Product
988,,Female,1982-07-24,12:00 AM,57811,8.94,,Marketing
989,,Male,1994-12-13,10:34 AM,141311,5.48,,Product


In [4]:
df["Senior Management"] = df["Senior Management"].astype("boolean")
df["Gender"] = df["Gender"].astype("category")

In [5]:
mask_1 = df["Senior Management"].isna()
mask_2 = df["First Name"].isna()
df.drop(df[mask_1 & mask_2].index, inplace=True)
df.tail()

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
928,Willie,Male,2009-08-22,1:03 PM,55038,19.69,False,Legal
929,Willie,Male,2003-11-27,6:21 AM,64363,4.02,False,Marketing
930,Willie,Male,1998-02-17,8:20 PM,146651,1.45,True,Engineering
931,Willie,Male,2006-06-06,9:45 AM,55281,4.93,True,Marketing
932,Willie,Male,2009-12-05,5:39 AM,141932,1.02,True,Engineering


In [6]:
df.rename(columns={'Senior Management': 'SnrMgmt', 'Bonus %': 'BonusPct'}, inplace=True)
df.head()

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,BonusPct,SnrMgmt,Team
0,Aaron,Male,2012-02-17,10:20 AM,61602,11.85,True,Marketing
1,Aaron,Male,1994-01-29,6:48 PM,58755,5.1,True,Marketing
2,Aaron,Male,1990-07-22,2:53 PM,52119,11.34,True,Client Services
3,Aaron,,1986-01-22,7:39 PM,63126,18.42,False,Client Services
4,Adam,Male,2011-05-21,1:45 AM,95327,15.12,False,Distribution


In [7]:
np.random.seed(42)
df['age'] = np.random.randint(low=20, high=60, size=df.shape[0])

## Examining the groupby object

We will call the `.first()`, `.last()`, and `.size()` methods on a GroupBy object to gain a better understanding of its internal data structure. 

In [8]:
gender = df.groupby("Gender")

In [9]:
type(gender)

pandas.core.groupby.generic.DataFrameGroupBy

Use the `dir` function to discover the attributes and methods of a `groupby` object

In [10]:
[attr for attr in dir(gender) if not attr.startswith('_')]

['BonusPct',
 'Gender',
 'Salary',
 'SnrMgmt',
 'Team',
 'age',
 'agg',
 'aggregate',
 'all',
 'any',
 'apply',
 'backfill',
 'bfill',
 'boxplot',
 'corr',
 'corrwith',
 'count',
 'cov',
 'cumcount',
 'cummax',
 'cummin',
 'cumprod',
 'cumsum',
 'describe',
 'diff',
 'dtypes',
 'ewm',
 'expanding',
 'ffill',
 'fillna',
 'filter',
 'first',
 'get_group',
 'groups',
 'head',
 'hist',
 'idxmax',
 'idxmin',
 'indices',
 'last',
 'mad',
 'max',
 'mean',
 'median',
 'min',
 'ndim',
 'ngroup',
 'ngroups',
 'nth',
 'nunique',
 'ohlc',
 'pad',
 'pct_change',
 'pipe',
 'plot',
 'prod',
 'quantile',
 'rank',
 'resample',
 'rolling',
 'sample',
 'sem',
 'shift',
 'size',
 'skew',
 'std',
 'sum',
 'tail',
 'take',
 'transform',
 'tshift',
 'var']

**To find the uniquely identifying labels for each group, look in the `.groups` attribute, which contains a dictionary of each unique group mapped to all the corresponding index labels of that group.** 

In [11]:
gender.groups

{'Female': [17, 18, 20, 21, 22, 23, 24, 27, 28, 29, 30, 31, 32, 33, 36, 37, 38, 39, 40, 41, 42, 43, 46, 47, 48, 49, 50, 51, 52, 54, 55, 57, 58, 59, 60, 62, 63, 75, 76, 77, 78, 79, 80, 82, 84, 85, 88, 94, 95, 96, 97, 98, 99, 101, 102, 115, 116, 117, 118, 119, 128, 129, 130, 133, 151, 152, 153, 154, 155, 156, 157, 163, 164, 165, 166, 167, 170, 171, 172, 173, 175, 195, 197, 198, 199, 200, 201, 202, 203, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, ...], 'Male': [0, 1, 2, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 44, 45, 64, 65, 66, 67, 69, 70, 71, 72, 73, 74, 90, 91, 92, 93, 103, 104, 106, 107, 108, 110, 111, 112, 113, 121, 122, 125, 127, 134, 135, 136, 137, 138, 140, 142, 144, 145, 147, 148, 149, 150, 158, 159, 160, 162, 168, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 204, 205, 206, 207, 225, 235, 236, 237, 247, 248, 249, 251, 252, 253, 254, 255, 256, 257, 258, 266, ...]}

In [12]:
gender.ngroups # similar to df['Gender'].unique()

2

In [13]:
gender.size() # similar to df['Gender'].value_counts() 

Gender
Female    400
Male      395
dtype: int64

In [14]:
# display first row for each group
gender.first()

Unnamed: 0_level_0,First Name,Start Date,Last Login Time,Salary,BonusPct,SnrMgmt,Team,age
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Female,Alice,2004-10-05,9:34 AM,47638,11.21,False,Human Resources,43
Male,Aaron,2012-02-17,10:20 AM,61602,11.85,True,Marketing,58


**Retrieve a single group with the `.get_group()` method by passing it a tuple of an exact group label.**

In [15]:
gender.get_group('Female')  # similar to df.loc[df['Gender']=='Female']

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,BonusPct,SnrMgmt,Team,age
17,Alice,Female,2004-10-05,9:34 AM,47638,11.21,False,Human Resources,43
18,Alice,Female,1988-09-03,8:54 PM,63571,15.40,True,Product,49
20,Alice,Female,1986-05-02,1:50 AM,51395,2.38,True,Finance,21
21,Alice,Female,2013-02-09,12:49 AM,121250,4.36,True,Finance,40
22,Alice,Female,2016-01-21,5:07 PM,117787,10.48,False,,52
...,...,...,...,...,...,...,...,...,...
894,Tina,Female,1999-08-06,3:18 AM,102841,3.37,False,Client Services,30
911,Virginia,Female,1999-10-20,6:23 AM,46905,19.15,False,Distribution,58
912,Virginia,Female,2010-05-02,9:10 PM,123649,10.15,True,Marketing,59
919,Wanda,Female,2008-07-20,1:44 PM,65362,7.13,True,Legal,52


### Iterating through Groups

In [16]:
from IPython.display import display
for name, group in gender:
    print(name)
    display(group.head(3))

Female


Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,BonusPct,SnrMgmt,Team,age
17,Alice,Female,2004-10-05,9:34 AM,47638,11.21,False,Human Resources,43
18,Alice,Female,1988-09-03,8:54 PM,63571,15.4,True,Product,49
20,Alice,Female,1986-05-02,1:50 AM,51395,2.38,True,Finance,21


Male


Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,BonusPct,SnrMgmt,Team,age
0,Aaron,Male,2012-02-17,10:20 AM,61602,11.85,True,Marketing,58
1,Aaron,Male,1994-01-29,6:48 PM,58755,5.1,True,Marketing,48
2,Aaron,Male,1990-07-22,2:53 PM,52119,11.34,True,Client Services,34


### Que: Find the name of the person who have maximum salary in the team (department)

We'll loop over all of our gropings to extract selected rows from each inner DataFrame. We'll append these rows to a running DataFrame and then view the final result.

In [17]:
df_team = pd.DataFrame(columns=df.columns)
df_team

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,BonusPct,SnrMgmt,Team,age


In [18]:
for team, data in df.groupby("Team"):
    highest_salary_name_in_group = data.nlargest(1, "Salary")
    df_team = df_team.append(highest_salary_name_in_group)

df_team    

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,BonusPct,SnrMgmt,Team,age
326,Harold,Male,2010-04-16,5:13 AM,147417,11.63,True,Business Development,48
569,Lois,Female,2011-11-09,7:06 AM,147183,10.0,True,Client Services,51
43,Andrea,Female,1999-07-22,9:25 AM,149105,13.71,True,Distribution,39
762,Ruby,Female,1999-05-01,3:36 AM,147362,7.85,True,Engineering,47
494,Katherine,Female,1996-08-13,12:21 AM,149908,18.91,False,Finance,38
752,Rose,Female,2015-05-28,8:40 AM,149903,5.63,False,Human Resources,36
373,James,Male,1993-01-15,5:19 PM,148985,19.28,False,Legal,45
774,Russell,,2009-05-09,11:59 AM,149456,3.53,False,Marketing,54
201,Cynthia,Female,2006-07-12,8:55 AM,149684,7.86,False,Product,35
60,Annie,Female,1993-01-30,2:05 AM,144887,8.28,True,Sales,29


In [19]:
gender.mean()

Unnamed: 0_level_0,Salary,BonusPct,SnrMgmt,age
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,89605.73,10.0,0.5,39.41
Male,90738.87,10.38,0.5,39.78


In [20]:
gender.describe().T
# gender.describe().unstack().to_frame(name='Value')

Unnamed: 0,Gender,Female,Male
Salary,count,400.0,395.0
Salary,mean,89605.73,90738.87
Salary,std,33635.2,32316.64
Salary,min,35381.0,35013.0
Salary,25%,58977.0,63220.5
Salary,50%,89438.5,89854.0
Salary,75%,117849.5,118743.0
Salary,max,149908.0,148985.0
BonusPct,count,400.0,395.0
BonusPct,mean,10.0,10.38


In [21]:
gender.describe(include='O')

Unnamed: 0_level_0,First Name,First Name,First Name,First Name,Last Login Time,Last Login Time,Last Login Time,Last Login Time,Team,Team,Team,Team
Unnamed: 0_level_1,count,unique,top,freq,count,unique,top,freq,count,unique,top,freq
Gender,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
Female,400,100,Marilyn,10,400,355,11:57 PM,3,393,10,Business Development,49
Male,395,100,Todd,9,395,350,11:25 AM,3,371,10,Human Resources,43


### Grouping by Multiple columns and `.agg()` Method

We'll invoke the `.agg()` method on our GroupBy object to apply a different aggregation operation to each inner column.

In [22]:
gt = df.groupby(["Gender", "Team"])

In [23]:
gt.agg({'Salary': 'mean', 'BonusPct': ['min', 'max'], 'age': ['min', 'max', 'mean']})

Unnamed: 0_level_0,Unnamed: 1_level_0,Salary,BonusPct,BonusPct,age,age,age
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,min,max,min,max,mean
Gender,Team,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
Female,Business Development,92645.02,1.03,19.63,20,58,36.33
Female,Client Services,85742.72,1.76,19.73,21,59,40.11
Female,Distribution,79812.16,2.31,19.49,20,58,38.68
Female,Engineering,89708.14,1.48,19.85,21,59,40.12
Female,Finance,93483.33,1.55,19.78,20,59,41.0
Female,Human Resources,89835.88,2.3,19.61,20,58,37.88
Female,Legal,91749.24,1.08,16.62,20,59,39.0
Female,Marketing,97116.56,1.45,19.77,20,59,40.47
Female,Product,86333.7,1.26,19.26,20,58,38.84
Female,Sales,90055.63,2.78,19.02,20,58,40.6


# Pivot Table

In [24]:
df.pivot_table(values='Salary', index='Team', columns='Gender')

Gender,Female,Male
Team,Unnamed: 1_level_1,Unnamed: 2_level_1
Business Development,92645.02,87851.0
Client Services,85742.72,93781.79
Distribution,79812.16,92302.38
Engineering,89708.14,99937.03
Finance,93483.33,95663.84
Human Resources,89835.88,92149.98
Legal,91749.24,84491.88
Marketing,97116.56,84745.95
Product,86333.7,87615.44
Sales,90055.63,93303.78


In [25]:
age_group = pd.cut(df['age'], bins=range(20, 61, 10))

In [26]:
age_group

0      (50, 60]
1      (40, 50]
2      (30, 40]
3      (20, 30]
4      (30, 40]
         ...   
928    (20, 30]
929    (20, 30]
930    (20, 30]
931    (20, 30]
932    (20, 30]
Name: age, Length: 933, dtype: category
Categories (4, interval[int64]): [(20, 30] < (30, 40] < (40, 50] < (50, 60]]

In [27]:
df.pivot_table(values='Salary', index='Team', columns=['Gender', age_group])

Gender,Female,Female,Female,Female,Male,Male,Male,Male
age,"(20, 30]","(30, 40]","(40, 50]","(50, 60]","(20, 30]","(30, 40]","(40, 50]","(50, 60]"
Team,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Business Development,87833.54,93087.25,99115.4,116042.0,79926.62,82691.5,97731.83,87453.22
Client Services,88049.53,93085.67,80740.27,83569.92,101362.62,88141.0,89911.57,96156.25
Distribution,74433.33,76185.73,106926.0,84544.5,104061.0,92274.11,81798.0,92698.2
Engineering,76737.89,102062.67,96075.08,78915.6,102652.8,85796.6,120061.5,90858.9
Finance,91009.12,92198.62,108301.78,85787.93,99287.75,86057.43,104171.33,95600.62
Human Resources,93248.0,95470.5,85352.8,95419.67,84365.25,96170.55,89432.89,96786.31
Legal,90839.11,105895.25,86804.22,89074.33,82268.38,97427.4,87140.58,79950.88
Marketing,103535.8,87540.93,105017.5,97023.89,78532.8,81770.0,85514.33,93481.86
Product,71646.55,100598.82,93771.31,78796.5,94904.14,101990.0,84843.0,82861.23
Sales,99858.4,60819.2,94442.67,95196.1,111206.88,83729.0,83749.92,100947.71


In [28]:
bonus_partition = pd.qcut(df['BonusPct'], 3, labels=["bad", "medium", "good"])

In [29]:
df['BonusPct']

0      11.85
1       5.10
2      11.34
3      18.42
4      15.12
       ...  
928    19.69
929     4.02
930     1.45
931     4.93
932     1.02
Name: BonusPct, Length: 933, dtype: float64

In [30]:
bonus_partition

0      medium
1         bad
2      medium
3        good
4        good
        ...  
928      good
929       bad
930       bad
931       bad
932       bad
Name: BonusPct, Length: 933, dtype: category
Categories (3, object): ['bad' < 'medium' < 'good']

In [45]:
df.pivot_table(values='Salary', 
               index=[bonus_partition, 'Team'], 
               columns=['Gender', age_group],
               aggfunc='mean',               
              )

Unnamed: 0_level_0,Gender,Female,Female,Female,Female,Male,Male,Male,Male
Unnamed: 0_level_1,age,"(20, 30]","(30, 40]","(40, 50]","(50, 60]","(20, 30]","(30, 40]","(40, 50]","(50, 60]"
BonusPct,Team,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
bad,Business Development,101235.57,101468.25,83220.0,95356.75,72082.25,76242.0,89488.0,103552.0
bad,Client Services,84869.75,,91807.83,94788.5,103657.0,92799.17,85701.67,106472.0
bad,Distribution,72835.5,52051.33,69655.5,85855.0,90887.0,91156.6,56769.33,140444.0
bad,Engineering,88328.0,134673.0,97338.57,88432.75,116077.6,58399.5,112253.25,76760.5
bad,Finance,87694.86,121095.5,120995.67,58038.25,148115.0,78417.0,109312.33,95728.0
bad,Human Resources,94833.33,149903.0,90217.38,99897.5,101036.0,78728.2,90081.83,81592.5
bad,Legal,86673.5,107478.0,88830.0,78803.33,94573.0,117518.5,86660.71,71683.57
bad,Marketing,129634.0,80998.33,119378.67,87021.25,75527.83,123329.5,85039.67,111675.17
bad,Product,59007.0,91096.8,97072.14,82493.5,71312.25,95192.0,81050.33,74434.33
bad,Sales,100050.75,48117.0,102985.0,80535.67,102343.5,65115.67,89879.33,83991.25


In [32]:
df.pivot_table(index='Team',
               columns='Gender',
               aggfunc={
                   'BonusPct': ['min', 'max', 'mean'],
                   'Salary': ['min', 'max', 'mean']
               }
)

Unnamed: 0_level_0,BonusPct,BonusPct,BonusPct,BonusPct,BonusPct,BonusPct,Salary,Salary,Salary,Salary,Salary,Salary
Unnamed: 0_level_1,max,max,mean,mean,min,min,max,max,mean,mean,min,min
Gender,Female,Male,Female,Male,Female,Male,Female,Male,Female,Male,Female,Male
Team,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3
Business Development,19.63,18.84,10.43,10.84,1.03,2.08,147183.0,147417.0,92645.02,87851.0,36844.0,37385.0
Client Services,19.73,19.89,10.26,10.45,1.76,1.01,147183.0,144082.0,85742.72,93781.79,36927.0,35095.0
Distribution,19.49,19.91,9.72,8.69,2.31,1.68,149105.0,146141.0,79812.16,92302.38,35575.0,35633.0
Engineering,19.85,19.72,9.7,10.7,1.48,1.02,147362.0,146907.0,89708.14,99937.03,36946.0,38041.0
Finance,19.78,19.93,9.11,10.66,1.55,1.15,149908.0,148225.0,93483.33,95663.84,35381.0,36749.0
Human Resources,19.61,19.28,10.58,9.99,2.3,1.56,149903.0,146670.0,89835.88,92149.98,35477.0,35203.0
Legal,16.62,19.94,9.46,11.83,1.08,1.19,147113.0,148985.0,91749.24,84491.88,42090.0,35061.0
Marketing,19.77,19.93,10.43,10.46,1.45,1.26,145988.0,146812.0,97116.56,84745.95,36643.0,36837.0
Product,19.26,18.64,9.74,9.68,1.26,1.11,149684.0,148941.0,86333.7,87615.44,36067.0,35013.0
Sales,19.02,18.9,10.15,9.99,2.78,1.22,144887.0,142178.0,90055.63,93303.78,35884.0,35802.0


# Use Case: `groupby()` and `apply()` 

Imputing the value in Team column based on the corresponding value in Gender and Senior Management columns. 

In [33]:
first_name = df.groupby("First Name")

In [34]:
name_mode_dict = first_name["Gender"].agg(pd.Series.mode).to_dict()

In [35]:
def cust_fillna_gender(row):
    name = row[0]
    gender = row[1]    
    if pd.isna(gender):
        gender = name_mode_dict[name]
    return gender

In [36]:
df["Gender"] = df.apply(cust_fillna_gender, axis="columns")
df.isna().sum()

First Name          0
Gender              0
Start Date          0
Last Login Time     0
Salary              0
BonusPct            0
SnrMgmt             0
Team               34
age                 0
dtype: int64

In [37]:
gm = df.groupby(["Gender", "SnrMgmt"])

In [38]:
gm['Team'].agg(pd.Series.mode)["Female", True]

'Business Development'

In [39]:
gm['Team'].agg(pd.Series.mode)

Gender  SnrMgmt
Female  False           Client Services
        True       Business Development
Male    False                     Legal
        True                      Sales
Name: Team, dtype: object

In [40]:
def cust_fillna_team(row):
    gender = row[1]
    sm = row[6]
    team = row[7]
    if pd.isna(team):
        team = gm['Team'].agg(pd.Series.mode)[gender, sm]        
    return team

In [41]:
df.apply(cust_fillna_team, axis='columns')[df.loc[df['Team'].isna()].index]

22          Client Services
70                    Sales
111                   Legal
112                   Sales
144                   Legal
162                   Legal
168                   Legal
181                   Legal
206                   Sales
237                   Legal
257                   Sales
332                   Legal
369                   Legal
389                   Sales
404                   Sales
422                   Legal
427                   Legal
447                   Sales
453                   Sales
461    Business Development
534    Business Development
547                   Legal
580    Business Development
640                   Legal
646    Business Development
655         Client Services
681                   Sales
689                   Sales
712                   Sales
724                   Sales
783                   Legal
882                   Sales
897                   Sales
920         Client Services
dtype: object

In [42]:
df.loc[df['Team'].isna()]

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,BonusPct,SnrMgmt,Team,age
22,Alice,Female,2016-01-21,5:07 PM,117787,10.48,False,,52
70,Antonio,Male,1999-06-06,10:54 PM,41928,5.48,True,,35
111,Bobby,Male,1996-03-31,5:40 PM,112117,6.34,False,,33
112,Bobby,Male,1996-08-19,1:16 AM,147842,16.16,True,,22
144,Carl,Male,1987-03-30,5:59 PM,75598,19.29,False,,39
162,Charles,Male,1997-09-03,10:04 AM,148291,6.0,False,,43
168,Chris,Male,2006-12-12,1:57 AM,71642,1.5,False,,58
181,Christopher,Male,2000-04-22,10:15 AM,37919,11.45,False,,38
206,Daniel,Male,2016-02-29,4:04 AM,77287,13.0,True,,38
237,Donald,Male,1988-04-06,10:00 AM,122920,5.32,False,,38


In [43]:
df['Team'] = df.apply(cust_fillna_team, axis='columns')

In [44]:
df.isna().sum()

First Name         0
Gender             0
Start Date         0
Last Login Time    0
Salary             0
BonusPct           0
SnrMgmt            0
Team               0
age                0
dtype: int64