### Table of Contents



#### 1. [The Basics](#content1)

#### 2. [Creating DataFrame](#content2)

#### 3. [Treating null values](#content3)

#### 4. [Modify/Add new column(s).](#content4)

#### 5. [Deleting columns](#content5)

#### 6. [Renaming columns](#content6)

#### 7.i. [Slicing DataFrame](#content7)

#### 7.ii. [Slicing using iloc and loc](#content8)

#### 8. [Adding a row](#content9)

#### 9. [Dropping row(s)](#content10)

#### 10. [Sorting](#content11)

#### 11. [Joins](#content12)

#### 12. [Groupby](#content13)

In [275]:
# import libraries
import pandas as pd
import numpy as np

In [276]:
# importing the data
df = pd.read_csv("titanic.csv")


<a id="content1"></a>
## 1. The Basics

In [277]:
# see the first 5 rows
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [278]:
# last 5 rows
df.tail()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.45,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0,C148,C
890,891,0,3,"Dooley, Mr. Patrick",male,32.0,0,0,370376,7.75,,Q


In [279]:
# shape of the dataframe
# (n_samples, x features)
df.shape

(891, 12)

In [280]:
# list all the columns
df.columns

Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')

In [281]:
# rows index
df.index

RangeIndex(start=0, stop=891, step=1)

In [282]:
# value with their counts in a particular columns
df.Pclass.value_counts()

3    491
1    216
2    184
Name: Pclass, dtype: int64

In [283]:
# General description of the dataset
df.describe()

Unnamed: 0,PassengerId,Survived,Pclass,Age,SibSp,Parch,Fare
count,891.0,891.0,891.0,714.0,891.0,891.0,891.0
mean,446.0,0.383838,2.308642,29.699118,0.523008,0.381594,32.204208
std,257.353842,0.486592,0.836071,14.526497,1.102743,0.806057,49.693429
min,1.0,0.0,1.0,0.42,0.0,0.0,0.0
25%,223.5,0.0,2.0,20.125,0.0,0.0,7.9104
50%,446.0,0.0,3.0,28.0,0.0,0.0,14.4542
75%,668.5,1.0,3.0,38.0,1.0,0.0,31.0
max,891.0,1.0,3.0,80.0,8.0,6.0,512.3292


<a id='content2'></a>
## 2. Creating DataFrame

In [284]:
# create an empty data frame
df_empty = pd.DataFrame()
df_empty.head()

In [285]:
# From dict
student_info = {'Name':['Nishant','Sandeep','Ranjeev','Baaje','Rahmat'],
                'Age':[22,23,24,45,25],
                'Subject':['Data science','AI','CITS','civil','CSE']}

df_student = pd.DataFrame(student_info).reset_index(drop=True)
df_student.head()

Unnamed: 0,Name,Age,Subject
0,Nishant,22,Data science
1,Sandeep,23,AI
2,Ranjeev,24,CITS
3,Baaje,45,civil
4,Rahmat,25,CSE


<a id='content3'></a>
## 3. Treating null values

In [286]:
# check if the dataframe has null value or not
df.isna().sum()

PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
dtype: int64

In [287]:
## On a particular columns
df.Age.isna().sum()

177

Null value imputation

In [288]:
df["Age"].fillna(df["Age"].mean(), inplace=True)
df["Age"].isna().sum()

0

<a id="content4"></a>
## 4. Modify/Add new columns

In [289]:
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [290]:
## Male:0, female: 1

df["Sex"] = df["Sex"].map({"male":0,"female":1})
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",0,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",1,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",1,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",0,35.0,0,0,373450,8.05,,S


In [291]:
### Finding First Name and Last name from Name columns

df["last_name"] = df["Name"].apply(lambda x: x.split(',')[0])
df["first_name"] = df["Name"].apply(lambda x: ' '.join(x.split(',')[1:]))


In [292]:
# sets to 1 for men in 3rd class
df['Third&Men'] = df.apply(lambda row: int(row['Pclass']==3 and row['Sex']=='0'), axis=1)

In [293]:
def findAgeGroup(age):
    if age < 18:
        return 1
    elif age >=18 and age<=40:
        return 2
    elif age >=40 and age <60:
        return 3
    else:
        return 4
df['Age_group'] = df['Age'].apply(lambda x: findAgeGroup(x))


In [294]:
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,last_name,first_name,Third&Men,Age_group
0,1,0,3,"Braund, Mr. Owen Harris",0,22.0,1,0,A/5 21171,7.25,,S,Braund,Mr. Owen Harris,0,2
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",1,38.0,1,0,PC 17599,71.2833,C85,C,Cumings,Mrs. John Bradley (Florence Briggs Thayer),0,2
2,3,1,3,"Heikkinen, Miss. Laina",1,26.0,0,0,STON/O2. 3101282,7.925,,S,Heikkinen,Miss. Laina,0,2
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1,35.0,1,0,113803,53.1,C123,S,Futrelle,Mrs. Jacques Heath (Lily May Peel),0,2
4,5,0,3,"Allen, Mr. William Henry",0,35.0,0,0,373450,8.05,,S,Allen,Mr. William Henry,0,2


<a id='content5'></a>
## 5. Deleting columns

In [295]:
df = df.drop(['PassengerId'], axis=1)
df.head()

Unnamed: 0,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,last_name,first_name,Third&Men,Age_group
0,0,3,"Braund, Mr. Owen Harris",0,22.0,1,0,A/5 21171,7.25,,S,Braund,Mr. Owen Harris,0,2
1,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",1,38.0,1,0,PC 17599,71.2833,C85,C,Cumings,Mrs. John Bradley (Florence Briggs Thayer),0,2
2,1,3,"Heikkinen, Miss. Laina",1,26.0,0,0,STON/O2. 3101282,7.925,,S,Heikkinen,Miss. Laina,0,2
3,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1,35.0,1,0,113803,53.1,C123,S,Futrelle,Mrs. Jacques Heath (Lily May Peel),0,2
4,0,3,"Allen, Mr. William Henry",0,35.0,0,0,373450,8.05,,S,Allen,Mr. William Henry,0,2


<a id='content6'></a>
## 6.Renaming columns

In [296]:
df = df.rename({'Sex':'Gender','Name':'Full Name', 'last_name':'SurName','first_name':'Name'})

In [297]:
df.head()

Unnamed: 0,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,last_name,first_name,Third&Men,Age_group
0,0,3,"Braund, Mr. Owen Harris",0,22.0,1,0,A/5 21171,7.25,,S,Braund,Mr. Owen Harris,0,2
1,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",1,38.0,1,0,PC 17599,71.2833,C85,C,Cumings,Mrs. John Bradley (Florence Briggs Thayer),0,2
2,1,3,"Heikkinen, Miss. Laina",1,26.0,0,0,STON/O2. 3101282,7.925,,S,Heikkinen,Miss. Laina,0,2
3,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1,35.0,1,0,113803,53.1,C123,S,Futrelle,Mrs. Jacques Heath (Lily May Peel),0,2
4,0,3,"Allen, Mr. William Henry",0,35.0,0,0,373450,8.05,,S,Allen,Mr. William Henry,0,2


<a id='content7'></a>
## 7. Slicing DataFrame

In [298]:
# all rows with pclass 3

df_third_class = df[df['Pclass']==3].reset_index(drop=True)


In [299]:
# Females with age > 60
df_aged = df[(df['Sex']=='1') & (df['Age'] > 60)].reset_index(drop=True)
df_aged

Unnamed: 0,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,last_name,first_name,Third&Men,Age_group


In [300]:
# selecting some columns

df1 = df[['Age','Sex','Ticket']]

In [301]:
# select numerial columns only
numerics = ['int16','int32','int64','float16','float32','float64']
df_num = df.select_dtypes(numerics)
df_num.head()

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Third&Men,Age_group
0,0,3,0,22.0,1,0,7.25,0,2
1,1,1,1,38.0,1,0,71.2833,0,2
2,1,3,1,26.0,0,0,7.925,0,2
3,1,1,1,35.0,1,0,53.1,0,2
4,0,3,0,35.0,0,0,8.05,0,2


In [302]:
# categorical
df_cat = df.select_dtypes('object')
df_cat.head()

Unnamed: 0,Name,Ticket,Cabin,Embarked,last_name,first_name
0,"Braund, Mr. Owen Harris",A/5 21171,,S,Braund,Mr. Owen Harris
1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",PC 17599,C85,C,Cumings,Mrs. John Bradley (Florence Briggs Thayer)
2,"Heikkinen, Miss. Laina",STON/O2. 3101282,,S,Heikkinen,Miss. Laina
3,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",113803,C123,S,Futrelle,Mrs. Jacques Heath (Lily May Peel)
4,"Allen, Mr. William Henry",373450,,S,Allen,Mr. William Henry


<a id="content8"></a>
## 7.ii Slicing using iloc and loc

In [303]:
# first 100 rows and all cols

df_hund = df.iloc[:100,:]
df_hund.shape

(100, 15)

In [304]:
# first 250 rows with a subset of columns
df_sub = df.iloc[:250,1:8]

In [305]:
df_sub.head()

Unnamed: 0,Pclass,Name,Sex,Age,SibSp,Parch,Ticket
0,3,"Braund, Mr. Owen Harris",0,22.0,1,0,A/5 21171
1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",1,38.0,1,0,PC 17599
2,3,"Heikkinen, Miss. Laina",1,26.0,0,0,STON/O2. 3101282
3,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1,35.0,1,0,113803
4,3,"Allen, Mr. William Henry",0,35.0,0,0,373450


In [306]:
df_sub4 = df.loc[(df['Age']>50),['Sex','Age']]
df_sub4

Unnamed: 0,Sex,Age
6,0,54.0
11,1,58.0
15,1,55.0
33,0,66.0
54,0,65.0
...,...,...
820,1,52.0
829,1,62.0
851,0,74.0
857,0,51.0


<a id="content9"></a>
## 8. Adding a row

In [307]:
row=dict({'Age':24,'Name':'Peter','Survived':'Y'})
df = df.append(row, ignore_index=True)
df.tail()

  df = df.append(row, ignore_index=True)


Unnamed: 0,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,last_name,first_name,Third&Men,Age_group
887,1,1.0,"Graham, Miss. Margaret Edith",1.0,19.0,0.0,0.0,112053,30.0,B42,S,Graham,Miss. Margaret Edith,0.0,2.0
888,0,3.0,"Johnston, Miss. Catherine Helen ""Carrie""",1.0,29.699118,1.0,2.0,W./C. 6607,23.45,,S,Johnston,"Miss. Catherine Helen ""Carrie""",0.0,2.0
889,1,1.0,"Behr, Mr. Karl Howell",0.0,26.0,0.0,0.0,111369,30.0,C148,C,Behr,Mr. Karl Howell,0.0,2.0
890,0,3.0,"Dooley, Mr. Patrick",0.0,32.0,0.0,0.0,370376,7.75,,Q,Dooley,Mr. Patrick,0.0,2.0
891,Y,,Peter,,24.0,,,,,,,,,,


<a id="content10"></a>
## 9. Dropping row(s)

In [308]:
df = df.drop(df.index[-1], axis=0)
df.tail()

Unnamed: 0,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,last_name,first_name,Third&Men,Age_group
886,0,2.0,"Montvila, Rev. Juozas",0.0,27.0,0.0,0.0,211536,13.0,,S,Montvila,Rev. Juozas,0.0,2.0
887,1,1.0,"Graham, Miss. Margaret Edith",1.0,19.0,0.0,0.0,112053,30.0,B42,S,Graham,Miss. Margaret Edith,0.0,2.0
888,0,3.0,"Johnston, Miss. Catherine Helen ""Carrie""",1.0,29.699118,1.0,2.0,W./C. 6607,23.45,,S,Johnston,"Miss. Catherine Helen ""Carrie""",0.0,2.0
889,1,1.0,"Behr, Mr. Karl Howell",0.0,26.0,0.0,0.0,111369,30.0,C148,C,Behr,Mr. Karl Howell,0.0,2.0
890,0,3.0,"Dooley, Mr. Patrick",0.0,32.0,0.0,0.0,370376,7.75,,Q,Dooley,Mr. Patrick,0.0,2.0


In [309]:
df.head()

Unnamed: 0,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,last_name,first_name,Third&Men,Age_group
0,0,3.0,"Braund, Mr. Owen Harris",0.0,22.0,1.0,0.0,A/5 21171,7.25,,S,Braund,Mr. Owen Harris,0.0,2.0
1,1,1.0,"Cumings, Mrs. John Bradley (Florence Briggs Th...",1.0,38.0,1.0,0.0,PC 17599,71.2833,C85,C,Cumings,Mrs. John Bradley (Florence Briggs Thayer),0.0,2.0
2,1,3.0,"Heikkinen, Miss. Laina",1.0,26.0,0.0,0.0,STON/O2. 3101282,7.925,,S,Heikkinen,Miss. Laina,0.0,2.0
3,1,1.0,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1.0,35.0,1.0,0.0,113803,53.1,C123,S,Futrelle,Mrs. Jacques Heath (Lily May Peel),0.0,2.0
4,0,3.0,"Allen, Mr. William Henry",0.0,35.0,0.0,0.0,373450,8.05,,S,Allen,Mr. William Henry,0.0,2.0


<a id="content11"></a>
## 10. Sorting

In [310]:
# sorting age by decreasing order
df= df.sort_values(by=['Age'], ascending=False)
df.head()

Unnamed: 0,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,last_name,first_name,Third&Men,Age_group
630,1,1.0,"Barkworth, Mr. Algernon Henry Wilson",0.0,80.0,0.0,0.0,27042,30.0,A23,S,Barkworth,Mr. Algernon Henry Wilson,0.0,4.0
851,0,3.0,"Svensson, Mr. Johan",0.0,74.0,0.0,0.0,347060,7.775,,S,Svensson,Mr. Johan,0.0,4.0
96,0,1.0,"Goldschmidt, Mr. George B",0.0,71.0,0.0,0.0,PC 17754,34.6542,A5,C,Goldschmidt,Mr. George B,0.0,4.0
493,0,1.0,"Artagaveytia, Mr. Ramon",0.0,71.0,0.0,0.0,PC 17609,49.5042,,C,Artagaveytia,Mr. Ramon,0.0,4.0
116,0,3.0,"Connors, Mr. Patrick",0.0,70.5,0.0,0.0,370369,7.75,,Q,Connors,Mr. Patrick,0.0,4.0


<a id="content12"></a>
## 11. Joins

Let's create two separate data frames and try all 5 (Cross,Inner, Left, Right,and Outer Joins).

In [311]:
sno = [i+1 for i in range(100)]
marks = np.random.randint(100, size=100)
marks_df =pd.DataFrame({'sno':sno,'Marks':marks})
marks_df.head()

Unnamed: 0,sno,Marks
0,1,75
1,2,54
2,3,50
3,4,37
4,5,92


In [312]:
sno = [i+1 for i in range(100)]
age = np.random.randint(100, size=100)
age_df = pd.DataFrame({'sno':sno,'Age':age})
age_df.head()

Unnamed: 0,sno,Age
0,1,60
1,2,27
2,3,42
3,4,15
4,5,45


In [313]:
## Cross join

In [314]:
cross_join = pd.merge(marks_df, age_df, how='cross')


In [315]:
# inner join
inner_join = pd.merge(marks_df, age_df, how='inner', on='sno')
inner_join.head()

Unnamed: 0,sno,Marks,Age
0,1,75,60
1,2,54,27
2,3,50,42
3,4,37,15
4,5,92,45


In [316]:
# left and right outer joins

In [317]:
age_df.loc[len(age_df.index)]={'Sno':101,'Age':23}
age_df.loc[len(age_df.index)]={'Sno':102,'Age':27}
age_df.loc[len(age_df.index)]={'Sno':104,'Age':29}
age_df.loc[len(age_df.index)]={'Sno':103,'Age':32}
age_df.loc[len(age_df.index)]={'Sno':105,'Age':53}

In [318]:
left_join = pd.merge(age_df, marks_df, how='left',on='sno')
left_join.tail()

Unnamed: 0,sno,Age,Marks
100,,23,
101,,27,
102,,29,
103,,32,
104,,53,


 outer code is same as inner

<a id="content13"></a>
## 12. Groupby

Let's get back to our titanic dataset.

In [319]:
df.head()

Unnamed: 0,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,last_name,first_name,Third&Men,Age_group
630,1,1.0,"Barkworth, Mr. Algernon Henry Wilson",0.0,80.0,0.0,0.0,27042,30.0,A23,S,Barkworth,Mr. Algernon Henry Wilson,0.0,4.0
851,0,3.0,"Svensson, Mr. Johan",0.0,74.0,0.0,0.0,347060,7.775,,S,Svensson,Mr. Johan,0.0,4.0
96,0,1.0,"Goldschmidt, Mr. George B",0.0,71.0,0.0,0.0,PC 17754,34.6542,A5,C,Goldschmidt,Mr. George B,0.0,4.0
493,0,1.0,"Artagaveytia, Mr. Ramon",0.0,71.0,0.0,0.0,PC 17609,49.5042,,C,Artagaveytia,Mr. Ramon,0.0,4.0
116,0,3.0,"Connors, Mr. Patrick",0.0,70.5,0.0,0.0,370369,7.75,,Q,Connors,Mr. Patrick,0.0,4.0


In [320]:
groups = df.groupby(['Pclass'])

groups.get_group(1)

Unnamed: 0,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,last_name,first_name,Third&Men,Age_group
630,1,1.0,"Barkworth, Mr. Algernon Henry Wilson",0.0,80.00,0.0,0.0,27042,30.0000,A23,S,Barkworth,Mr. Algernon Henry Wilson,0.0,4.0
96,0,1.0,"Goldschmidt, Mr. George B",0.0,71.00,0.0,0.0,PC 17754,34.6542,A5,C,Goldschmidt,Mr. George B,0.0,4.0
493,0,1.0,"Artagaveytia, Mr. Ramon",0.0,71.00,0.0,0.0,PC 17609,49.5042,,C,Artagaveytia,Mr. Ramon,0.0,4.0
745,0,1.0,"Crosby, Capt. Edward Gifford",0.0,70.00,1.0,1.0,WE/P 5735,71.0000,B22,S,Crosby,Capt. Edward Gifford,0.0,4.0
456,0,1.0,"Millet, Mr. Francis Davis",0.0,65.00,0.0,0.0,13509,26.5500,E38,S,Millet,Mr. Francis Davis,0.0,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
435,1,1.0,"Carter, Miss. Lucile Polk",1.0,14.00,1.0,2.0,113760,120.0000,B96 B98,S,Carter,Miss. Lucile Polk,0.0,1.0
802,1,1.0,"Carter, Master. William Thornton II",0.0,11.00,1.0,2.0,113760,120.0000,B96 B98,S,Carter,Master. William Thornton II,0.0,1.0
445,1,1.0,"Dodge, Master. Washington",0.0,4.00,0.0,2.0,33638,81.8583,A34,S,Dodge,Master. Washington,0.0,1.0
297,0,1.0,"Allison, Miss. Helen Loraine",1.0,2.00,1.0,2.0,113781,151.5500,C22 C26,S,Allison,Miss. Helen Loraine,0.0,1.0


In [321]:
# Average age per Pclass
df_grp1 = df.groupby(['Pclass'])
df_grp1['Age'].mean()

Pclass
1.0    37.048118
2.0    29.866958
3.0    26.403259
Name: Age, dtype: float64

In [322]:
## Min max

df_grp1['Age'].min()

Pclass
1.0    0.92
2.0    0.67
3.0    0.42
Name: Age, dtype: float64

In [323]:
df_grp1['Age'].max()

Pclass
1.0    80.0
2.0    70.0
3.0    74.0
Name: Age, dtype: float64

In [324]:
df_grp1['Age'].count()

Pclass
1.0    216
2.0    184
3.0    491
Name: Age, dtype: int64

In [326]:
param = {'Age': lambda x: np.mean(x)}

In [333]:
# using agg() function
df_grp2 = df.groupby(['Pclass']).agg(param)

In [334]:
df_grp2

Unnamed: 0_level_0,Age
Pclass,Unnamed: 1_level_1
1.0,37.048118
2.0,29.866958
3.0,26.403259


In [336]:
## Min. Max, count, sum, for each Pclass using


df_grp3 = df.groupby(['Pclass']).agg({'Age':'min'}).rename(columns={'Age':'Min Age'})
df_grp3

Unnamed: 0_level_0,Min Age
Pclass,Unnamed: 1_level_1
1.0,0.92
2.0,0.67
3.0,0.42


In [337]:
# Name of the passengers in that class

df_grp4 = df.groupby(['Pclass']).agg({'Name': lambda x: ' '.join(x)})

In [338]:
df_grp4

Unnamed: 0_level_0,Name
Pclass,Unnamed: 1_level_1
1.0,"Barkworth, Mr. Algernon Henry Wilson Goldschmi..."
2.0,"Mitchell, Mr. Henry Michael Wheadon, Mr. Edwar..."
3.0,"Svensson, Mr. Johan Connors, Mr. Patrick Duane..."
