In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the Titanic passenger data into a DataFrame. The path is relative,
# so 'titanic.csv' must be in the notebook's working directory.
df = pd.read_csv('titanic.csv')

In [3]:
# Bracket notation with a single column name returns that column as a Series.
df['Age'] # Bracket notation also works when the column name contains spaces

0       22.0
1       38.0
2       26.0
3       35.0
4       35.0
        ... 
1304     NaN
1305    39.0
1306    38.5
1307     NaN
1308     NaN
Name: Age, Length: 1309, dtype: float64

In [4]:
# The same column can also be reached with the dot operator (attribute access).
# This only works when the column name is a valid Python identifier (no spaces)
# and does not clash with an existing DataFrame attribute or method.
df.Age

0       22.0
1       38.0
2       26.0
3       35.0
4       35.0
        ... 
1304     NaN
1305    39.0
1306    38.5
1307     NaN
1308     NaN
Name: Age, Length: 1309, dtype: float64

We can also select multiple columns at once by passing a list of column names:

In [5]:
# The outer brackets index the DataFrame; the inner brackets are a Python list
# of column names. The result is a DataFrame containing just those columns.
df[['Name', 'Parch', 'Sex']]

Unnamed: 0,Name,Parch,Sex
0,"Braund, Mr. Owen Harris",0,male
1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",0,female
2,"Heikkinen, Miss. Laina",0,female
3,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",0,female
4,"Allen, Mr. William Henry",0,male
...,...,...,...
1304,"Spector, Mr. Woolf",0,male
1305,"Oliva y Ocana, Dona. Fermina",0,female
1306,"Saether, Mr. Simon Sivertsen",0,male
1307,"Ware, Mr. Frederick",0,male


To select one or more rows by position we use iloc (integer-location based indexing)

In [6]:
# Position 0 is the first row. A single row comes back as a Series whose
# index is the DataFrame's column names.
df.iloc[0]

Unnamed: 0                          0
Cabin                             NaN
Embarked                            S
Fare                             7.25
Pclass                              3
Ticket                      A/5 21171
Age                                22
Name          Braund, Mr. Owen Harris
Parch                               0
Sex                              male
SibSp                               1
Survived                            0
Name: 0, dtype: object

To select several rows at once, pass a list of row positions:

In [7]:
# A list of positions returns those rows, in the order given, as a DataFrame.
df.iloc[[0,1,2]]

Unnamed: 0.1,Unnamed: 0,Cabin,Embarked,Fare,Pclass,Ticket,Age,Name,Parch,Sex,SibSp,Survived
0,0,,S,7.25,3,A/5 21171,22.0,"Braund, Mr. Owen Harris",0,male,1,0.0
1,1,C85,C,71.2833,1,PC 17599,38.0,"Cumings, Mrs. John Bradley (Florence Briggs Th...",0,female,1,1.0
2,2,,S,7.925,3,STON/O2. 3101282,26.0,"Heikkinen, Miss. Laina",0,female,0,1.0


How many rows are in the sample?

In [9]:
columns = df.columns # A pandas Index holding the column labels, in order
print(columns)

Index(['Unnamed: 0', 'Cabin', 'Embarked', 'Fare', 'Pclass', 'Ticket', 'Age',
       'Name', 'Parch', 'Sex', 'SibSp', 'Survived'],
      dtype='object')


In [10]:
df[columns[1:4]] # The 2nd, 3rd and 4th columns (positions 1, 2 and 3; the stop position 4 is excluded)

Unnamed: 0,Cabin,Embarked,Fare
0,,S,7.2500
1,C85,C,71.2833
2,,S,7.9250
3,C123,S,53.1000
4,,S,8.0500
...,...,...,...
1304,,S,8.0500
1305,C105,C,108.9000
1306,,S,7.2500
1307,,S,8.0500


In [8]:
# len() of a DataFrame gives the number of rows, not the number of columns.
len(df)

1309

We don't need to hard-code our selections either. We can get the column labels from the DataFrame and feed a slice of them back into the selection. Say we wanted all but the first column.

In [9]:
columns = df.columns # A pandas Index holding the column labels, in order
print(columns)

Index(['Unnamed: 0', 'Cabin', 'Embarked', 'Fare', 'Pclass', 'Ticket', 'Age',
       'Name', 'Parch', 'Sex', 'SibSp', 'Survived'],
      dtype='object')


In [10]:
df[columns[1:]] # Standard Python slicing: from position 1 to the end, i.e. every column except the first

Unnamed: 0,Cabin,Embarked,Fare,Pclass,Ticket,Age,Name,Parch,Sex,SibSp,Survived
0,,S,7.2500,3,A/5 21171,22.0,"Braund, Mr. Owen Harris",0,male,1,0.0
1,C85,C,71.2833,1,PC 17599,38.0,"Cumings, Mrs. John Bradley (Florence Briggs Th...",0,female,1,1.0
2,,S,7.9250,3,STON/O2. 3101282,26.0,"Heikkinen, Miss. Laina",0,female,0,1.0
3,C123,S,53.1000,1,113803,35.0,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",0,female,1,1.0
4,,S,8.0500,3,373450,35.0,"Allen, Mr. William Henry",0,male,0,0.0
5,,Q,8.4583,3,330877,,"Moran, Mr. James",0,male,0,0.0
6,E46,S,51.8625,1,17463,54.0,"McCarthy, Mr. Timothy J",0,male,0,0.0
7,,S,21.0750,3,349909,2.0,"Palsson, Master. Gosta Leonard",1,male,3,0.0
8,,S,11.1333,3,347742,27.0,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",2,female,0,1.0
9,,C,30.0708,2,237736,14.0,"Nasser, Mrs. Nicholas (Adele Achem)",0,female,1,1.0


In [11]:
df[columns[1:4]] # The 2nd, 3rd and 4th columns (positions 1, 2 and 3; the stop position 4 is excluded)

Unnamed: 0,Cabin,Embarked,Fare
0,,S,7.2500
1,C85,C,71.2833
2,,S,7.9250
3,C123,S,53.1000
4,,S,8.0500
5,,Q,8.4583
6,E46,S,51.8625
7,,S,21.0750
8,,S,11.1333
9,,C,30.0708


In [12]:
df[columns[::2]] # A step of 2 keeps every second column, starting with the first (positions 0, 2, 4, ...)

Unnamed: 0.1,Unnamed: 0,Embarked,Pclass,Age,Parch,SibSp
0,0,S,3,22.0,0,1
1,1,C,1,38.0,0,1
2,2,S,3,26.0,0,0
3,3,S,1,35.0,0,1
4,4,S,3,35.0,0,0
5,5,Q,3,,0,0
6,6,S,1,54.0,0,0
7,7,S,3,2.0,1,3
8,8,S,3,27.0,2,0
9,9,C,2,14.0,0,1


How many rows are there?

What if we wanted the value of Fare at row 2?  We can do this in a few ways:

In [13]:
# Two steps: iloc[2] pulls out the row at position 2 as a Series, then
# ['Fare'] looks up that column's value within the row.
df.iloc[2]['Fare'] # Row centric

7.925

In [14]:
# Same as the bracket form, but reading the value from the row with the dot operator.
df.iloc[2].Fare # Row centric

7.925

In [15]:
# Two steps: select the Fare column as a Series, then take the entry labelled 2.
# NOTE: [2] on the Series is a lookup by index label; it matches iloc[2] here
# because the frame shows the default 0, 1, 2, ... row index.
df['Fare'][2] # Column centric

7.925

In [16]:
# Same as the bracket form, but selecting the column with the dot operator.
# As above, [2] is a lookup by index label on the Fare Series.
df.Fare[2] # Column centric

7.925