# Pandas DataFrames

In this section we learn:
* How to load a pandas DataFrame
* Saving a DataFrame to a Comma-Separated Values (CSV) file
* Create or turn other data types into a DataFrame
* Examine DataFrame data
* Indexing and selecting segments/slicing of our Data Frame
* Slicing using iloc
* Numeric Operations on Series
* Pandas DataFrames
* Descriptive Stats on series
* Descriptive Stats on DataFrames
* Filtering using Boolean Operations
* Finding empty cells using isnull()
* Combining Boolean Filtering

## Loading our data from a github source

In [0]:
import pandas as pd

# Read the Titanic CSV straight from its raw GitHub URL into a DataFrame
file_name = "https://raw.githubusercontent.com/rajeevratan84/datascienceforbusiness/master/titanic.csv"
df = pd.read_csv(filepath_or_buffer=file_name)

# Preview the first five rows to confirm the load worked
df.head(n=5)

Unnamed: 0,pclass,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,survived
0,1,"Allen, Miss. Elisabeth Walton",female,29.0,0,0,24160,211.3375,B5,S,1
1,1,"Allison, Master. Hudson Trevor",male,0.9167,1,2,113781,151.55,C22 C26,S,1
2,1,"Allison, Miss. Helen Loraine",female,2.0,1,2,113781,151.55,C22 C26,S,0
3,1,"Allison, Mr. Hudson Joshua Creighton",male,30.0,1,2,113781,151.55,C22 C26,S,0
4,1,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",female,25.0,1,2,113781,151.55,C22 C26,S,0


In [0]:
# Preview the first 10 rows instead of the default 5
df.head(10)

Unnamed: 0,pclass,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,survived
0,1,"Allen, Miss. Elisabeth Walton",female,29.0,0,0,24160,211.3375,B5,S,1
1,1,"Allison, Master. Hudson Trevor",male,0.9167,1,2,113781,151.55,C22 C26,S,1
2,1,"Allison, Miss. Helen Loraine",female,2.0,1,2,113781,151.55,C22 C26,S,0
3,1,"Allison, Mr. Hudson Joshua Creighton",male,30.0,1,2,113781,151.55,C22 C26,S,0
4,1,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",female,25.0,1,2,113781,151.55,C22 C26,S,0
5,1,"Anderson, Mr. Harry",male,48.0,0,0,19952,26.55,E12,S,1
6,1,"Andrews, Miss. Kornelia Theodosia",female,63.0,1,0,13502,77.9583,D7,S,1
7,1,"Andrews, Mr. Thomas Jr",male,39.0,0,0,112050,0.0,A36,S,0
8,1,"Appleton, Mrs. Edward Dale (Charlotte Lamson)",female,53.0,2,0,11769,51.4792,C101,S,1
9,1,"Artagaveytia, Mr. Ramon",male,71.0,0,0,PC 17609,49.5042,,C,0


In [0]:
# Saving to a CSV file
# (by default the row index is written as the first column; pass index=False to omit it)
df.to_csv("myDataFrame2.csv")

# Or even an Excel file
# (pandas needs an Excel writer engine such as openpyxl installed for .xlsx output)
df.to_excel("myDataFrame2.xlsx")

In [0]:
# Explore the first 5 rows of our DataFrame (head() returns 5 rows by default)

df.head()

Unnamed: 0,pclass,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,survived
0,1,"Allen, Miss. Elisabeth Walton",female,29.0,0,0,24160,211.3375,B5,S,1
1,1,"Allison, Master. Hudson Trevor",male,0.9167,1,2,113781,151.55,C22 C26,S,1
2,1,"Allison, Miss. Helen Loraine",female,2.0,1,2,113781,151.55,C22 C26,S,0
3,1,"Allison, Mr. Hudson Joshua Creighton",male,30.0,1,2,113781,151.55,C22 C26,S,0
4,1,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",female,25.0,1,2,113781,151.55,C22 C26,S,0


**Description of the columns**

* pclass: class of travel
* name: full name of the passenger
* sex: gender
* age: numerical age
* sibsp: number of siblings/spouses aboard
* parch: number of parents/children aboard
* ticket: ticket number
* fare: cost of the ticket
* cabin: location of room
* embarked: port that the passenger embarked at (C - Cherbourg, S - Southampton, Q - Queenstown)
* survived: survival metric (0 - died, 1 - survived)

In [0]:
# Explore the data using summary statistics
# (count, mean, std, min, quartiles and max of each numeric column)

df.describe()

Unnamed: 0,pclass,age,sibsp,parch,fare,survived
count,1309.0,1046.0,1309.0,1309.0,1308.0,1309.0
mean,2.294882,29.881135,0.498854,0.385027,33.295479,0.381971
std,0.837836,14.4135,1.041658,0.86556,51.758668,0.486055
min,1.0,0.1667,0.0,0.0,0.0,0.0
25%,2.0,21.0,0.0,0.0,7.8958,0.0
50%,3.0,28.0,0.0,0.0,14.4542,0.0
75%,3.0,39.0,1.0,0.0,31.275,1.0
max,3.0,80.0,8.0,9.0,512.3292,1.0


In [0]:
# Print the index range, each column's non-null count and dtype, and the memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1309 entries, 0 to 1308
Data columns (total 11 columns):
pclass      1309 non-null int64
name        1309 non-null object
sex         1309 non-null object
age         1046 non-null float64
sibsp       1309 non-null int64
parch       1309 non-null int64
ticket      1309 non-null object
fare        1308 non-null float64
cabin       295 non-null object
embarked    1307 non-null object
survived    1309 non-null int64
dtypes: float64(2), int64(4), object(5)
memory usage: 112.6+ KB


## Creating DataFrames from non-CSV sources
#### DataFrames can be loaded from Excel sheets, Python lists, dictionaries and more!

In [0]:
# Build a plain Python list holding three single-character strings
list_a = list('abc')
list_a

['a', 'b', 'c']

In [0]:
# Turn the list into a DataFrame: each list item becomes one row of a single
# column, which gets the default column label 0
df_a = pd.DataFrame(list_a)
df_a

Unnamed: 0,0
0,a
1,b
2,c


In [0]:
# Build a DataFrame from a dictionary: each key becomes a column label and
# each list supplies that column's values, one entry per row
dict_df = pd.DataFrame(
    data={
        'http_status': [200, 200, 404, 404, 301],
        'response_time': [0.04, 0.02, 0.07, 0.08, 1.0],
    }
)
dict_df

Unnamed: 0,http_status,response_time
0,200,0.04
1,200,0.02
2,404,0.07
3,404,0.08
4,301,1.0


# Examining our DataFrames

In [0]:
# View the last 10 rows (tail() would return 5 rows if no argument were given)
df.tail(10)

Unnamed: 0,pclass,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,survived
1299,3,"Yasbeck, Mr. Antoni",male,27.0,1,0,2659,14.4542,,C,0
1300,3,"Yasbeck, Mrs. Antoni (Selini Alexander)",female,15.0,1,0,2659,14.4542,,C,1
1301,3,"Youseff, Mr. Gerious",male,45.5,0,0,2628,7.225,,C,0
1302,3,"Yousif, Mr. Wazli",male,,0,0,2647,7.225,,C,0
1303,3,"Yousseff, Mr. Gerious",male,,0,0,2627,14.4583,,C,0
1304,3,"Zabour, Miss. Hileni",female,14.5,1,0,2665,14.4542,,C,0
1305,3,"Zabour, Miss. Thamine",female,,1,0,2665,14.4542,,C,0
1306,3,"Zakarian, Mr. Mapriededer",male,26.5,0,0,2656,7.225,,C,0
1307,3,"Zakarian, Mr. Ortin",male,27.0,0,0,2670,7.225,,C,0
1308,3,"Zimmerman, Mr. Leo",male,29.0,0,0,315082,7.875,,S,0


In [0]:
# Summary statistics for the numeric columns
df.describe()

Unnamed: 0,pclass,age,sibsp,parch,fare,survived
count,1309.0,1046.0,1309.0,1309.0,1308.0,1309.0
mean,2.294882,29.881135,0.498854,0.385027,33.295479,0.381971
std,0.837836,14.4135,1.041658,0.86556,51.758668,0.486055
min,1.0,0.1667,0.0,0.0,0.0,0.0
25%,2.0,21.0,0.0,0.0,7.8958,0.0
50%,3.0,28.0,0.0,0.0,14.4542,0.0
75%,3.0,39.0,1.0,0.0,31.275,1.0
max,3.0,80.0,8.0,9.0,512.3292,1.0


In [0]:
# Column names, non-null counts, dtypes and memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1309 entries, 0 to 1308
Data columns (total 11 columns):
pclass      1309 non-null int64
name        1309 non-null object
sex         1309 non-null object
age         1046 non-null float64
sibsp       1309 non-null int64
parch       1309 non-null int64
ticket      1309 non-null object
fare        1308 non-null float64
cabin       295 non-null object
embarked    1307 non-null object
survived    1309 non-null int64
dtypes: float64(2), int64(4), object(5)
memory usage: 112.6+ KB


In [0]:
# Data type of each column, returned as a Series indexed by column name
df.dtypes

pclass        int64
name         object
sex          object
age         float64
sibsp         int64
parch         int64
ticket       object
fare        float64
cabin        object
embarked     object
survived      int64
dtype: object

In [0]:
# The column labels of the DataFrame
df.columns

Index(['pclass', 'name', 'sex', 'age', 'sibsp', 'parch', 'ticket', 'fare',
       'cabin', 'embarked', 'survived'],
      dtype='object')

In [0]:
# Viewing one column: indexing with a single column label returns a Series
df['name']

0                           Allen, Miss. Elisabeth Walton
1                          Allison, Master. Hudson Trevor
2                            Allison, Miss. Helen Loraine
3                    Allison, Mr. Hudson Joshua Creighton
4         Allison, Mrs. Hudson J C (Bessie Waldo Daniels)
5                                     Anderson, Mr. Harry
6                       Andrews, Miss. Kornelia Theodosia
7                                  Andrews, Mr. Thomas Jr
8           Appleton, Mrs. Edward Dale (Charlotte Lamson)
9                                 Artagaveytia, Mr. Ramon
10                                 Astor, Col. John Jacob
11      Astor, Mrs. John Jacob (Madeleine Talmadge Force)
12                          Aubart, Mme. Leontine Pauline
13                           Barber, Miss. Ellen "Nellie"
14                   Barkworth, Mr. Algernon Henry Wilson
15                                    Baumann, Mr. John D
16                               Baxter, Mr. Quigg Edmond
17        Baxt

# Indexing and selecting segments of our Data Frame

In [0]:
# Remind ourselves of the row labels and column names before indexing
df.head()

Unnamed: 0,pclass,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,survived
0,1,"Allen, Miss. Elisabeth Walton",female,29.0,0,0,24160,211.3375,B5,S,1
1,1,"Allison, Master. Hudson Trevor",male,0.9167,1,2,113781,151.55,C22 C26,S,1
2,1,"Allison, Miss. Helen Loraine",female,2.0,1,2,113781,151.55,C22 C26,S,0
3,1,"Allison, Mr. Hudson Joshua Creighton",male,30.0,1,2,113781,151.55,C22 C26,S,0
4,1,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",female,25.0,1,2,113781,151.55,C22 C26,S,0


In [0]:
# Label-based lookup of a single cell: df.loc[row_label, column_label]

df.loc[1,"name"]

'Allison, Master. Hudson Trevor'

In [0]:
# Same lookup again: the value of the 'name' column in the row labelled 1
df.loc[1,"name"]

'Allison, Master. Hudson Trevor'

In [0]:
# Slicing an entire column: ':' selects every row, "name" selects the column

df.loc[:,"name"]

0                           Allen, Miss. Elisabeth Walton
1                          Allison, Master. Hudson Trevor
2                            Allison, Miss. Helen Loraine
3                    Allison, Mr. Hudson Joshua Creighton
4         Allison, Mrs. Hudson J C (Bessie Waldo Daniels)
5                                     Anderson, Mr. Harry
6                       Andrews, Miss. Kornelia Theodosia
7                                  Andrews, Mr. Thomas Jr
8           Appleton, Mrs. Edward Dale (Charlotte Lamson)
9                                 Artagaveytia, Mr. Ramon
10                                 Astor, Col. John Jacob
11      Astor, Mrs. John Jacob (Madeleine Talmadge Force)
12                          Aubart, Mme. Leontine Pauline
13                           Barber, Miss. Ellen "Nellie"
14                   Barkworth, Mr. Algernon Henry Wilson
15                                    Baumann, Mr. John D
16                               Baxter, Mr. Quigg Edmond
17        Baxt

In [0]:
# Keep the 'name' column in its own variable so we can work with it directly
names = df.loc[:,"name"] 

In [0]:
# head() works on a Series too: show its first five entries
names.head()

0                      Allen, Miss. Elisabeth Walton
1                     Allison, Master. Hudson Trevor
2                       Allison, Miss. Helen Loraine
3               Allison, Mr. Hudson Joshua Creighton
4    Allison, Mrs. Hudson J C (Bessie Waldo Daniels)
Name: name, dtype: object

In [0]:
# Slicing a row: the row labelled 0, with ':' selecting every column

df.loc[0,:]

pclass                                  1
name        Allen, Miss. Elisabeth Walton
sex                                female
age                                    29
sibsp                                   0
parch                                   0
ticket                              24160
fare                              211.338
cabin                                  B5
embarked                                S
survived                                1
Name: 0, dtype: object

# Slicing using iloc

### df.iloc[row_index, column_index]

* **loc**: label based selection
* **iloc**: integer position based selection

In [0]:
# Look at the first rows again so the integer positions used below are easy to check
df.head()

Unnamed: 0,pclass,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,survived
0,1,"Allen, Miss. Elisabeth Walton",female,29.0,0,0,24160,211.3375,B5,S,1
1,1,"Allison, Master. Hudson Trevor",male,0.9167,1,2,113781,151.55,C22 C26,S,1
2,1,"Allison, Miss. Helen Loraine",female,2.0,1,2,113781,151.55,C22 C26,S,0
3,1,"Allison, Mr. Hudson Joshua Creighton",male,30.0,1,2,113781,151.55,C22 C26,S,0
4,1,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",female,25.0,1,2,113781,151.55,C22 C26,S,0


In [0]:
# Position-based lookup: first row (position 0), second column (position 1, 'name')
df.iloc[0,1]

'Allen, Miss. Elisabeth Walton'

In [0]:
# A single integer position selects that whole row, returned as a Series
df.iloc[0]

pclass                                  1
name        Allen, Miss. Elisabeth Walton
sex                                female
age                                    29
sibsp                                   0
parch                                   0
ticket                              24160
fare                              211.338
cabin                                  B5
embarked                                S
survived                                1
Name: 0, dtype: object

### Note: A 1-dimensional pandas object is a Series. A 2-dimensional pandas object is a DataFrame.

In [0]:
# Slicing multiple columns: pass a list of column labels to get a DataFrame back

df.loc[:,["name", "sex"]] 

Unnamed: 0,name,sex
0,"Allen, Miss. Elisabeth Walton",female
1,"Allison, Master. Hudson Trevor",male
2,"Allison, Miss. Helen Loraine",female
3,"Allison, Mr. Hudson Joshua Creighton",male
4,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",female
5,"Anderson, Mr. Harry",male
6,"Andrews, Miss. Kornelia Theodosia",female
7,"Andrews, Mr. Thomas Jr",male
8,"Appleton, Mrs. Edward Dale (Charlotte Lamson)",female
9,"Artagaveytia, Mr. Ramon",male


In [0]:
# An alternative way of slicing columns: index the DataFrame with a list of labels

df[["name", "sex"]]

Unnamed: 0,name,sex
0,"Allen, Miss. Elisabeth Walton",female
1,"Allison, Master. Hudson Trevor",male
2,"Allison, Miss. Helen Loraine",female
3,"Allison, Mr. Hudson Joshua Creighton",male
4,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",female
5,"Anderson, Mr. Harry",male
6,"Andrews, Miss. Kornelia Theodosia",female
7,"Andrews, Mr. Thomas Jr",male
8,"Appleton, Mrs. Edward Dale (Charlotte Lamson)",female
9,"Artagaveytia, Mr. Ramon",male


In [0]:
# Slicing rows is similar as well.
# Note: a loc label slice includes its end label, so 2:4 returns rows 2, 3 and 4

df.loc[2:4,:]

Unnamed: 0,pclass,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,survived
2,1,"Allison, Miss. Helen Loraine",female,2.0,1,2,113781,151.55,C22 C26,S,0
3,1,"Allison, Mr. Hudson Joshua Creighton",male,30.0,1,2,113781,151.55,C22 C26,S,0
4,1,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",female,25.0,1,2,113781,151.55,C22 C26,S,0


# Numeric Operations on Series

In [0]:
# Let's create a simple DataFrame with two integer columns, 'a' and 'b'

simple_df = pd.DataFrame(
    data={
        'a': list(range(1, 6)),
        'b': [5, 15, 10, 20, 15],
    }
)
simple_df

Unnamed: 0,a,b
0,1,5
1,2,15
2,3,10
3,4,20
4,5,15


In [0]:
# Let's extract column 'a' as a Series (column 'b' is extracted in the next cell)

a = simple_df['a']
a

0    1
1    2
2    3
3    4
4    5
Name: a, dtype: int64

In [0]:
# Extract column 'b' as a Series
b = simple_df['b']
b

0     5
1    15
2    10
3    20
4    15
Name: b, dtype: int64

In [0]:
# Adding two Series adds their values element-wise, matching rows by index label
c = a + b
c

0     6
1    17
2    13
3    24
4    20
dtype: int64

# Descriptive Stats on series

In [0]:
# Largest value in the Series
a.max()

5

In [0]:
# Smallest value in the Series
a.min()

1

In [0]:
# Arithmetic mean (average) of the values
a.mean()

3.0

In [0]:
# Median (middle value) of the Series
a.median()

3.0

In [0]:
# Most frequent value(s); the result is a Series because several values can tie
b.mode()

0    15
dtype: int64

In [0]:
# Sum of all values in the Series
b.sum()

65

In [0]:
# Display b again for reference before counting its values
b

0     5
1    15
2    10
3    20
4    15
Name: b, dtype: int64

In [0]:
# How many times each distinct value occurs, most frequent first
b.value_counts()

15    2
5     1
20    1
10    1
Name: b, dtype: int64

In [0]:
# describe() on a numeric Series: count, mean, std, min, quartiles and max
b.describe()

count     5.000000
mean     13.000000
std       5.700877
min       5.000000
25%      10.000000
50%      15.000000
75%      15.000000
max      20.000000
Name: b, dtype: float64

In [0]:
# Remember our Titanic dataset? Can we use describe on a column of data?

df.head()

Unnamed: 0,pclass,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,survived
0,1,"Allen, Miss. Elisabeth Walton",female,29.0,0,0,24160,211.3375,B5,S,1
1,1,"Allison, Master. Hudson Trevor",male,0.9167,1,2,113781,151.55,C22 C26,S,1
2,1,"Allison, Miss. Helen Loraine",female,2.0,1,2,113781,151.55,C22 C26,S,0
3,1,"Allison, Mr. Hudson Joshua Creighton",male,30.0,1,2,113781,151.55,C22 C26,S,0
4,1,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",female,25.0,1,2,113781,151.55,C22 C26,S,0


In [0]:
# describe() on a text (object) column reports categorical summary statistics
df['sex'].describe()

# count - number of non-null values in that column
# unique - number of distinct categories in the column
# top - most frequent category
# freq - number of occurrences of the most frequent category

count     1309
unique       2
top       male
freq       843
Name: sex, dtype: object

In [0]:
# Let's learn a new function called unique

# unique() returns an array of the distinct values found in the column
df['pclass'].unique()

array([1, 2, 3])

# Descriptive Stats on DataFrames

In [0]:
# Mean of each selected column (missing values are skipped by default)
df[["fare","age"]].mean()

fare    33.295479
age     29.881135
dtype: float64

In [0]:
# Specifying the axis
# Note: axis=0 is used by default and calculates the column statistics

df[["fare","age"]].mean(axis=0)

fare    33.295479
age     29.881135
dtype: float64

### Visualizing Axis
```
+------------+---------+--------+
|            |  A      |  B     |
+------------+---------+--------+
|      0     | 10      | 15     |----axis=1----->
+------------+---------+--------+
             |         |
             | axis=0  |
             ↓         ↓
```



In [0]:
# If we use axis=1, we calculate the row statistics:
# one mean per row, taken across the selected columns

df[["fare","age"]].mean(axis=1)

0       120.16875
1        76.23335
2        76.77500
3        90.77500
4        88.27500
          ...    
1304     14.47710
1305     14.45420
1306     16.86250
1307     17.11250
1308     18.43750
Length: 1309, dtype: float64

In [0]:
# More examples: the mean of every numeric column
# (numeric_only=True leaves out text columns such as name and sex)

df.mean(numeric_only=True)

pclass       2.294882
age         29.881135
sibsp        0.498854
parch        0.385027
fare        33.295479
survived     0.381971
dtype: float64

# Filtering using Boolean Operations

In [0]:
# Look at the data again before building filters on its columns
df.head()

Unnamed: 0,pclass,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,survived
0,1,"Allen, Miss. Elisabeth Walton",female,29.0,0,0,24160,211.3375,B5,S,1
1,1,"Allison, Master. Hudson Trevor",male,0.9167,1,2,113781,151.55,C22 C26,S,1
2,1,"Allison, Miss. Helen Loraine",female,2.0,1,2,113781,151.55,C22 C26,S,0
3,1,"Allison, Mr. Hudson Joshua Creighton",male,30.0,1,2,113781,151.55,C22 C26,S,0
4,1,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",female,25.0,1,2,113781,151.55,C22 C26,S,0


In [0]:
# Comparing a column with a value gives a boolean Series: True where pclass is 1
df['pclass'] == 1

0        True
1        True
2        True
3        True
4        True
5        True
6        True
7        True
8        True
9        True
10       True
11       True
12       True
13       True
14       True
15       True
16       True
17       True
18       True
19       True
20       True
21       True
22       True
23       True
24       True
25       True
26       True
27       True
28       True
29       True
        ...  
1279    False
1280    False
1281    False
1282    False
1283    False
1284    False
1285    False
1286    False
1287    False
1288    False
1289    False
1290    False
1291    False
1292    False
1293    False
1294    False
1295    False
1296    False
1297    False
1298    False
1299    False
1300    False
1301    False
1302    False
1303    False
1304    False
1305    False
1306    False
1307    False
1308    False
Name: pclass, Length: 1309, dtype: bool

In [0]:
# Boolean Series that is True for passengers older than 70
df['age'] > 70

0       False
1       False
2       False
3       False
4       False
        ...  
1304    False
1305    False
1306    False
1307    False
1308    False
Name: age, Length: 1309, dtype: bool

In [0]:
# Viewing only those rows where the condition is True:
# indexing the DataFrame with a boolean Series keeps the True rows

df[df['age'] > 70]

Unnamed: 0,pclass,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,survived
9,1,"Artagaveytia, Mr. Ramon",male,71.0,0,0,PC 17609,49.5042,,C,0
14,1,"Barkworth, Mr. Algernon Henry Wilson",male,80.0,0,0,27042,30.0,A23,S,1
61,1,"Cavendish, Mrs. Tyrell William (Julia Florence...",female,76.0,1,0,19877,78.85,C46,S,1
135,1,"Goldschmidt, Mr. George B",male,71.0,0,0,PC 17754,34.6542,A5,C,0
727,3,"Connors, Mr. Patrick",male,70.5,0,0,370369,7.75,,Q,0
1235,3,"Svensson, Mr. Johan",male,74.0,0,0,347060,7.775,,S,0


In [0]:
# Filtering on exact cell values: keep rows whose name equals this string exactly

df[df['name'] == 'Barkworth, Mr. Algernon Henry Wilson']

Unnamed: 0,pclass,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,survived
14,1,"Barkworth, Mr. Algernon Henry Wilson",male,80.0,0,0,27042,30.0,A23,S,1


# Finding empty cells using isnull()

In [0]:
# Same-shaped DataFrame of booleans: True wherever a cell is missing, False otherwise
df.isnull()

Unnamed: 0,pclass,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,survived
0,False,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...
1304,False,False,False,False,False,False,False,False,True,False,False
1305,False,False,False,True,False,False,False,False,True,False,False
1306,False,False,False,False,False,False,False,False,True,False,False
1307,False,False,False,False,False,False,False,False,True,False,False


In [0]:
# Boolean Series that is True where the cabin value is missing
df['cabin'].isnull()

0       False
1       False
2       False
3       False
4       False
        ...  
1304     True
1305     True
1306     True
1307     True
1308     True
Name: cabin, Length: 1309, dtype: bool

In [0]:
# Keep only the rows whose cabin value is missing
df[df['cabin'].isnull()]

Unnamed: 0,pclass,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,survived
9,1,"Artagaveytia, Mr. Ramon",male,71.0,0,0,PC 17609,49.5042,,C,0
13,1,"Barber, Miss. Ellen ""Nellie""",female,26.0,0,0,19877,78.8500,,S,1
15,1,"Baumann, Mr. John D",male,,0,0,PC 17318,25.9250,,S,0
23,1,"Bidois, Miss. Rosalie",female,42.0,0,0,PC 17757,227.5250,,C,1
25,1,"Birnbaum, Mr. Jakob",male,25.0,0,0,13905,26.0000,,C,0
...,...,...,...,...,...,...,...,...,...,...,...
1304,3,"Zabour, Miss. Hileni",female,14.5,1,0,2665,14.4542,,C,0
1305,3,"Zabour, Miss. Thamine",female,,1,0,2665,14.4542,,C,0
1306,3,"Zakarian, Mr. Mapriededer",male,26.5,0,0,2656,7.2250,,C,0
1307,3,"Zakarian, Mr. Ortin",male,27.0,0,0,2670,7.2250,,C,0


In [0]:
# notnull() is the opposite of isnull(): keep only the rows that have a cabin value
df[df['cabin'].notnull()]

Unnamed: 0,pclass,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,survived
0,1,"Allen, Miss. Elisabeth Walton",female,29.0000,0,0,24160,211.3375,B5,S,1
1,1,"Allison, Master. Hudson Trevor",male,0.9167,1,2,113781,151.5500,C22 C26,S,1
2,1,"Allison, Miss. Helen Loraine",female,2.0000,1,2,113781,151.5500,C22 C26,S,0
3,1,"Allison, Mr. Hudson Joshua Creighton",male,30.0000,1,2,113781,151.5500,C22 C26,S,0
4,1,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",female,25.0000,1,2,113781,151.5500,C22 C26,S,0
...,...,...,...,...,...,...,...,...,...,...,...
1189,3,"Sandstrom, Miss. Marguerite Rut",female,4.0000,1,1,PP 9549,16.7000,G6,S,1
1217,3,"Soholt, Mr. Peter Andreas Lauritz Andersen",male,19.0000,0,0,348124,7.6500,F G73,S,0
1230,3,"Strom, Miss. Telma Matilda",female,2.0000,0,1,347054,10.4625,G6,S,0
1231,3,"Strom, Mrs. Wilhelm (Elna Matilda Persson)",female,29.0000,1,1,347054,10.4625,G6,S,0


# Combining Boolean Filtering

In [0]:
# Store each condition as its own boolean Series so they can be combined later
over_60 = df['age'] > 60  # True for passengers older than 60
cabin_exists = df['cabin'].notnull()  # True where a cabin value is recorded

In [0]:
# Combine the two boolean masks element-wise with `|` (logical OR): a row is
# selected when the passenger is over 60 OR has a cabin recorded.
# Arithmetic `+` on boolean Series yields the same mask but makes pandas emit a
# warning recommending `|`, so the logical operator is used here.
# Use `&` instead if both conditions must hold at the same time.
over_60_cabin_exists = over_60 | cabin_exists

  op=op_str, alt_op=unsupported[op_str]


In [0]:
# Keep the rows where the combined mask is True (over 60, or a cabin is recorded)
df[over_60_cabin_exists]

Unnamed: 0,pclass,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,survived
0,1,"Allen, Miss. Elisabeth Walton",female,29.0000,0,0,24160,211.3375,B5,S,1
1,1,"Allison, Master. Hudson Trevor",male,0.9167,1,2,113781,151.5500,C22 C26,S,1
2,1,"Allison, Miss. Helen Loraine",female,2.0000,1,2,113781,151.5500,C22 C26,S,0
3,1,"Allison, Mr. Hudson Joshua Creighton",male,30.0000,1,2,113781,151.5500,C22 C26,S,0
4,1,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",female,25.0000,1,2,113781,151.5500,C22 C26,S,0
...,...,...,...,...,...,...,...,...,...,...,...
1230,3,"Strom, Miss. Telma Matilda",female,2.0000,0,1,347054,10.4625,G6,S,0
1231,3,"Strom, Mrs. Wilhelm (Elna Matilda Persson)",female,29.0000,1,1,347054,10.4625,G6,S,0
1235,3,"Svensson, Mr. Johan",male,74.0000,0,0,347060,7.7750,,S,0
1249,3,"Tobin, Mr. Roger",male,,0,0,383121,7.7500,F38,Q,0


In [0]:
# `|` is element-wise OR: True for passengers older than 60 or younger than 5.
# Despite the "and" in the variable name this is the union of the two age groups;
# each comparison is wrapped in parentheses before being combined.
over_60_and_under_5 = (df['age'] > 60) | (df['age'] < 5)

In [0]:
# Keep the rows where the age mask is True (older than 60 or younger than 5)
df[over_60_and_under_5]

Unnamed: 0,pclass,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,survived
1,1,"Allison, Master. Hudson Trevor",male,0.9167,1,2,113781,151.5500,C22 C26,S,1
2,1,"Allison, Miss. Helen Loraine",female,2.0000,1,2,113781,151.5500,C22 C26,S,0
6,1,"Andrews, Miss. Kornelia Theodosia",female,63.0000,1,0,13502,77.9583,D7,S,1
9,1,"Artagaveytia, Mr. Ramon",male,71.0000,0,0,PC 17609,49.5042,,C,0
14,1,"Barkworth, Mr. Algernon Henry Wilson",male,80.0000,0,0,27042,30.0000,A23,S,1
...,...,...,...,...,...,...,...,...,...,...,...
1225,3,"Storey, Mr. Thomas",male,60.5000,0,0,3701,,,S,0
1230,3,"Strom, Miss. Telma Matilda",female,2.0000,0,1,347054,10.4625,G6,S,0
1235,3,"Svensson, Mr. Johan",male,74.0000,0,0,347060,7.7750,,S,0
1240,3,"Thomas, Master. Assad Alexander",male,0.4167,0,1,2625,8.5167,,C,1


# NOTE:

#### a & b (pandas) = a and b (python)
#### a | b (pandas) = a or b (python)
