In [11]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd

plt.style.use('fivethirtyeight')
sns.set_context("notebook")

## Reading in DataFrames from Files

#### Pandas has a number of very useful file reading tools. You can see them enumerated by typing "pd.re" and pressing tab. We'll be using read_csv today.

In [13]:
elections = pd.read_csv("titanic_missing.csv")
elections # if we end a cell with an expression or variable name, the result will print

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0000,,S
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S
...,...,...,...,...,...,...,...,...,...,...,...
413,1305,3,"Spector, Mr. Woolf",male,,0,0,A.5. 3236,8.0500,,S
414,1306,1,"Oliva y Ocana, Dona. Fermina",female,39.0,0,0,PC 17758,108.9000,C105,C
415,1307,3,"Saether, Mr. Simon Sivertsen",male,38.5,0,0,SOTON/O.Q. 3101262,7.2500,,S
416,1308,3,"Ware, Mr. Frederick",male,,0,0,359309,8.0500,,S


In [14]:
elections[3:7]

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S
5,897,3,"Svensson, Mr. Johan Cervin",male,14.0,0,0,7538,9.225,,S
6,898,3,"Connolly, Miss. Kate",female,30.0,0,0,330972,7.6292,,Q


In [18]:
elections['Name'].head(5)
elections['Name'].tail(5)

413              Spector, Mr. Woolf
414    Oliva y Ocana, Dona. Fermina
415    Saether, Mr. Simon Sivertsen
416             Ware, Mr. Frederick
417        Peter, Master. Michael J
Name: Name, dtype: object

In [19]:
type(elections)

pandas.core.frame.DataFrame

In [20]:
type(elections['Name'])

pandas.core.series.Series

In [23]:
elections.columns

Index(['PassengerId', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp', 'Parch',
       'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')

In [32]:
# show the name series and they are male 
winners = elections.query('Sex == "male"')['Name']
winners.head(5)

0                Kelly, Mr. James
2       Myles, Mr. Thomas Francis
3                Wirz, Mr. Albert
5      Svensson, Mr. Johan Cervin
7    Caldwell, Mr. Albert Francis
Name: Name, dtype: object

In [35]:
winners = elections.query('Sex == "male"')['Age']
winners.head(5)

0    34.5
2    62.0
3    27.0
5    14.0
7    26.0
Name: Age, dtype: float64

In [36]:
np.mean(winners)

30.27273170731707

In [37]:
np.max(winners)

67.0

In [38]:
elections.size

4598

In [40]:
elections.describe()

Unnamed: 0,PassengerId,Pclass,Age,SibSp,Parch,Fare
count,418.0,418.0,333.0,418.0,418.0,417.0
mean,1100.5,2.26555,30.178679,0.447368,0.392344,35.627188
std,120.810458,0.841838,14.263163,0.89676,0.981429,55.907576
min,892.0,1.0,-1.0,0.0,0.0,0.0
25%,996.25,1.0,21.0,0.0,0.0,7.8958
50%,1100.5,3.0,27.0,0.0,0.0,14.4542
75%,1204.75,3.0,39.0,1.0,0.0,31.5
max,1309.0,3.0,76.0,8.0,9.0,512.3292


In [41]:
# randon select 5 items
elections.sample(5)

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
56,948,3,"Cor, Mr. Bartol",male,35.0,0,0,349230,7.8958,,S
385,1277,2,"Herman, Miss. Kate",female,24.0,1,2,220845,65.0,,S
226,1118,3,"Asplund, Mr. Johan Charles",male,23.0,0,0,350054,7.7958,,S
14,906,1,"Chaffee, Mrs. Herbert Fuller (Carrie Constance...",female,47.0,1,0,W.E.P. 5734,61.175,E31,S
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S


In [45]:
elections.query('Age > 30' and 'Age <50' ).sample(5, replace = True)

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
415,1307,3,"Saether, Mr. Simon Sivertsen",male,38.5,0,0,SOTON/O.Q. 3101262,7.25,,S
147,1039,3,"Davies, Mr. Evan",male,22.0,0,0,SC/A4 23568,8.05,,S
321,1213,3,"Krekorian, Mr. Neshan",male,25.0,0,0,2654,7.2292,F E57,C
354,1246,3,"Dean, Miss. Elizabeth Gladys Millvina""""",female,0.17,1,2,C.A. 2315,20.575,,S
221,1113,3,"Reynolds, Mr. Harold J",male,21.0,0,0,342684,8.05,,S


In [46]:
elections.Age.value_counts()

 21.0    17
 24.0    17
 22.0    16
 30.0    15
 18.0    13
         ..
 28.5     1
 22.5     1
-1.0      1
 62.0     1
 38.5     1
Name: Age, Length: 80, dtype: int64