Import NumPy and pandas, plus the DataFrame and Series classes

In [2]:
import numpy as np
import pandas as pd
from pandas import DataFrame, Series

Set Some Pandas Options

In [10]:
# Configure how pandas renders objects: disable the HTML repr in the
# notebook and cap the display at 10 columns and 10 rows.
pd.options.display.notebook_repr_html = False
pd.options.display.max_columns = 10
pd.options.display.max_rows = 10

Create a four-item Series

In [40]:
# Build a four-element Series; no index is given, so pandas assigns the
# default integer labels 0..3.
s = Series(data=[1, 2, 3, 4])
s

0    1
1    2
2    3
3    4
dtype: int64

In [36]:
# Select several rows at once by passing a list of index labels; the result
# is a new Series containing the rows labelled 1 and 3.
s[[1,3]]

1    2
3    4
dtype: int64

In [37]:
# Rebuild the Series, this time labelling the rows 'a'..'d' explicitly
# instead of relying on the default integer index.
s = Series(data=[1, 2, 3, 4], index=list('abcd'))
s

a    1
b    2
c    3
d    4
dtype: int64

In [38]:
# Look up the items in the Series whose index labels are 'a' and 'd'.
s[['a', 'd']]


a    1
d    4
dtype: int64

In [39]:
# Select rows by 0-based position, array-style, on a Series whose index
# labels are not integers. Use .iloc so the positional intent is explicit:
# bare s[[1, 2]] only worked through an integer-to-position fallback that
# pandas has deprecated in favour of always treating integer keys as labels.
s.iloc[[1, 2]]

b    2
c    3
dtype: int64

In [41]:
# Retrieve just the index (the row labels) of the Series, without its values.
s.index 

Index(['a', 'b', 'c', 'd'], dtype='object')

In [42]:
# Build a DatetimeIndex with one entry per day between the two
# specified dates (both endpoints included).
dates = pd.date_range(start='2014-07-01', end='2014-07-06')
dates

DatetimeIndex(['2014-07-01', '2014-07-02', '2014-07-03', '2014-07-04',
               '2014-07-05', '2014-07-06'],
              dtype='datetime64[ns]', freq='D')

In [44]:
# Build a Series of temperature readings, one value per date in `dates`,
# using those dates as the index.
temps1 = Series(data=[80, 82, 85, 90, 83, 87], index=dates)
temps1


2014-07-01    80
2014-07-02    82
2014-07-03    85
2014-07-04    90
2014-07-05    83
2014-07-06    87
Freq: D, dtype: int64

In [45]:
# Calculate the mean of the values in the Series.
temps1.mean()

84.5

In [50]:
# A second Series of temperatures that shares the same date index.
temps2 = Series(data=[70, 75, 69, 83, 79, 77], index=dates)
# Subtraction aligns the two Series on their index labels and computes
# the difference wherever the labels match.
temp_diffs = temps1.sub(temps2)
temp_diffs

2014-07-01    10
2014-07-02     7
2014-07-03    16
2014-07-04     7
2014-07-05     4
2014-07-06    10
Freq: D, dtype: int64

In [51]:
# Look up a single value by its date label in the index.
temp_diffs['2014-07-03']

16

In [52]:
# A value can also be fetched by integer position, as if the Series were an
# array. The index here holds dates, so use .iloc for the positional lookup:
# bare temp_diffs[2] depends on the deprecated fallback that treats an
# integer key as a position when the index is not integer-based.
temp_diffs.iloc[2]

16

In [53]:
# Combine the two Series, temps1 and temps2, into a DataFrame; each keyword
# name becomes the column name for the corresponding Series.
temps_df = DataFrame(dict(Missoula=temps1, Philadelphia=temps2))
temps_df

            Missoula  Philadelphia
2014-07-01        80            70
2014-07-02        82            75
2014-07-03        85            69
2014-07-04        90            83
2014-07-05        83            79
2014-07-06        87            77

In [54]:
# Get the column named 'Missoula'; selecting a single column returns a Series.
temps_df['Missoula']

2014-07-01    80
2014-07-02    82
2014-07-03    85
2014-07-04    90
2014-07-05    83
2014-07-06    87
Freq: D, Name: Missoula, dtype: int64

In [55]:
# Likewise, get the 'Philadelphia' column.
temps_df['Philadelphia']

2014-07-01    70
2014-07-02    75
2014-07-03    69
2014-07-04    83
2014-07-05    79
2014-07-06    77
Freq: D, Name: Philadelphia, dtype: int64

In [56]:
# Select both columns by passing a list of names; the result is a DataFrame
# whose columns appear in the order listed (here, reversed).
temps_df[['Philadelphia', 'Missoula']]

            Philadelphia  Missoula
2014-07-01            70        80
2014-07-02            75        82
2014-07-03            69        85
2014-07-04            83        90
2014-07-05            79        83
2014-07-06            77        87

In [57]:
# Retrieve the Missoula column using attribute (property) syntax instead of
# bracket lookup.
temps_df.Missoula

2014-07-01    80
2014-07-02    82
2014-07-03    85
2014-07-04    90
2014-07-05    83
2014-07-06    87
Freq: D, Name: Missoula, dtype: int64