In [1]:
import pandas as pd

In [25]:
# Column labels shared by the three temperature tables built from these lists.
column_names = ['Month', 'TempF']

# Each column_*_values list holds [month labels, temperatures in the same order].
# It is unpacked right away so the individual lists stay available by name.
column_mean_values = [
    ['Apr', 'Jan', 'Jul', 'Oct'],  # months in alphabetical order
    [61.956, 32.133, 68.935, 43.435],
]
mean_months, mean_temps = column_mean_values

column_max_values = [
    ['Jan', 'Apr', 'Jul', 'Oct'],  # months in chronological order
    [68, 89, 91, 84],
]
max_months, max_temps = column_max_values

column_min_values = [
    ['Feb', 'Apr', 'Jul', 'Dec'],  # shares only Apr and Jul with the lists above
    [38, 49, 51, 35],
]
min_months, min_temps = column_min_values




In [26]:
# Build one frame per statistic: zip() pairs each column label with its list of
# values, dict() turns the pairs into a column mapping, and set_index('Month')
# makes the month labels the row index.
w_mean = pd.DataFrame(dict(zip(column_names, column_mean_values))).set_index('Month')
w_max = pd.DataFrame(dict(zip(column_names, column_max_values))).set_index('Month')
w_min = pd.DataFrame(dict(zip(column_names, column_min_values))).set_index('Month')

In [27]:
# The two frames list the same months in different orders:
# w_mean is alphabetical, while w_max is chronological.
print(w_mean, w_max, sep='\n')

        TempF
Month        
Apr    61.956
Jan    32.133
Jul    68.935
Oct    43.435
       TempF
Month       
Jan       68
Apr       89
Jul       91
Oct       84


In [23]:
# reindex() returns the rows arranged to match the labels it is given, so an
# explicit chronological list puts w_mean in calendar order.
ordered = ['Jan', 'Apr', 'Jul', 'Oct']
w_mean2 = w_mean.reindex(ordered)

# The hand-written list is optional: w_max is already in chronological order,
# so its index can be reused directly.
w_mean3 = w_mean.reindex(w_max.index)

# Both approaches give the same result.
print(w_mean2, w_mean3, sep='\n')

        TempF
Month        
Jan    32.133
Apr    61.956
Jul    68.935
Oct    43.435
        TempF
Month        
Jan    32.133
Apr    61.956
Jul    68.935
Oct    43.435


In [24]:
# sort_index() orders the rows by their index labels. The labels here are month
# abbreviations, so this brings back the original alphabetical order:
w_mean2.sort_index()

Unnamed: 0_level_0,TempF
Month,Unnamed: 1_level_1
Apr,61.956
Jan,32.133
Jul,68.935
Oct,43.435


In [30]:
# To sort by the values of a column instead of by the index, use sort_values():
w_mean2.sort_values(by='TempF', ascending=True)

Unnamed: 0_level_0,TempF
Month,Unnamed: 1_level_1
Jan,32.133
Oct,43.435
Apr,61.956
Jul,68.935


In [28]:
# Now consider what happens when we reindex by an index that only partly
# overlaps ours (w_min's months are Feb, Apr, Jul, Dec):
w_mean2.reindex(w_min.index)

Unnamed: 0_level_0,TempF
Month,Unnamed: 1_level_1
Feb,
Apr,61.956
Jul,68.935
Dec,


In [29]:
# Only the labels present in both indexes keep their values; every other label
# in the new index gets NaN. dropna() removes those rows:
w_mean2.reindex(w_min.index).dropna()

Unnamed: 0_level_0,TempF
Month,Unnamed: 1_level_1
Apr,61.956
Jul,68.935


In [31]:
# Another way to deal with the NaNs is to fill them using ffill() (forward-fill)
# or bfill() (backward-fill). To see the gaps these methods fill, first reindex
# to every month of the year; months without data become NaN:
year = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep',  'Oct', 'Nov', 'Dec']
w_mean2.reindex(year)

Unnamed: 0_level_0,TempF
Month,Unnamed: 1_level_1
Jan,32.133
Feb,
Mar,
Apr,61.956
May,
Jun,
Jul,68.935
Aug,
Sep,
Oct,43.435


In [32]:
# Forward-fill the gaps: each missing month takes the most recent preceding value.
w_mean2.reindex(year).ffill()

Unnamed: 0_level_0,TempF
Month,Unnamed: 1_level_1
Jan,32.133
Feb,32.133
Mar,32.133
Apr,61.956
May,61.956
Jun,61.956
Jul,68.935
Aug,68.935
Sep,68.935
Oct,43.435


In [35]:
# Arithmetic between frames is aligned on the index. To demonstrate, build two
# monthly unit-sales tables whose company lists only partly overlap.
data_jan = dict(
    Company=['Acme Corporation', 'Hooli', 'Initech', 'Mediacore', 'Streeplex'],
    Units=[19, 17, 20, 10, 13],
)
data_feb = dict(
    Company=['Acme Corporation', 'Hooli', 'Mediacore', 'Vandelay Inc'],
    Units=[15, 3, 13, 25],
)

# Index each table by company name so that later operations align on it.
january = pd.DataFrame(data_jan).set_index('Company')
february = pd.DataFrame(data_feb).set_index('Company')

In [36]:
# Display the January table (units per company, indexed by company name).
january

Unnamed: 0_level_0,Units
Company,Unnamed: 1_level_1
Acme Corporation,19
Hooli,17
Initech,20
Mediacore,10
Streeplex,13


In [37]:
# With the plain '+' operator the rows are aligned on the index: only companies
# present in both frames get a sum, and every other company ends up as NaN.
january + february

Unnamed: 0_level_0,Units
Company,Unnamed: 1_level_1
Acme Corporation,34.0
Hooli,20.0
Initech,
Mediacore,23.0
Streeplex,
Vandelay Inc,


In [39]:
# This can be prevented by using the .add() method with the fill_value option:
# a company missing from one of the frames is treated as 0 in that frame.
january.add(february, fill_value=0)

Unnamed: 0_level_0,Units
Company,Unnamed: 1_level_1
Acme Corporation,34.0
Hooli,20.0
Initech,20.0
Mediacore,23.0
Streeplex,13.0
Vandelay Inc,25.0


In [41]:
# Load the GDP series from GDP.csv, using the DATE column as the index and
# asking pandas to parse it as dates.
gdp = pd.read_csv('GDP.csv', index_col='DATE', parse_dates=True)

In [43]:
# Preview the first rows of the loaded GDP data.
gdp.head()


Unnamed: 0_level_0,GDP
DATE,Unnamed: 1_level_1
1947-01-01,243.1
1947-04-01,246.3
1947-07-01,250.1
1947-10-01,260.3
1948-01-01,266.2


In [52]:
# A date index is very convenient because resample() can roll the data up to a
# coarser frequency; .last() keeps the final observation in each period.
# NOTE(review): newer pandas releases deprecate the 'A' alias in favour of 'YE';
# confirm against the installed pandas version before changing it.
yearly = gdp.resample('A').last() # Roll up to one row per year (year-end label)
yearly.head()

Unnamed: 0_level_0,GDP
DATE,Unnamed: 1_level_1
1947-12-31,260.3
1948-12-31,280.7
1949-12-31,271.0
1950-12-31,320.3
1951-12-31,356.6


__This is a list of all the possible resampling strategies that we can use with resample:__
- __B__ :       business day frequency
- __C__ :      custom business day frequency (experimental)
- __D__ :       calendar day frequency
- __W__ :       weekly frequency
- __M__ :       month end frequency
- __SM__ :      semi-month end frequency (15th and end of month)
- __BM__ :      business month end frequency
- __CBM__ :     custom business month end frequency
- __MS__ :      month start frequency
- __SMS__ :     semi-month start frequency (1st and 15th)
- __BMS__ :     business month start frequency
- __CBMS__ :    custom business month start frequency
- __Q__ :       quarter end frequency
- __BQ__ :      business quarter end frequency
- __QS__ :      quarter start frequency
- __BQS__ :     business quarter start frequency
- __A__ :       year end frequency
- __BA__ :      business year end frequency
- __AS__ :      year start frequency
- __BAS__ :     business year start frequency
- __BH__ :      business hour frequency
- __H__ :       hourly frequency
- __T__ :       minutely frequency
- __S__ :       secondly frequency
- __L__ :       milliseconds
- __U__ :       microseconds
- __N__ :       nanoseconds

In [53]:
# Percentage change between consecutive rows of `yearly` (pct_change() returns
# fractions, hence the * 100), showing only the last 10 rows.
yearly.pct_change().tail(10) * 100

Unnamed: 0_level_0,GDP
DATE,Unnamed: 1_level_1
2008-12-31,-0.92201
2009-12-31,0.11409
2010-12-31,4.556345
2011-12-31,3.644732
2012-12-31,3.243524
2013-12-31,4.311144
2014-12-31,4.329437
2015-12-31,3.108385
2016-12-31,3.381053
2017-12-31,1.804766
