In [1]:
import numpy as np
import pandas as pd


def get_precip_data():
    return pd.read_csv('precipitation.csv', parse_dates=[2])


def date_to_month(d):
    # You may need to modify this function, depending on your data types.
    return '%04i-%02i' % (d.year, d.month)


def pivot_months_pandas(data):
    """
    Create monthly precipitation totals for each station in the data set.
    
    This should use Pandas methods to manipulate the data.
    """
    # ...
    transfer_month = np.vectorize(date_to_month,otypes=[np.str_])
    data['month']=transfer_month(data['date'])
    new_frame1 = data.groupby(['name','month']).sum().reset_index()
    new_frame2 = data.groupby(['name','month']).count().reset_index()
    monthly = new_frame1.pivot(index='name',columns='month',values='precipitation')
    print(monthly)
    counts = new_frame2.pivot(index='name',columns='month',values='precipitation')
    return monthly, counts


def pivot_months_loops(data):
    """
    Create monthly precipitation totals for each station in the data set.
    
    This does it the hard way: using Pandas as a dumb data store, and iterating in Python.
    """
    # Find all stations and months in the data set.
    stations = set()
    months = set()
    for i,r in data.iterrows():
        stations.add(r['name'])
        m = date_to_month(r['date'])
        months.add(m)

    # Aggregate into dictionaries so we can look up later.
    stations = sorted(list(stations))
    row_to_station = dict(enumerate(stations))
    station_to_row = {s: i for i,s in row_to_station.items()}
    
    months = sorted(list(months))
    col_to_month = dict(enumerate(months))
    month_to_col = {m: i for i,m in col_to_month.items()}

    # Create arrays for the data, and fill them.
    precip_total = np.zeros((len(row_to_station), 12), dtype=np.uint)
    obs_count = np.zeros((len(row_to_station), 12), dtype=np.uint)

    for _, row in data.iterrows():
        m = date_to_month(row['date'])
        r = station_to_row[row['name']]
        c = month_to_col[m]

        precip_total[r, c] += row['precipitation']
        obs_count[r, c] += 1

    # Build the DataFrames we needed all along (tidying up the index names while we're at it).
    totals = pd.DataFrame(
        data=precip_total,
        index=stations,
        columns=months,
    )
    totals.index.name = 'name'
    totals.columns.name = 'month'
    
    counts = pd.DataFrame(
        data=obs_count,
        index=stations,
        columns=months,
    )
    counts.index.name = 'name'
    counts.columns.name = 'month'
    
    return totals, counts


def main():
    data = get_precip_data()
    totals, counts = pivot_months_pandas(data)
    totals.to_csv('totals.csv')
    counts.to_csv('counts.csv')
    np.savez('monthdata.npz', totals=totals.values, counts=counts.values)


if __name__ == '__main__':
    main()


month                           2016-01  2016-02  2016-03  2016-04  2016-05  \
name                                                                          
BURNABY SIMON FRASER U             1809     1482     2159      374      606   
CALGARY INTL A                      121       22       46       40      683   
GANDER INTL A                       896     1232      992     1106     1236   
HALIFAX INTL A                     1491     1473     1452     1595     1001   
REVELSTOKE                          930      561      533      204      546   
SHERBROOKE                          183      982      621      471      667   
TORONTO LESTER B. PEARSON INT'      363      491      729      681      346   
VANCOUVER INTL A                   1682     1304     1616      242      516   
YELLOWKNIFE A                       220       90       28      120       98   

month                           2016-06  2016-07  2016-08  2016-09  2016-10  \
name                                               