In [None]:
import pandas as pd
import numpy as np
import seaborn as sb
from scipy.stats.mstats import gmean
import matplotlib.pyplot as plt
import glob
from sklearn.utils import shuffle

# IPython/Jupyter magic (not plain Python): selects matplotlib's inline
# backend so figures are shown in the notebook output.
%matplotlib inline


def csv_by_TA(rentdf, plots=None):
    """Clean the territorial-authority (TA) rent table, plot it, and return it.

    Rows with a missing ``Month``, ``Wellington`` or ``Auckland`` value are
    dropped, ``Month`` is parsed to datetimes and becomes the index, and only
    the columns of interest are kept. ``Wellington`` and ``Auckland`` are
    renamed to ``WellingtonTA`` / ``AucklandTA`` (presumably to keep them
    distinct from the region-level columns of the same name).

    Args:
        rentdf: DataFrame holding a ``Month`` column plus the TA columns
            selected below.
        plots: Optional list that receives the created matplotlib Axes. When
            omitted, a module-level ``plots`` list is used if one exists;
            otherwise the Axes is simply not recorded.

    Returns:
        The cleaned DataFrame, indexed by ``Month``.

    Raises:
        KeyError: If any of the expected columns is absent from ``rentdf``.
    """
    required = ['Month', 'Wellington', 'Auckland']
    wanted = ['Wellington', 'Auckland', 'Lower Hutt', 'Upper Hutt',
              'Porirua', 'Kapiti Coast District', 'National Total']

    # Explicit copy: assigning a column onto a filtered view of the caller's
    # frame triggers SettingWithCopyWarning and may not take effect.
    cleaned_rdf = rentdf.dropna(subset=required).copy()
    cleaned_rdf['Month'] = pd.to_datetime(cleaned_rdf['Month'])
    cleaned_rdf = cleaned_rdf.set_index('Month')

    cleaned_rdf = cleaned_rdf.loc[:, wanted]
    cleaned_rdf = cleaned_rdf.rename(
        columns={'Wellington': 'WellingtonTA', 'Auckland': 'AucklandTA'})

    ax = cleaned_rdf.plot(
        colormap='gist_rainbow',
        title='Mean Geometric Rent by TA',
        markersize=10,
    )
    ax.set_xlim(pd.Timestamp('2006-01-01'), pd.Timestamp('2018-08-14'))
    ax.set_ylabel("Mean Geometric Rent ($)")

    # The original appended to a bare ``plots`` name that is only a local in
    # main(), which raised NameError. Prefer the explicit argument and fall
    # back to a module-level list when the notebook defines one.
    if plots is None:
        plots = globals().get('plots')
    if plots is not None:
        plots.append(ax)

    return cleaned_rdf

def csv_by_Region(rentdf, plots=None):
    """Clean the regional rent table, plot it, and return it.

    Rows with a missing ``Month``, ``Wellington`` or ``Auckland`` value are
    dropped and ``Month`` is parsed to datetimes and becomes the index. The
    Wellington and Auckland series are plotted; the returned frame also
    keeps ``National Total``.

    Args:
        rentdf: DataFrame holding ``Month``, ``Wellington``, ``Auckland`` and
            ``National Total`` columns.
        plots: Optional list that receives the created matplotlib Axes. When
            omitted, a module-level ``plots`` list is used if one exists;
            otherwise the Axes is simply not recorded.

    Returns:
        The cleaned DataFrame, indexed by ``Month``, restricted to the
        ``Wellington``, ``Auckland`` and ``National Total`` columns.

    Raises:
        KeyError: If any of the expected columns is absent from ``rentdf``.
    """
    required = ['Month', 'Wellington', 'Auckland']

    # Explicit copy: assigning a column onto a filtered view of the caller's
    # frame triggers SettingWithCopyWarning and may not take effect.
    cleaned_rdf = rentdf.dropna(subset=required).copy()
    cleaned_rdf['Month'] = pd.to_datetime(cleaned_rdf['Month'])
    cleaned_rdf = cleaned_rdf.set_index('Month')

    ax = cleaned_rdf.plot(
        y=['Wellington', 'Auckland'],
        colormap='plasma',
        title='Mean Geometric Rent by Region',
        markersize=10,
    )
    ax.set_xlim(pd.Timestamp('2006-01-01'), pd.Timestamp('2018-08-14'))
    # Same y-axis label as the TA plot so the two figures read consistently.
    ax.set_ylabel("Mean Geometric Rent ($)")

    # The original appended to a bare ``plots`` name that is only a local in
    # main(), which raised NameError. Prefer the explicit argument and fall
    # back to a module-level list when the notebook defines one.
    if plots is None:
        plots = globals().get('plots')
    if plots is not None:
        plots.append(ax)

    return cleaned_rdf.loc[:, ['Wellington', 'Auckland', 'National Total']]

def main():
    """Load the rent and tertiary-summary CSVs, plot rents, and show a preview.

    Reads the three CSV files from the ``data`` directory relative to the
    current working directory, builds the cleaned TA and region rent frames
    (each helper draws its own line plot), splits the tertiary summary into
    three column groups, draws a combined area plot of the rent frames, and
    displays the first rows of the TA frame.
    """
    # The helper functions record their Axes in a module-level ``plots`` list;
    # a plain local here would be invisible to them.
    global plots
    from pathlib import Path

    plots = []
    frames = {}

    # pathlib keeps the paths portable; the original ".\\data\\..." strings
    # only resolve on Windows.
    data_dir = Path("data")

    frames['TA-rent'] = csv_by_TA(
        pd.read_csv(data_dir / "ta-geometric-mean.csv"))
    frames['REG-rent'] = csv_by_Region(
        pd.read_csv(data_dir / "region-geometric-mean-rents.csv"))

    vic_summ_df = pd.read_csv(data_dir / "tertiary_summaries_08-16.csv")
    # The original discarded the conversion result, leaving 'Year' unparsed.
    # NOTE(review): assumes 'Year' holds four-digit years (e.g. 2008); without
    # an explicit format, integer years are read as nanoseconds since 1970.
    vic_summ_df['Year'] = pd.to_datetime(vic_summ_df['Year'], format='%Y')
    vic_summ_df = vic_summ_df.set_index('Year')

    # Three consecutive groups of three columns each.
    frames['VIC-enroll'] = vic_summ_df.iloc[:, 0:3]
    frames['VIC-efts'] = vic_summ_df.iloc[:, 3:6]
    frames['VIC-comp'] = vic_summ_df.iloc[:, 6:9]

    # NOTE(review): this stacks the two frames row-wise, so each month appears
    # twice with NaNs in the other frame's columns - confirm that axis=1 was
    # not intended.
    df = pd.concat([frames['TA-rent'], frames['REG-rent']], sort=True)
    ax = df.plot.area(colormap='gist_rainbow',
                      title='Mean Geometric Rent')
    ax.set_xlim(pd.Timestamp('2006-01-01'), pd.Timestamp('2018-08-14'))

    # Call head(); the original passed the bound method itself to display().
    display(frames['TA-rent'].head())
    
# Run the analysis when executed directly (including as a notebook cell),
# but not when this module is imported.
if __name__ == "__main__":
    main()


# rent_df.describe()
# region_df.describe()
















# pd.concat([pd.read_csv(f) for f in glob.glob(".\\data\\tertiary_summaries_08-16.csv")], 
#                   ignore_index = True, 
