In [82]:
# Import packages

import numpy as np
import pandas as pd
from datetime import *
import calendar

# Create constants for filenames

city_attrib_file = 'city_attributes.csv'
humidity_file = 'humidity.csv'
pressure_file = 'pressure.csv'
temperature_file = 'temperature.csv'
weather_description_file = 'converted.csv'
wind_direction_file = 'wind_direction.csv'
wind_speed_file = 'wind_speed.csv'

training_date_range = ('2013-10-2', '2016-12-31')
test_date_range = ('2017-1-1', '2017-11-30')
# List of all independent variable files we are pulling data from
attrib_files = [humidity_file, pressure_file, temperature_file, weather_description_file, wind_direction_file, wind_speed_file]



# Create lists of 12 cities
cities = list(pd.read_csv(humidity_file, sep=',').columns.values)
cities.remove('datetime')

devin_cities = cities[:12]
ethan_cities = cities[12:24]
phil_cities = cities[24:]


In [83]:
attrib_dfs = []
for file in attrib_files:
    attrib_dfs.append(pd.read_csv(file, sep=','))

In [84]:
# Create function that will form our training and test dataset 

# Args:
    # df = dataframe to append to
    # city_name = name of city we are creating the dataset for
    # date = date we are trying to predict (mm/dd/yyyy) as a str
# Return:
    # new df with added row
def create_training_df_for_city_date(df, city_name, date):
    datetime_obj = datetime.strptime(date, '%Y-%m-%d')
    
    new_data_row = []
    
    # For each independent attribute, get each feature
    for atrrib_df in attrib_dfs:
        # add the new tuple to the end of the list (row)
        attrib_1yr_3days_values = get_1yr_3days_attrib(city_name, datetime_obj, atrrib_df)
        for val in attrib_1yr_3days_values:
            new_data_row.append(val)
        
        
    # For each dependent attribute, get each feature (just 1 day)
    for atrrib_df in attrib_dfs:
        new_data_row.append(get_today_attrib(city_name, datetime_obj, atrrib_df))
        
    
    df.loc[len(df)] = new_data_row
    return df

def create_test_df_for_city_date(df, city_name, date):
    datetime_obj = datetime.strptime(date, '%Y-%m-%d')
    
    new_data_row = []
    
    # For each independent attribute, get each feature
    for attrib_df in attrib_dfs:
        # add the new tuple to the end of the list (row)
        attrib_1yr_3days_values = get_1yr_3days_attrib(city_name, datetime_obj, attrib_df)
        for val in attrib_1yr_3days_values:
            new_data_row.append(val)
            
    print(len(new_data_row))
    print(df.shape())
    df.loc[len(df)] = new_data_row
    return df

In [85]:
# Helper function to get 1 year ago and 3 days ago data from the date
    # for a given attribute (temp/pres/humidity...etc)
    
# Args:
    # city_name = name of city you want the data of
    # date = the current date, from which you want 1 year ago and the past 3 days
    # csv_file = the attribute you want to get
    
# Returns:
    # tuple of (1yr,3days,2days,1day)
def get_1yr_3days_attrib(city_name, date, attrib_df):
    ret = []
    
    year_1_date = date - timedelta(days=365)
    ret.append(get_avg(attrib_df, city_name, year_1_date))
    
    
    day_3_date = date - timedelta(days=3)
    ret.append(get_avg(attrib_df, city_name, day_3_date))
    
    day_2_date = date - timedelta(days=2)
    ret.append(get_avg(attrib_df, city_name, day_2_date))
    
    day_1_date = date - timedelta(days=1)
    ret.append(get_avg(attrib_df, city_name, day_1_date))
    
    return ret

In [86]:
# Get today's attribute (avg'ed) based on city and csv_file
# Args:
    # city_name = name of city you want the data of
    # date = the current date, from which you want 1 year ago and the past 3 days
    # csv_file = the attribute you want to get
# Returns:
    # the avg of the attribute for the given day and city
def get_today_attrib(city_name, date, attrib_df):
    return get_avg(attrib_df, city_name, date)

In [87]:
def get_avg(dataframe, city, date):
    strdate = date.strftime("%Y-%m-%d")
    daily = dataframe[['datetime', city]].copy() #create dataframe of just datetimes and that city 
    day = daily[daily['datetime'].str.contains(strdate)]   #filter above dataframe for a specific day
    valGood = day.dropna()
    vals = list(valGood[city])     #create list of all temps for that day
    return np.mean(vals)

In [88]:
def dates_list(date_range):
    dates = []
    date1 = datetime.strptime(date_range[0], "%Y-%m-%d")
    date2 = datetime.strptime(date_range[1], "%Y-%m-%d")
    delta = date2 - date1       # timedelta
    for i in range(delta.days + 1):
        newDate = (date1 + timedelta(days=i))
        dates.append(newDate.strftime('%Y-%m-%d'))
    return dates

In [89]:
def get_date_city_combo(city, date_range):
    #create city + date tuples in a list
    city_date_combo = []
    for date in date_range:
        tup = (city, date)
        city_date_combo.append(tup)
    return city_date_combo

In [90]:
def get_all_cities():
    cities = list(humidity.columns.values)
    cities.remove('datetime')
    return cities

In [93]:
def create_training_df_for_city(city):
    columns = []
    for index in attrib_files:
        index = index.rstrip('.csv')
        columns.append(index + '_1year')
        columns.append(index + '_3days')
        columns.append(index + '_2days')
        columns.append(index + '_1days')
    for index in attrib_files:
        columns.append(index + '_today')
    df = pd.DataFrame(columns=columns)
    city_date_combos = get_date_city_combo(city, dates_list(training_date_range))
    for city_date in city_date_combos:
        df = create_training_df_for_city_date(df, city_date[0], city_date[1])
        print(df)
        
    return df

In [94]:
# main - test creating df:

for city in devin_cities:
    vancouver_training_df = create_training_df_for_city(city)
    vancouver_training_df.to_csv(city + '_training.csv', sep = ',', index=False)

print(df)

   humidity_1year  humidity_3days  humidity_2days  humidity_1days  \
0          77.375          88.625       85.958333       93.083333   

   pressure_1year  pressure_3days  pressure_2days  pressure_1days  \
0      966.769231         996.125      996.708333     1016.708333   

   temperature_1year  temperature_3days          ...           \
0          286.14519         284.362958          ...            

   wind_speed_1year  wind_speed_3days  wind_speed_2days  wind_speed_1days  \
0               0.0             5.875              6.75          3.458333   

   humidity.csv_today  pressure.csv_today  temperature.csv_today  \
0           87.190476         1019.416667              281.57534   

   converted.csv_today  wind_direction.csv_today  wind_speed.csv_today  
0             3.333333                    77.625              2.041667  

[1 rows x 30 columns]
   humidity_1year  humidity_3days  humidity_2days  humidity_1days  \
0          77.375       88.625000       85.958333       93.08

   humidity_1year  humidity_3days  humidity_2days  humidity_1days  \
0       77.375000       88.625000       85.958333       93.083333   
1       71.625000       85.958333       93.083333       87.190476   
2       49.000000       93.083333       87.190476       88.050000   
3       43.409091       87.190476       88.050000       83.772727   
4       43.166667       88.050000       83.772727       82.000000   
5       49.888889       83.772727       82.000000       87.875000   

   pressure_1year  pressure_3days  pressure_2days  pressure_1days  \
0      966.769231      996.125000      996.708333     1016.708333   
1     1010.000000      996.708333     1016.708333     1019.416667   
2     1019.833333     1016.708333     1019.416667     1023.666667   
3     1015.500000     1019.416667     1023.666667     1023.500000   
4     1015.842105     1023.666667     1023.500000     1026.666667   
5     1018.857143     1023.500000     1026.666667     1018.166667   

   temperature_1year  temperatur

   humidity_1year  humidity_3days  humidity_2days  humidity_1days  \
0       77.375000       88.625000       85.958333       93.083333   
1       71.625000       85.958333       93.083333       87.190476   
2       49.000000       93.083333       87.190476       88.050000   
3       43.409091       87.190476       88.050000       83.772727   
4       43.166667       88.050000       83.772727       82.000000   
5       49.888889       83.772727       82.000000       87.875000   
6       59.842105       82.000000       87.875000       83.833333   
7       61.875000       87.875000       83.833333       88.666667   
8       76.173913       83.833333       88.666667       79.583333   

   pressure_1year  pressure_3days  pressure_2days  pressure_1days  \
0      966.769231      996.125000      996.708333     1016.708333   
1     1010.000000      996.708333     1016.708333     1019.416667   
2     1019.833333     1016.708333     1019.416667     1023.666667   
3     1015.500000     1019.416667

    humidity_1year  humidity_3days  humidity_2days  humidity_1days  \
0        77.375000       88.625000       85.958333       93.083333   
1        71.625000       85.958333       93.083333       87.190476   
2        49.000000       93.083333       87.190476       88.050000   
3        43.409091       87.190476       88.050000       83.772727   
4        43.166667       88.050000       83.772727       82.000000   
5        49.888889       83.772727       82.000000       87.875000   
6        59.842105       82.000000       87.875000       83.833333   
7        61.875000       87.875000       83.833333       88.666667   
8        76.173913       83.833333       88.666667       79.583333   
9        76.380952       88.666667       79.583333       78.583333   
10       89.956522       79.583333       78.583333       85.958333   

    pressure_1year  pressure_3days  pressure_2days  pressure_1days  \
0       966.769231      996.125000      996.708333     1016.708333   
1      1010.000000 

    humidity_1year  humidity_3days  humidity_2days  humidity_1days  \
0        77.375000       88.625000       85.958333       93.083333   
1        71.625000       85.958333       93.083333       87.190476   
2        49.000000       93.083333       87.190476       88.050000   
3        43.409091       87.190476       88.050000       83.772727   
4        43.166667       88.050000       83.772727       82.000000   
5        49.888889       83.772727       82.000000       87.875000   
6        59.842105       82.000000       87.875000       83.833333   
7        61.875000       87.875000       83.833333       88.666667   
8        76.173913       83.833333       88.666667       79.583333   
9        76.380952       88.666667       79.583333       78.583333   
10       89.956522       79.583333       78.583333       85.958333   
11       86.739130       78.583333       85.958333       86.583333   
12       92.791667       85.958333       86.583333       88.333333   

    pressure_1year 

    humidity_1year  humidity_3days  humidity_2days  humidity_1days  \
0        77.375000       88.625000       85.958333       93.083333   
1        71.625000       85.958333       93.083333       87.190476   
2        49.000000       93.083333       87.190476       88.050000   
3        43.409091       87.190476       88.050000       83.772727   
4        43.166667       88.050000       83.772727       82.000000   
5        49.888889       83.772727       82.000000       87.875000   
6        59.842105       82.000000       87.875000       83.833333   
7        61.875000       87.875000       83.833333       88.666667   
8        76.173913       83.833333       88.666667       79.583333   
9        76.380952       88.666667       79.583333       78.583333   
10       89.956522       79.583333       78.583333       85.958333   
11       86.739130       78.583333       85.958333       86.583333   
12       92.791667       85.958333       86.583333       88.333333   
13       91.142857  

    humidity_1year  humidity_3days  humidity_2days  humidity_1days  \
0        77.375000       88.625000       85.958333       93.083333   
1        71.625000       85.958333       93.083333       87.190476   
2        49.000000       93.083333       87.190476       88.050000   
3        43.409091       87.190476       88.050000       83.772727   
4        43.166667       88.050000       83.772727       82.000000   
5        49.888889       83.772727       82.000000       87.875000   
6        59.842105       82.000000       87.875000       83.833333   
7        61.875000       87.875000       83.833333       88.666667   
8        76.173913       83.833333       88.666667       79.583333   
9        76.380952       88.666667       79.583333       78.583333   
10       89.956522       79.583333       78.583333       85.958333   
11       86.739130       78.583333       85.958333       86.583333   
12       92.791667       85.958333       86.583333       88.333333   
13       91.142857  

    humidity_1year  humidity_3days  humidity_2days  humidity_1days  \
0        77.375000       88.625000       85.958333       93.083333   
1        71.625000       85.958333       93.083333       87.190476   
2        49.000000       93.083333       87.190476       88.050000   
3        43.409091       87.190476       88.050000       83.772727   
4        43.166667       88.050000       83.772727       82.000000   
5        49.888889       83.772727       82.000000       87.875000   
6        59.842105       82.000000       87.875000       83.833333   
7        61.875000       87.875000       83.833333       88.666667   
8        76.173913       83.833333       88.666667       79.583333   
9        76.380952       88.666667       79.583333       78.583333   
10       89.956522       79.583333       78.583333       85.958333   
11       86.739130       78.583333       85.958333       86.583333   
12       92.791667       85.958333       86.583333       88.333333   
13       91.142857  

    humidity_1year  humidity_3days  humidity_2days  humidity_1days  \
0        77.375000       88.625000       85.958333       93.083333   
1        71.625000       85.958333       93.083333       87.190476   
2        49.000000       93.083333       87.190476       88.050000   
3        43.409091       87.190476       88.050000       83.772727   
4        43.166667       88.050000       83.772727       82.000000   
5        49.888889       83.772727       82.000000       87.875000   
6        59.842105       82.000000       87.875000       83.833333   
7        61.875000       87.875000       83.833333       88.666667   
8        76.173913       83.833333       88.666667       79.583333   
9        76.380952       88.666667       79.583333       78.583333   
10       89.956522       79.583333       78.583333       85.958333   
11       86.739130       78.583333       85.958333       86.583333   
12       92.791667       85.958333       86.583333       88.333333   
13       91.142857  

    humidity_1year  humidity_3days  humidity_2days  humidity_1days  \
0        77.375000       88.625000       85.958333       93.083333   
1        71.625000       85.958333       93.083333       87.190476   
2        49.000000       93.083333       87.190476       88.050000   
3        43.409091       87.190476       88.050000       83.772727   
4        43.166667       88.050000       83.772727       82.000000   
5        49.888889       83.772727       82.000000       87.875000   
6        59.842105       82.000000       87.875000       83.833333   
7        61.875000       87.875000       83.833333       88.666667   
8        76.173913       83.833333       88.666667       79.583333   
9        76.380952       88.666667       79.583333       78.583333   
10       89.956522       79.583333       78.583333       85.958333   
11       86.739130       78.583333       85.958333       86.583333   
12       92.791667       85.958333       86.583333       88.333333   
13       91.142857  

    humidity_1year  humidity_3days  humidity_2days  humidity_1days  \
0        77.375000       88.625000       85.958333       93.083333   
1        71.625000       85.958333       93.083333       87.190476   
2        49.000000       93.083333       87.190476       88.050000   
3        43.409091       87.190476       88.050000       83.772727   
4        43.166667       88.050000       83.772727       82.000000   
5        49.888889       83.772727       82.000000       87.875000   
6        59.842105       82.000000       87.875000       83.833333   
7        61.875000       87.875000       83.833333       88.666667   
8        76.173913       83.833333       88.666667       79.583333   
9        76.380952       88.666667       79.583333       78.583333   
10       89.956522       79.583333       78.583333       85.958333   
11       86.739130       78.583333       85.958333       86.583333   
12       92.791667       85.958333       86.583333       88.333333   
13       91.142857  

KeyboardInterrupt: 