In [2]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(color_codes = True)
import calendar
from datetime import datetime
import math

In [3]:
# Load the daily climate observations from the CSV in the working directory.
# low_memory = False has pandas read the file in one pass rather than in chunks,
# which avoids mixed-dtype inference on sparsely populated columns.
weather = pd.read_csv("climate-daily.csv", low_memory = False)

In [4]:
# Preview the first rows to check that the file parsed into the expected columns.
weather.head()

Unnamed: 0,x,y,TOTAL_PRECIPITATION,SNOW_ON_GROUND,SPEED_MAX_GUST_FLAG,CLIMATE_IDENTIFIER,DIRECTION_MAX_GUST_FLAG,LOCAL_MONTH,TOTAL_SNOW,PROVINCE_CODE,...,MAX_TEMPERATURE_FLAG,STATION_NAME,MIN_TEMPERATURE,COOLING_DEGREE_DAYS,SPEED_MAX_GUST,TOTAL_SNOW_FLAG,SNOW_ON_GROUND_FLAG,TOTAL_PRECIPITATION_FLAG,HEATING_DEGREE_DAYS,MAX_REL_HUMIDITY
0,-79.4,43.666667,,,,6158350,,3,,ON,...,,TORONTO,0.0,0.0,,,,,13.8,
1,-79.4,43.666667,,,,6158350,,3,,ON,...,,TORONTO,1.1,0.0,,,,,13.5,
2,-79.4,43.666667,,,,6158350,,3,,ON,...,,TORONTO,2.2,0.0,,,,,11.3,
3,-79.4,43.666667,,,,6158350,,3,,ON,...,,TORONTO,-3.9,0.0,,,,,12.4,
4,-79.4,43.666667,,,,6158350,,3,,ON,...,,TORONTO,-1.1,0.0,,,,,15.2,


In [5]:
# List every column with its dtype and non-null count; several *_FLAG columns
# turn out to be (almost) entirely empty.
weather.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 62116 entries, 0 to 62115
Data columns (total 36 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   x                         62116 non-null  float64
 1   y                         62116 non-null  float64
 2   TOTAL_PRECIPITATION       60817 non-null  float64
 3   SNOW_ON_GROUND            19824 non-null  float64
 4   SPEED_MAX_GUST_FLAG       0 non-null      float64
 5   CLIMATE_IDENTIFIER        62116 non-null  int64  
 6   DIRECTION_MAX_GUST_FLAG   0 non-null      float64
 7   LOCAL_MONTH               62116 non-null  int64  
 8   TOTAL_SNOW                60867 non-null  float64
 9   PROVINCE_CODE             62116 non-null  object 
 10  MIN_TEMPERATURE_FLAG      9 non-null      object 
 11  HEATING_DEGREE_DAYS_FLAG  5 non-null      object 
 12  MIN_REL_HUMIDITY          1 non-null      float64
 13  LOCAL_DAY                 62116 non-null  int64  
 14  MEAN_T

In [12]:
def day_mean(day, month, column, rounded = True, data = None):
    ''' finds the mean value of a column on a specific calendar day,
        taken over every row recorded for that day
        ex. average high temperature on March 7

        arguments:
            day: the numerical day of the month to use, int
            month: the numerical month to use, int
            column: the column to average (ex. 'MEAN_TEMPERATURE'), string
            rounded: an optional argument; when True the mean is rounded to the
                nearest whole number, when False it is returned unrounded
                set to true by default, boolean
            data: an optional DataFrame with LOCAL_MONTH, LOCAL_DAY and `column`
                defaults to the notebook-level `weather` frame

        return:
            the numerical mean of that column on that day
            NaN when no row matches the date or every matching value is missing
        '''

    if data is None:
        # Resolved at call time, so the function follows whatever `weather` currently holds.
        data = weather

    on_this_day = (data["LOCAL_MONTH"] == month) & (data["LOCAL_DAY"] == day)
    mean_value = data.loc[on_this_day, column].mean()

    if rounded:
        return round(mean_value, 0)
    # This path used to fall off the end of the function and return None.
    return mean_value

In [17]:
# Average the three metrics for every calendar day in one grouped pass instead of
# re-filtering the whole frame three times for each of the 12 x 31 (month, day) pairs.
months = list(calendar.month_name[1:])  # not used in this cell; kept in case other cells read it
metric_columns = ["MEAN_TEMPERATURE", "TOTAL_PRECIPITATION", "SNOW_ON_GROUND"]

pandas_dict = (
    weather
    .groupby(["LOCAL_MONTH", "LOCAL_DAY"])[metric_columns]   # groups come out sorted by month, then day
    .mean()
    .round(0)                                                # whole-number rounding, as day_mean does by default
    .dropna(subset = ["MEAN_TEMPERATURE"])                   # skip days that have no temperature average
)

# Replace the (month, day) index with readable labels such as "March 7".
pandas_dict.index = [
    f"{calendar.month_name[int(month)]} {int(day)}"
    for month, day in pandas_dict.index
]

# Same {label: {metric: value}} mapping the loop used to build.
mean_for_year = pandas_dict.to_dict(orient = "index")
pandas_dict

Unnamed: 0,MEAN_TEMPERATURE,TOTAL_PRECIPITATION,SNOW_ON_GROUND
January 1,-4.0,2.0,5.0
January 2,-4.0,3.0,5.0
January 3,-4.0,2.0,6.0
January 4,-4.0,2.0,6.0
January 5,-4.0,2.0,6.0
...,...,...,...
December 27,-3.0,2.0,5.0
December 28,-3.0,2.0,5.0
December 29,-4.0,2.0,5.0
December 30,-4.0,2.0,5.0


In [8]:
# Lift the row limit only inside this block so every calendar day is printed;
# the display option reverts to its previous value when the block exits.
with pd.option_context("display.max_rows", None):
    print(pandas_dict)

              MEAN_TEMPERATURE  TOTAL_PRECIPITATION  SNOW_ON_GROUND
January 1                -3.68                 2.20            4.81
January 2                -3.69                 2.57            5.08
January 3                -3.86                 2.08            5.81
January 4                -4.00                 1.71            5.71
January 5                -4.15                 2.25            5.74
January 6                -3.86                 2.12            5.52
January 7                -4.60                 2.09            6.19
January 8                -5.13                 2.32            6.59
January 9                -4.78                 1.96            6.48
January 10               -4.91                 2.20            6.60
January 11               -5.11                 2.36            6.48
January 12               -5.09                 2.05            7.02
January 13               -4.80                 2.09            7.22
January 14               -4.75                 2

In [18]:
# Rank the calendar days warmest first, breaking temperature ties by the wetter day.
# NOTE(review): the averages are whole numbers here, so many days tie on temperature
# and the precipitation tie-break decides more of the order than it might appear.
sorted_dict = pandas_dict.sort_values(
    ["MEAN_TEMPERATURE", "TOTAL_PRECIPITATION"],
    ascending = False,   # one flag applies to both sort keys
)
sorted_dict

Unnamed: 0,MEAN_TEMPERATURE,TOTAL_PRECIPITATION,SNOW_ON_GROUND
July 17,22.0,3.0,0.0
July 18,22.0,3.0,0.0
July 15,21.0,4.0,0.0
August 4,21.0,4.0,0.0
July 7,21.0,3.0,0.0
...,...,...,...
February 3,-6.0,2.0,9.0
February 4,-6.0,2.0,9.0
February 5,-6.0,2.0,8.0
February 10,-6.0,2.0,8.0


In [19]:
# First row after the descending sort: the day with the highest average temperature.
print(sorted_dict.iloc[0])

MEAN_TEMPERATURE       22.0
TOTAL_PRECIPITATION     3.0
SNOW_ON_GROUND          0.0
Name: July 17, dtype: float64
