# Imports

In [204]:
import pandas as pd     # data processing
import numpy as np      # lin algebra
import datetime as dt   # datetime manipulation
import pytz             # timezone manipulation

# Loading the Data

In [205]:
# NOTE(review): absolute, machine-specific Windows path -- this cell only runs on a
# machine where this exact location exists. Consider a path relative to the project root.
FILE_PATH = r"C:\Users\btros\OneDrive\Documents\LSDS-Unit-2-Project\raw_csvs\detroit_hist_weather.csv"

# Read the raw historical weather CSV into a DataFrame (default parsing options).
weather   = pd.read_csv(FILE_PATH)

### Quick Inspection

In [206]:
weather.head(1) 

Unnamed: 0,dt,dt_iso,city_id,city_name,lat,lon,temp,temp_min,temp_max,pressure,...,rain_today,snow_1h,snow_3h,snow_24h,snow_today,clouds_all,weather_id,weather_main,weather_description,weather_icon
0,1472688000,2016-09-01 00:00:00 +0000 UTC,4990729,,,,295.28,293.71,296.48,1017,...,,,,,,1,800,Clear,sky is clear,01n


In [207]:
weather.tail(1)

Unnamed: 0,dt,dt_iso,city_id,city_name,lat,lon,temp,temp_min,temp_max,pressure,...,rain_today,snow_1h,snow_3h,snow_24h,snow_today,clouds_all,weather_id,weather_main,weather_description,weather_icon
35140,1573776000,2019-11-15 00:00:00 +0000 UTC,4990729,,,,272.74,271.48,274.15,1024,...,,,,,,90,721,Haze,haze,50n


# TODO:
- match DF format with the 911 calls 
    - extract date time features
- flag severe weather incidents

In [208]:
# Keep only the timestamp, the temperature range and the weather condition code;
# every other raw column is unused by the steps below.
needed_cols = ['dt_iso', 'temp_min', 'temp_max', 'weather_id']
weather = weather.loc[:, needed_cols]

In [209]:
def datetime_manip(df):
    """
    Adds local 'year', 'month', 'day' and 'hour' fields derived from 'dt_iso'.

    The 'dt_iso' values look like "2016-09-01 00:00:00 +0000 UTC". Only the leading
    date and time tokens are used, and they are interpreted as UTC. The timestamp is
    then converted to US/Eastern so that it syncs with the 911_calls data.

    Parameters
    ----------
    df : pd.Series or pd.DataFrame
        Either a single row (as handed over by `DataFrame.apply(..., axis=1)`) or a
        whole frame that has a 'dt_iso' column.

    Returns
    -------
    pd.Series or pd.DataFrame
        Row input: the same row object with the four fields added in place.
        Frame input: a copy of the frame with the four columns added.
    """
    col = 'dt_iso'
    tz_name = 'US/Eastern'   # passed by name; pandas resolves the zone itself

    if isinstance(df, pd.DataFrame):
        # Vectorised path: parse and convert the whole column once instead of per row.
        stamps = df[col].str.split(" ").str[:2].str.join("T")
        local = pd.to_datetime(stamps, utc=True).dt.tz_convert(tz_name)

        out = df.copy()
        out['year']  = local.dt.year
        out['month'] = local.dt.month
        out['day']   = local.dt.day
        out['hour']  = local.dt.hour
        return out

    # Row path (used by apply(axis=1)).
    tokens = df[col].split(" ")[:2]
    local = pd.Timestamp(tokens[0] + "T" + tokens[1], tz="UTC").tz_convert(tz_name)

    # Read the wall-clock fields straight off the tz-aware timestamp. The previous
    # string round-trip only stripped the "-04:00" (daylight time) offset, so winter
    # rows ("-05:00") were re-parsed as tz-aware while summer rows became naive.
    df['year']  = local.year
    df['month'] = local.month
    df['day']   = local.day
    df['hour']  = local.hour

    return df
    

In [210]:
def get_day_part(df):
    """
    Extracts the part of day from the 'hour' column using six-hour buckets.

    The hour is shifted forward by 2 before being divided by the bucket width, so the
    buckets start at 0400 rather than at midnight:
    1 = Morning  (0400 - 1000h)
    2 = Midday   (1000 - 1600h)
    3 = Evening  (1600 - 2200h)
    4 = Night    (2200 - 0400h)

    Adds 'part_of_day' to the frame that was passed in, and returns a new frame
    without the 'dt_iso' column.
    """
    hours_per_part = 6

    # Hours 0-3 land in bucket 0 and hours 22-23 in bucket 4; both are "Night",
    # so bucket 0 is folded into 4.
    bucket = df['hour'].add(2).div(hours_per_part).astype(int)
    df['part_of_day'] = bucket.replace(0, 4)

    # The raw timestamp string is redundant once the date parts exist.
    return df.drop(columns=['dt_iso'])

In [211]:
def to_fahrenheit(df):
    """
    Converts 'temp_min' and 'temp_max' from Kelvin to Fahrenheit.

    Fahrenheit is the local temperature unit, which makes the values easier for
    users to interpret. The columns are overwritten on the frame that was passed
    in, and that same frame is returned.
    """
    for name in ('temp_min', 'temp_max'):
        # F = (K - 273.15) * 9 / 5 + 32, evaluated left to right.
        df[name] = df[name].sub(273.15).mul(9).div(5).add(32)

    return df


In [212]:
def flag_severe(df):
    """
    Adds a 0/1 'severe' column marking rows whose 'weather_id' is on the severe list.

    The column is added to the frame that was passed in, and that same frame is
    returned.
    """
    # NOTE(review): these look like OpenWeatherMap condition codes (2xx thunderstorm,
    # 5xx rain, 6xx snow) -- confirm against the data provider's code table.
    severe_codes = [
        202, 212, 221,
        502, 503, 504, 511,
        602, 622,
    ]

    is_severe = df['weather_id'].isin(severe_codes)
    df['severe'] = np.where(is_severe, 1, 0)

    return df

In [213]:
# Row-wise (axis=1): datetime_manip receives one row at a time and returns it with
# the local year/month/day/hour fields added.
weather = weather.apply(datetime_manip, axis=1)

In [214]:
# Bucket each hour into a part of day (1-4); this step also drops 'dt_iso'.
weather = get_day_part(weather)

In [215]:
# Convert temp_min / temp_max from Kelvin to Fahrenheit.
weather = to_fahrenheit(weather)

In [216]:
# Add the 0/1 'severe' flag based on weather_id.
weather = flag_severe(weather)

In [217]:
weather.head()

Unnamed: 0,temp_min,temp_max,weather_id,year,month,day,hour,part_of_day,severe
0,69.008,73.994,800,2016,8,31,20,3,0
1,64.4,73.004,800,2016,8,31,21,3,0
2,64.4,71.996,800,2016,8,31,22,4,0
3,60.8,71.006,800,2016,8,31,23,4,0
4,55.004,69.998,800,2016,9,1,0,4,0


# Reformat the DataFrame For Readability 

In [218]:
# Put the date parts first, then the weather code and its flag, then the temperatures.
column_order = [
    'year', 'month', 'day', 'hour', 'part_of_day',
    'weather_id', 'severe',
    'temp_min', 'temp_max',
]
weather = weather.loc[:, column_order]

In [219]:
weather

Unnamed: 0,year,month,day,hour,part_of_day,weather_id,severe,temp_min,temp_max
0,2016,8,31,20,3,800,0,69.008,73.994
1,2016,8,31,21,3,800,0,64.400,73.004
2,2016,8,31,22,4,800,0,64.400,71.996
3,2016,8,31,23,4,800,0,60.800,71.006
4,2016,9,1,0,4,800,0,55.004,69.998
...,...,...,...,...,...,...,...,...,...
35136,2019,11,14,16,3,500,0,28.994,33.998
35137,2019,11,14,17,3,500,0,28.400,33.998
35138,2019,11,14,17,3,721,0,28.400,33.998
35139,2019,11,14,18,3,721,0,28.994,33.998


In [235]:
# Collapse the hourly rows into one row per (year, month, day, part_of_day),
# averaging each value column within the group.
groupby_cols = ['year', 'month', 'day', 'part_of_day']
mean_cols    = ['weather_id', 'severe', 'temp_min', 'temp_max']

# Select the value columns with a list: selecting several columns on a groupby with
# a bare tuple (['a', 'b', ...]) is rejected by current pandas.
# NOTE(review): 'weather_id' is a condition code, so its mean (e.g. 651.17) is not
# itself a valid code -- confirm whether a mode/max would suit downstream use better.
weather = weather.groupby(groupby_cols)[mean_cols].mean()

In [243]:
# Turn the group keys (year, month, day, part_of_day) back into ordinary columns.
# This used to read from `test`, a name that is never defined in this notebook
# (leftover kernel state), so it failed on a fresh Restart & Run All.
weather = weather.reset_index()

In [245]:
weather.head(60)

Unnamed: 0,year,month,day,part_of_day,weather_id,severe,temp_min,temp_max
0,2016,8,31,3,800.0,0.0,66.704,73.499
1,2016,8,31,4,800.0,0.0,62.6,71.501
2,2016,9,1,1,801.666667,0.0,59.3,66.092
3,2016,9,1,2,651.166667,0.0,67.444747,72.874747
4,2016,9,1,3,606.833333,0.0,66.117594,71.118594
5,2016,9,1,4,751.166667,0.0,56.669,67.199
6,2016,9,2,1,800.0,0.0,56.3,62.765
7,2016,9,2,2,801.333333,0.0,67.301,76.1
8,2016,9,2,3,800.333333,0.0,67.067,73.19
9,2016,9,2,4,800.5,0.0,57.2,64.034


# Export the Wrangled DataFrame

In [246]:
# Writes to the current working directory. With the default index=True the row index
# is written as an extra unnamed first column.
# NOTE(review): pass index=False if downstream readers do not expect that column.
weather.to_csv("detroit_weather_wrangled.csv")