# Milestone 4

## Lincoln Brown

## DSC540

## Professor Williams

In [2]:
import calendar
from datetime import datetime
import json
import os
import pandas as pd
import requests
import urllib.request
import urllib.parse

## Prep Work
First thing I need to do is create a list of months and years. I want to get the last day of the month so that I can use it in my api request to get the entire month's worth of weather data. I'll use the calendar library for this

In [10]:
# Make a dict to hold the month number and the date of the last day
months = {}

for i in range(1,13):
    # Get last day of the month
    date = calendar.monthrange(2022, i)
    # Add the month and last day to the dict
    months[i] = date[1]
# Make sure it looks good
print(months)
start_date = ""
end_date = ""

{1: 31, 2: 28, 3: 31, 4: 30, 5: 31, 6: 30, 7: 31, 8: 31, 9: 30, 10: 31, 11: 30, 12: 31}


In [11]:
# Now that I have a dictionary of the last day of the month for each month
# I can create my range of months and their associated last day
dates = {}
for i in range(10,13):
    dates[i] = months[i]

for i in range(1, 10):
    dates[i] = months[i]


In [12]:
# Import my API Key
key_file = 'weather_key.json'
with open(key_file, 'r') as api_file:
    json_key = json.load(api_file)

# Assign it to an encoded string for use in the URL
api_key = urllib.parse.urlencode(json_key)

In [13]:
# Request function that takes a dictionary including dates and my API key
def make_requests(values_dict):
    json_files = []
    # Define my api_key variable
    api_key = values_dict['key']
# Iterate through the list and then make the requests
    for date in values_dict['dates']:
        start_date,end_date = date
        url = f"https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline/Chicago%2CUnited%20States/{start_date}/{end_date}?unitGroup=metric&elements=datetime%2Cname%2Ctempmax%2Ctempmin%2Cfeelslike%2Chumidity%2Cprecip%2Cprecipprob%2Cpreciptype%2Csnowdepth%2Cwindgust%2Cwindspeed%2Cwindspeedmax%2Cwindspeedmean%2Cwinddir%2Ccloudcover%2Cvisibility%2Csolarradiation%2Csunrise%2Csunset%2Cmoonphase%2Cconditions%2Cdescription&include=obs%2Cdays&{api_key}&options=stnslevel1%2Cnonulls&contentType=json"
        # Need to parse out month and year to name the out file correctly
        date_object = datetime.strptime(start_date, "%Y-%m-%d")
        month = date_object.month
        year = date_object.year
        f_out = f"{month}_{year}.json"
        print(date_object.month)
        write_request_out(url, f_out)
        json_files.append(f_out)
    
    return json_files
        
            

In [26]:
# Function to check if the file exists (don't want to overburden the API when working)
# If request has not been made, make it
# Then write the results to a JSON file
def write_request_out(url, f_out):
    try:
        if os.path.exists(f_out):
            print("Request already made")
        else:
            r = requests.get(url)
            with open(f_out, 'w') as file:
                file.writelines(r.text)
            if r.status_code == 200:
                print("Request successful!")
            else:
                print(f"Request failed. \n {r.status}")
    except Exception as e:
        print(f"Error making request. \n{e}")

In [27]:
# Build the values_dict and attach years to all of the dates. September is singled out
# Because it includes the start and end month for 2022 and 2023. 
values_dict = {"key" : api_key, "dates": []}
for key,value in dates.items():
    print(key,value)
    if(key == 9):
        start_date_2022 = f"2022-{key}-01"
        end_date_2022 = f"2022-{key}-{value}"
        start_date_2023 = f"2023-{key}-01"
        end_date_2023 = f"2023-{key}-{value}"
        date_tuple_2022 = (start_date_2022, end_date_2022)
        date_tuple_2023 = (start_date_2023, end_date_2023)
        values_dict['dates'] += [date_tuple_2022]
        values_dict['dates'] += [date_tuple_2023]
    elif(key > 9):
        year = "2022"
        start_date = f"{year}-{key}-01"
        end_date = f"{year}-{key}-{value}"
        date_tuple = (start_date, end_date)
        values_dict['dates'] += [date_tuple]
    else:
        year = "2023"
        start_date = f"{year}-{key}-01"
        end_date = f"{year}-{key}-{value}"
        date_tuple = (start_date, end_date)
        values_dict['dates'] += [date_tuple]
        

10 31
11 30
12 31
1 31
2 28
3 31
4 30
5 31
6 30
7 31
8 31
9 30


In [29]:
# Make the requests
json_files = make_requests(values_dict)
# With our data successfully requested, the work can begin

10
Request successful!
11
Request successful!
12
Request successful!
1
Request successful!
2
Request successful!
3
Request successful!
4
Request successful!
5
Request successful!
6
Request successful!
7
Request successful!
8
Request successful!
9
Request successful!
9
Request successful!


## Cleaning Step 1:
First things first, I have 13 JSON objects that I need to get into one dataframe.
The wrangling process for this step is concatenating the individual month dataframes into a single dataframe.

In [12]:
# Loading the JSONs from file instead of requesting them again
json_files = []
for x in os.listdir('./Weather'):
    if '.json' in x:
        json_files.append(f'./Weather/{x}')

print(json_files)

['./Weather/12_2022.json', './Weather/7_2023.json', './Weather/5_2023.json', './Weather/09_2022.json', './Weather/11_2022.json', './Weather/2_2023.json', './Weather/1_2023.json', './Weather/9_2023.json', './Weather/6_2023.json', './Weather/10_2022.json', './Weather/8_2023.json', './Weather/4_2023.json', './Weather/3_2023.json']


In [13]:
weather_df = pd.DataFrame()
for file in json_files:
    with open(file, 'r') as json_file:
        data = json_file.readlines()
    for item in data:
        json_data = json.loads(item)

    json_df = pd.DataFrame(json_data)
    
    month_days = []
    for item in json_df['days']:
        month_days.append(item)
    
    month_df = pd.DataFrame.from_dict(month_days, orient='columns')
    weather_df = pd.concat([weather_df, month_df], ignore_index=True)

weather_df

Unnamed: 0,datetime,tempmax,tempmin,feelslike,humidity,precip,precipprob,windgust,windspeed,winddir,...,solarradiation,windspeedmax,windspeedmean,sunrise,sunset,moonphase,conditions,description,preciptype,snowdepth
0,2022-12-01,1.9,-7.8,-7.9,57.2,0.000,0.0,46.4,25.9,211.8,...,56.8,25.9,16.4,06:58:48,16:20:17,0.27,Partially cloudy,Partly cloudy throughout the day.,,
1,2022-12-02,11.3,2.2,4.1,53.5,0.000,0.0,62.4,34.0,198.8,...,31.0,34.0,27.3,06:59:51,16:20:01,0.31,Overcast,Cloudy skies throughout the day.,,
2,2022-12-03,10.7,-5.8,-6.6,56.0,0.000,0.0,65.9,39.8,273.0,...,57.7,39.8,23.7,07:00:52,16:19:48,0.34,Partially cloudy,Partly cloudy throughout the day.,,
3,2022-12-04,4.4,-7.1,-6.6,50.7,0.000,0.0,41.1,26.3,223.8,...,56.9,26.3,17.5,07:01:53,16:19:37,0.38,Clear,Clear conditions throughout the day.,,
4,2022-12-05,6.2,-1.4,-0.9,63.7,0.000,0.0,37.1,17.1,203.6,...,28.6,17.1,13.0,07:02:52,16:19:28,0.41,Partially cloudy,Partly cloudy throughout the day.,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
390,2023-03-27,7.8,0.7,0.6,63.4,0.000,0.0,47.2,27.2,17.9,...,170.0,27.2,18.5,06:42:05,19:10:28,0.19,Partially cloudy,Partly cloudy throughout the day.,,
391,2023-03-28,11.0,-1.1,3.7,53.8,0.000,0.0,41.0,25.2,262.5,...,114.9,25.2,9.1,06:40:23,19:11:34,0.25,Partially cloudy,Partly cloudy throughout the day.,,
392,2023-03-29,5.3,-0.3,-0.7,49.9,0.000,0.0,53.6,31.6,300.6,...,107.5,31.6,15.9,06:38:40,19:12:41,0.26,Partially cloudy,Partly cloudy throughout the day.,,
393,2023-03-30,13.0,-2.2,2.6,47.6,0.000,0.0,44.6,22.3,132.4,...,109.9,22.3,14.5,06:36:58,19:13:47,0.29,Partially cloudy,Partly cloudy throughout the day.,,


## Step 2
The next step I will take is converting the Temperatures from Celsius to Farenheit.

In [14]:
def convert_temp(temp):
    temp = (temp * 9/5) + 35
    return(float(temp))

In [15]:
temp_cols = ['tempmax', 'tempmin', 'feelslike']
for col in temp_cols:
    weather_df.loc[:,f'{col}_F'] = weather_df.loc[:,col].apply(convert_temp)

weather_df

Unnamed: 0,datetime,tempmax,tempmin,feelslike,humidity,precip,precipprob,windgust,windspeed,winddir,...,sunrise,sunset,moonphase,conditions,description,preciptype,snowdepth,tempmax_F,tempmin_F,feelslike_F
0,2022-12-01,1.9,-7.8,-7.9,57.2,0.000,0.0,46.4,25.9,211.8,...,06:58:48,16:20:17,0.27,Partially cloudy,Partly cloudy throughout the day.,,,38.42,20.96,20.78
1,2022-12-02,11.3,2.2,4.1,53.5,0.000,0.0,62.4,34.0,198.8,...,06:59:51,16:20:01,0.31,Overcast,Cloudy skies throughout the day.,,,55.34,38.96,42.38
2,2022-12-03,10.7,-5.8,-6.6,56.0,0.000,0.0,65.9,39.8,273.0,...,07:00:52,16:19:48,0.34,Partially cloudy,Partly cloudy throughout the day.,,,54.26,24.56,23.12
3,2022-12-04,4.4,-7.1,-6.6,50.7,0.000,0.0,41.1,26.3,223.8,...,07:01:53,16:19:37,0.38,Clear,Clear conditions throughout the day.,,,42.92,22.22,23.12
4,2022-12-05,6.2,-1.4,-0.9,63.7,0.000,0.0,37.1,17.1,203.6,...,07:02:52,16:19:28,0.41,Partially cloudy,Partly cloudy throughout the day.,,,46.16,32.48,33.38
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
390,2023-03-27,7.8,0.7,0.6,63.4,0.000,0.0,47.2,27.2,17.9,...,06:42:05,19:10:28,0.19,Partially cloudy,Partly cloudy throughout the day.,,,49.04,36.26,36.08
391,2023-03-28,11.0,-1.1,3.7,53.8,0.000,0.0,41.0,25.2,262.5,...,06:40:23,19:11:34,0.25,Partially cloudy,Partly cloudy throughout the day.,,,54.80,33.02,41.66
392,2023-03-29,5.3,-0.3,-0.7,49.9,0.000,0.0,53.6,31.6,300.6,...,06:38:40,19:12:41,0.26,Partially cloudy,Partly cloudy throughout the day.,,,44.54,34.46,33.74
393,2023-03-30,13.0,-2.2,2.6,47.6,0.000,0.0,44.6,22.3,132.4,...,06:36:58,19:13:47,0.29,Partially cloudy,Partly cloudy throughout the day.,,,58.40,31.04,39.68


## Step 3
Now I am going to convert the precipitation columns from mm to inches

In [16]:
def convert_precip(mm):
    inches = mm / 25.4
    return(float(inches))

In [17]:
weather_df.columns

Index(['datetime', 'tempmax', 'tempmin', 'feelslike', 'humidity', 'precip',
       'precipprob', 'windgust', 'windspeed', 'winddir', 'cloudcover',
       'visibility', 'solarradiation', 'windspeedmax', 'windspeedmean',
       'sunrise', 'sunset', 'moonphase', 'conditions', 'description',
       'preciptype', 'snowdepth', 'tempmax_F', 'tempmin_F', 'feelslike_F'],
      dtype='object')

In [18]:
precip_cols = ["precip", 'snowdepth']
for col in precip_cols:
    weather_df.loc[:,f'{col}_in'] = weather_df.loc[:,col].apply(convert_precip)

weather_df

Unnamed: 0,datetime,tempmax,tempmin,feelslike,humidity,precip,precipprob,windgust,windspeed,winddir,...,moonphase,conditions,description,preciptype,snowdepth,tempmax_F,tempmin_F,feelslike_F,precip_in,snowdepth_in
0,2022-12-01,1.9,-7.8,-7.9,57.2,0.000,0.0,46.4,25.9,211.8,...,0.27,Partially cloudy,Partly cloudy throughout the day.,,,38.42,20.96,20.78,0.000000,
1,2022-12-02,11.3,2.2,4.1,53.5,0.000,0.0,62.4,34.0,198.8,...,0.31,Overcast,Cloudy skies throughout the day.,,,55.34,38.96,42.38,0.000000,
2,2022-12-03,10.7,-5.8,-6.6,56.0,0.000,0.0,65.9,39.8,273.0,...,0.34,Partially cloudy,Partly cloudy throughout the day.,,,54.26,24.56,23.12,0.000000,
3,2022-12-04,4.4,-7.1,-6.6,50.7,0.000,0.0,41.1,26.3,223.8,...,0.38,Clear,Clear conditions throughout the day.,,,42.92,22.22,23.12,0.000000,
4,2022-12-05,6.2,-1.4,-0.9,63.7,0.000,0.0,37.1,17.1,203.6,...,0.41,Partially cloudy,Partly cloudy throughout the day.,,,46.16,32.48,33.38,0.000000,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
390,2023-03-27,7.8,0.7,0.6,63.4,0.000,0.0,47.2,27.2,17.9,...,0.19,Partially cloudy,Partly cloudy throughout the day.,,,49.04,36.26,36.08,0.000000,
391,2023-03-28,11.0,-1.1,3.7,53.8,0.000,0.0,41.0,25.2,262.5,...,0.25,Partially cloudy,Partly cloudy throughout the day.,,,54.80,33.02,41.66,0.000000,
392,2023-03-29,5.3,-0.3,-0.7,49.9,0.000,0.0,53.6,31.6,300.6,...,0.26,Partially cloudy,Partly cloudy throughout the day.,,,44.54,34.46,33.74,0.000000,
393,2023-03-30,13.0,-2.2,2.6,47.6,0.000,0.0,44.6,22.3,132.4,...,0.29,Partially cloudy,Partly cloudy throughout the day.,,,58.40,31.04,39.68,0.000000,


## Step 4 
Now I need to convert the wind speed columns from kph to mph. 

In [19]:
def convert_speed(kmh):
    mph = kmh / 1.609344
    return float(mph)

In [20]:
wind_cols = ['windgust', 'windspeed', 'windspeedmax', 'windspeedmean']
for col in wind_cols:
    weather_df.loc[:,f'{col}_mph'] = weather_df.loc[:,col].apply(convert_speed)
    
weather_df

Unnamed: 0,datetime,tempmax,tempmin,feelslike,humidity,precip,precipprob,windgust,windspeed,winddir,...,snowdepth,tempmax_F,tempmin_F,feelslike_F,precip_in,snowdepth_in,windgust_mph,windspeed_mph,windspeedmax_mph,windspeedmean_mph
0,2022-12-01,1.9,-7.8,-7.9,57.2,0.000,0.0,46.4,25.9,211.8,...,,38.42,20.96,20.78,0.000000,,28.831623,16.093514,16.093514,10.190488
1,2022-12-02,11.3,2.2,4.1,53.5,0.000,0.0,62.4,34.0,198.8,...,,55.34,38.96,42.38,0.000000,,38.773562,21.126621,21.126621,16.963434
2,2022-12-03,10.7,-5.8,-6.6,56.0,0.000,0.0,65.9,39.8,273.0,...,,54.26,24.56,23.12,0.000000,,40.948362,24.730573,24.730573,14.726497
3,2022-12-04,4.4,-7.1,-6.6,50.7,0.000,0.0,41.1,26.3,223.8,...,,42.92,22.22,23.12,0.000000,,25.538356,16.342062,16.342062,10.873996
4,2022-12-05,6.2,-1.4,-0.9,63.7,0.000,0.0,37.1,17.1,203.6,...,,46.16,32.48,33.38,0.000000,,23.052871,10.625447,10.625447,8.077825
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
390,2023-03-27,7.8,0.7,0.6,63.4,0.000,0.0,47.2,27.2,17.9,...,,49.04,36.26,36.08,0.000000,,29.328720,16.901296,16.901296,11.495367
391,2023-03-28,11.0,-1.1,3.7,53.8,0.000,0.0,41.0,25.2,262.5,...,,54.80,33.02,41.66,0.000000,,25.476219,15.658554,15.658554,5.654478
392,2023-03-29,5.3,-0.3,-0.7,49.9,0.000,0.0,53.6,31.6,300.6,...,,44.54,34.46,33.74,0.000000,,33.305496,19.635330,19.635330,9.879802
393,2023-03-30,13.0,-2.2,2.6,47.6,0.000,0.0,44.6,22.3,132.4,...,,58.40,31.04,39.68,0.000000,,27.713155,13.856578,13.856578,9.009882


In [21]:
# Drop the metric columns
changed_cols = temp_cols+precip_cols+wind_cols
for col in changed_cols:
    del weather_df[col]
    
weather_df

Unnamed: 0,datetime,humidity,precipprob,winddir,cloudcover,visibility,solarradiation,sunrise,sunset,moonphase,...,preciptype,tempmax_F,tempmin_F,feelslike_F,precip_in,snowdepth_in,windgust_mph,windspeed_mph,windspeedmax_mph,windspeedmean_mph
0,2022-12-01,57.2,0.0,211.8,52.1,16.0,56.8,06:58:48,16:20:17,0.27,...,,38.42,20.96,20.78,0.000000,,28.831623,16.093514,16.093514,10.190488
1,2022-12-02,53.5,0.0,198.8,96.6,16.0,31.0,06:59:51,16:20:01,0.31,...,,55.34,38.96,42.38,0.000000,,38.773562,21.126621,21.126621,16.963434
2,2022-12-03,56.0,0.0,273.0,32.0,15.9,57.7,07:00:52,16:19:48,0.34,...,,54.26,24.56,23.12,0.000000,,40.948362,24.730573,24.730573,14.726497
3,2022-12-04,50.7,0.0,223.8,13.1,16.0,56.9,07:01:53,16:19:37,0.38,...,,42.92,22.22,23.12,0.000000,,25.538356,16.342062,16.342062,10.873996
4,2022-12-05,63.7,0.0,203.6,83.6,15.4,28.6,07:02:52,16:19:28,0.41,...,,46.16,32.48,33.38,0.000000,,23.052871,10.625447,10.625447,8.077825
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
390,2023-03-27,63.4,0.0,17.9,36.5,15.9,170.0,06:42:05,19:10:28,0.19,...,,49.04,36.26,36.08,0.000000,,29.328720,16.901296,16.901296,11.495367
391,2023-03-28,53.8,0.0,262.5,53.3,16.0,114.9,06:40:23,19:11:34,0.25,...,,54.80,33.02,41.66,0.000000,,25.476219,15.658554,15.658554,5.654478
392,2023-03-29,49.9,0.0,300.6,56.7,15.7,107.5,06:38:40,19:12:41,0.26,...,,44.54,34.46,33.74,0.000000,,33.305496,19.635330,19.635330,9.879802
393,2023-03-30,47.6,0.0,132.4,68.6,16.0,109.9,06:36:58,19:13:47,0.29,...,,58.40,31.04,39.68,0.000000,,27.713155,13.856578,13.856578,9.009882


## Step 5
Next I want to calculate the number of daylight hours to see if there is any significance between the number of daylight hours and crimes that occur. 

In [22]:
weather_df.loc[:,'sunrise'] = weather_df.loc[:,['datetime','sunrise']].apply(lambda row: ' '.join(row.values.astype(str)), axis=1)
weather_df.loc[:,'sunset'] = weather_df.loc[:,['datetime','sunset']].apply(lambda row: ' '.join(row.values.astype(str)), axis=1)
#weather_df['daylight_hours'] = 
weather_df.loc[:,'sunrise'] = pd.to_datetime(weather_df.loc[:,'sunrise'])
weather_df.loc[:,'sunset'] = pd.to_datetime(weather_df.loc[:,'sunset'])
weather_df['daylight_hours'] = weather_df.loc[:, 'sunset'] - weather_df.loc[:, 'sunrise']
weather_df

Unnamed: 0,datetime,humidity,precipprob,winddir,cloudcover,visibility,solarradiation,sunrise,sunset,moonphase,...,tempmax_F,tempmin_F,feelslike_F,precip_in,snowdepth_in,windgust_mph,windspeed_mph,windspeedmax_mph,windspeedmean_mph,daylight_hours
0,2022-12-01,57.2,0.0,211.8,52.1,16.0,56.8,2022-12-01 06:58:48,2022-12-01 16:20:17,0.27,...,38.42,20.96,20.78,0.000000,,28.831623,16.093514,16.093514,10.190488,0 days 09:21:29
1,2022-12-02,53.5,0.0,198.8,96.6,16.0,31.0,2022-12-02 06:59:51,2022-12-02 16:20:01,0.31,...,55.34,38.96,42.38,0.000000,,38.773562,21.126621,21.126621,16.963434,0 days 09:20:10
2,2022-12-03,56.0,0.0,273.0,32.0,15.9,57.7,2022-12-03 07:00:52,2022-12-03 16:19:48,0.34,...,54.26,24.56,23.12,0.000000,,40.948362,24.730573,24.730573,14.726497,0 days 09:18:56
3,2022-12-04,50.7,0.0,223.8,13.1,16.0,56.9,2022-12-04 07:01:53,2022-12-04 16:19:37,0.38,...,42.92,22.22,23.12,0.000000,,25.538356,16.342062,16.342062,10.873996,0 days 09:17:44
4,2022-12-05,63.7,0.0,203.6,83.6,15.4,28.6,2022-12-05 07:02:52,2022-12-05 16:19:28,0.41,...,46.16,32.48,33.38,0.000000,,23.052871,10.625447,10.625447,8.077825,0 days 09:16:36
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
390,2023-03-27,63.4,0.0,17.9,36.5,15.9,170.0,2023-03-27 06:42:05,2023-03-27 19:10:28,0.19,...,49.04,36.26,36.08,0.000000,,29.328720,16.901296,16.901296,11.495367,0 days 12:28:23
391,2023-03-28,53.8,0.0,262.5,53.3,16.0,114.9,2023-03-28 06:40:23,2023-03-28 19:11:34,0.25,...,54.80,33.02,41.66,0.000000,,25.476219,15.658554,15.658554,5.654478,0 days 12:31:11
392,2023-03-29,49.9,0.0,300.6,56.7,15.7,107.5,2023-03-29 06:38:40,2023-03-29 19:12:41,0.26,...,44.54,34.46,33.74,0.000000,,33.305496,19.635330,19.635330,9.879802,0 days 12:34:01
393,2023-03-30,47.6,0.0,132.4,68.6,16.0,109.9,2023-03-30 06:36:58,2023-03-30 19:13:47,0.29,...,58.40,31.04,39.68,0.000000,,27.713155,13.856578,13.856578,9.009882,0 days 12:36:49


## Step 6
Now I would like to reorganize the columns so the dataset makes a little more sense

In [23]:
reorg_cols = ['datetime','tempmax_F','tempmin_F','feelslike_F','humidity',
              'conditions','cloudcover','description','visibility','windspeed_mph',
              'windgust_mph','windspeedmax_mph','windspeedmean_mph','preciptype','precip_in',
              'snowdepth_in','moonphase','daylight_hours','solarradiation',
              'precipprob','winddir','sunrise','sunset']
weather_df = weather_df.reindex(columns=reorg_cols)

In [24]:
weather_df

Unnamed: 0,datetime,tempmax_F,tempmin_F,feelslike_F,humidity,conditions,cloudcover,description,visibility,windspeed_mph,...,preciptype,precip_in,snowdepth_in,moonphase,daylight_hours,solarradiation,precipprob,winddir,sunrise,sunset
0,2022-12-01,38.42,20.96,20.78,57.2,Partially cloudy,52.1,Partly cloudy throughout the day.,16.0,16.093514,...,,0.000000,,0.27,0 days 09:21:29,56.8,0.0,211.8,2022-12-01 06:58:48,2022-12-01 16:20:17
1,2022-12-02,55.34,38.96,42.38,53.5,Overcast,96.6,Cloudy skies throughout the day.,16.0,21.126621,...,,0.000000,,0.31,0 days 09:20:10,31.0,0.0,198.8,2022-12-02 06:59:51,2022-12-02 16:20:01
2,2022-12-03,54.26,24.56,23.12,56.0,Partially cloudy,32.0,Partly cloudy throughout the day.,15.9,24.730573,...,,0.000000,,0.34,0 days 09:18:56,57.7,0.0,273.0,2022-12-03 07:00:52,2022-12-03 16:19:48
3,2022-12-04,42.92,22.22,23.12,50.7,Clear,13.1,Clear conditions throughout the day.,16.0,16.342062,...,,0.000000,,0.38,0 days 09:17:44,56.9,0.0,223.8,2022-12-04 07:01:53,2022-12-04 16:19:37
4,2022-12-05,46.16,32.48,33.38,63.7,Partially cloudy,83.6,Partly cloudy throughout the day.,15.4,10.625447,...,,0.000000,,0.41,0 days 09:16:36,28.6,0.0,203.6,2022-12-05 07:02:52,2022-12-05 16:19:28
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
390,2023-03-27,49.04,36.26,36.08,63.4,Partially cloudy,36.5,Partly cloudy throughout the day.,15.9,16.901296,...,,0.000000,,0.19,0 days 12:28:23,170.0,0.0,17.9,2023-03-27 06:42:05,2023-03-27 19:10:28
391,2023-03-28,54.80,33.02,41.66,53.8,Partially cloudy,53.3,Partly cloudy throughout the day.,16.0,15.658554,...,,0.000000,,0.25,0 days 12:31:11,114.9,0.0,262.5,2023-03-28 06:40:23,2023-03-28 19:11:34
392,2023-03-29,44.54,34.46,33.74,49.9,Partially cloudy,56.7,Partly cloudy throughout the day.,15.7,19.635330,...,,0.000000,,0.26,0 days 12:34:01,107.5,0.0,300.6,2023-03-29 06:38:40,2023-03-29 19:12:41
393,2023-03-30,58.40,31.04,39.68,47.6,Partially cloudy,68.6,Partly cloudy throughout the day.,16.0,13.856578,...,,0.000000,,0.29,0 days 12:36:49,109.9,0.0,132.4,2023-03-30 06:36:58,2023-03-30 19:13:47


## Step 7
I want to use the moon phase column to create categorical labels for the different phases of the moon.

In [25]:
def get_phase(moon_phase):
    moon_phases = {"New Moon" : 0.125, "Waxing Crescent" : 0.25, "First Quarter": 0.375, 
                   "Waxing Gibbous": 0.5, "Full Moon": 0.625,"Waning Gibbous": 0.75, 
                   "Last Quarter": 0.825, "Waning Crescent": 1} 
    if moon_phase < moon_phases["New Moon"]:
        moon_str = "New Moon"
    elif moon_phase < moon_phases["Waxing Crescent"]:
        moon_str = "Waxing Crescent"
    elif moon_phase < moon_phases["First Quarter"]:
        moon_str = "First Quarter"
    elif moon_phase < moon_phases["Waxing Gibbous"]:
        moon_str = "Waxing Gibbous"
    elif moon_phase < moon_phases["Full Moon"]:
        moon_str = "Full Moon"
    elif moon_phase < moon_phases["Waning Gibbous"]:
        moon_str = "Waning Gibbous"
    elif moon_phase < moon_phases["Last Quarter"]:
        moon_str = "Last Quarter"
    elif moon_phase < moon_phases["Waning Crescent"]:
        moon_str = "Waning Crescent"
    return moon_str

In [26]:
weather_df['moonphase'] = weather_df['moonphase'].apply(get_phase)
weather_df

Unnamed: 0,datetime,tempmax_F,tempmin_F,feelslike_F,humidity,conditions,cloudcover,description,visibility,windspeed_mph,...,preciptype,precip_in,snowdepth_in,moonphase,daylight_hours,solarradiation,precipprob,winddir,sunrise,sunset
0,2022-12-01,38.42,20.96,20.78,57.2,Partially cloudy,52.1,Partly cloudy throughout the day.,16.0,16.093514,...,,0.000000,,First Quarter,0 days 09:21:29,56.8,0.0,211.8,2022-12-01 06:58:48,2022-12-01 16:20:17
1,2022-12-02,55.34,38.96,42.38,53.5,Overcast,96.6,Cloudy skies throughout the day.,16.0,21.126621,...,,0.000000,,First Quarter,0 days 09:20:10,31.0,0.0,198.8,2022-12-02 06:59:51,2022-12-02 16:20:01
2,2022-12-03,54.26,24.56,23.12,56.0,Partially cloudy,32.0,Partly cloudy throughout the day.,15.9,24.730573,...,,0.000000,,First Quarter,0 days 09:18:56,57.7,0.0,273.0,2022-12-03 07:00:52,2022-12-03 16:19:48
3,2022-12-04,42.92,22.22,23.12,50.7,Clear,13.1,Clear conditions throughout the day.,16.0,16.342062,...,,0.000000,,Waxing Gibbous,0 days 09:17:44,56.9,0.0,223.8,2022-12-04 07:01:53,2022-12-04 16:19:37
4,2022-12-05,46.16,32.48,33.38,63.7,Partially cloudy,83.6,Partly cloudy throughout the day.,15.4,10.625447,...,,0.000000,,Waxing Gibbous,0 days 09:16:36,28.6,0.0,203.6,2022-12-05 07:02:52,2022-12-05 16:19:28
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
390,2023-03-27,49.04,36.26,36.08,63.4,Partially cloudy,36.5,Partly cloudy throughout the day.,15.9,16.901296,...,,0.000000,,Waxing Crescent,0 days 12:28:23,170.0,0.0,17.9,2023-03-27 06:42:05,2023-03-27 19:10:28
391,2023-03-28,54.80,33.02,41.66,53.8,Partially cloudy,53.3,Partly cloudy throughout the day.,16.0,15.658554,...,,0.000000,,First Quarter,0 days 12:31:11,114.9,0.0,262.5,2023-03-28 06:40:23,2023-03-28 19:11:34
392,2023-03-29,44.54,34.46,33.74,49.9,Partially cloudy,56.7,Partly cloudy throughout the day.,15.7,19.635330,...,,0.000000,,First Quarter,0 days 12:34:01,107.5,0.0,300.6,2023-03-29 06:38:40,2023-03-29 19:12:41
393,2023-03-30,58.40,31.04,39.68,47.6,Partially cloudy,68.6,Partly cloudy throughout the day.,16.0,13.856578,...,,0.000000,,First Quarter,0 days 12:36:49,109.9,0.0,132.4,2023-03-30 06:36:58,2023-03-30 19:13:47


In [27]:
# I am going to save the dataset now, so that it is easy to import on the next Milestone
weather_df_out = "weather_data.csv"
weather_df.to_csv(weather_df_out)

## Ethical Considerations
I didn't do a lot of manipulations with this dataset that could have ethical implications. The biggest liberty I took with the changes I made was defining the phases of the moon into their named phases instead of a numeric representation. There are 8 phases of the moon and I just went ahead and used .125 increments to build up a function to match them wherever they fall within the range. Other than that, I just used formulas I found online to perform the conversions from metric to US Standard (or Imperial) measurements. These included temperatures, wind speeds, and amounts of precipitation. 