# Milestone 4

## Lincoln Brown

## DSC540

## Professor Williams

In [9]:
import calendar
from datetime import datetime
import json
import os
import pandas as pd
import requests
import urllib.request
import urllib.parse

## Prep Work
First thing I need to do is create a list of months and years. I want to get the last day of the month so that I can use it in my api request to get the entire month's worth of weather data. I'll use the calendar library for this

In [10]:
# Make a dict to hold the month number and the date of the last day
months = {}

for i in range(1,13):
    # Get last day of the month
    date = calendar.monthrange(2022, i)
    # Add the month and last day to the dict
    months[i] = date[1]
# Make sure it looks good
print(months)
start_date = ""
end_date = ""

{1: 31, 2: 28, 3: 31, 4: 30, 5: 31, 6: 30, 7: 31, 8: 31, 9: 30, 10: 31, 11: 30, 12: 31}


In [11]:
# Now that I have a dictionary of the last day of the month for each month
# I can create my range of months and their associated last day
dates = {}
for i in range(10,13):
    dates[i] = months[i]

for i in range(1, 10):
    dates[i] = months[i]


In [12]:
# Import my API Key
key_file = 'weather_key.json'
with open(key_file, 'r') as api_file:
    json_key = json.load(api_file)

# Assign it to an encoded string for use in the URL
api_key = urllib.parse.urlencode(json_key)

In [13]:
# Request function that takes a dictionary including dates and my API key
def make_requests(values_dict):
    json_files = []
    # Define my api_key variable
    api_key = values_dict['key']
# Iterate through the list and then make the requests
    for date in values_dict['dates']:
        start_date,end_date = date
        url = f"https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline/Chicago%2CUnited%20States/{start_date}/{end_date}?unitGroup=metric&elements=datetime%2Cname%2Ctempmax%2Ctempmin%2Cfeelslike%2Chumidity%2Cprecip%2Cprecipprob%2Cpreciptype%2Csnowdepth%2Cwindgust%2Cwindspeed%2Cwindspeedmax%2Cwindspeedmean%2Cwinddir%2Ccloudcover%2Cvisibility%2Csolarradiation%2Csunrise%2Csunset%2Cmoonphase%2Cconditions%2Cdescription&include=obs%2Cdays&{api_key}&options=stnslevel1%2Cnonulls&contentType=json"
        # Need to parse out month and year to name the out file correctly
        date_object = datetime.strptime(start_date, "%Y-%m-%d")
        month = date_object.month
        year = date_object.year
        f_out = f"{month}_{year}.json"
        print(date_object.month)
        write_request_out(url, f_out)
        json_files.append(f_out)
    
    return json_files
        
            

In [26]:
# Function to check if the file exists (don't want to overburden the API when working)
# If request has not been made, make it
# Then write the results to a JSON file
def write_request_out(url, f_out):
    try:
        if os.path.exists(f_out):
            print("Request already made")
        else:
            r = requests.get(url)
            with open(f_out, 'w') as file:
                file.writelines(r.text)
            if r.status_code == 200:
                print("Request successful!")
            else:
                print(f"Request failed. \n {r.status}")
    except Exception as e:
        print(f"Error making request. \n{e}")

In [27]:
# Build the values_dict and attach years to all of the dates. September is singled out
# Because it includes the start and end month for 2022 and 2023. 
values_dict = {"key" : api_key, "dates": []}
for key,value in dates.items():
    print(key,value)
    if(key == 9):
        start_date_2022 = f"2022-{key}-01"
        end_date_2022 = f"2022-{key}-{value}"
        start_date_2023 = f"2023-{key}-01"
        end_date_2023 = f"2023-{key}-{value}"
        date_tuple_2022 = (start_date_2022, end_date_2022)
        date_tuple_2023 = (start_date_2023, end_date_2023)
        values_dict['dates'] += [date_tuple_2022]
        values_dict['dates'] += [date_tuple_2023]
    elif(key > 9):
        year = "2022"
        start_date = f"{year}-{key}-01"
        end_date = f"{year}-{key}-{value}"
        date_tuple = (start_date, end_date)
        values_dict['dates'] += [date_tuple]
    else:
        year = "2023"
        start_date = f"{year}-{key}-01"
        end_date = f"{year}-{key}-{value}"
        date_tuple = (start_date, end_date)
        values_dict['dates'] += [date_tuple]
        

10 31
11 30
12 31
1 31
2 28
3 31
4 30
5 31
6 30
7 31
8 31
9 30


In [29]:
# Make the requests
json_files = make_requests(values_dict)
# With our data successfully requested, the work can begin

10
Request successful!
11
Request successful!
12
Request successful!
1
Request successful!
2
Request successful!
3
Request successful!
4
Request successful!
5
Request successful!
6
Request successful!
7
Request successful!
8
Request successful!
9
Request successful!
9
Request successful!


## Cleaning Step 1:
First things first, I have 13 JSON objects that I need to get into one dataframe.
The wrangling process for this step is concatenating the individual month dataframes into a single dataframe.

In [30]:
weather_df = pd.DataFrame()
for file in json_files:
    with open(file, 'r') as json_file:
        data = json_file.readlines()
    for item in data:
        json_data = json.loads(item)

    json_df = pd.DataFrame(json_data)
    
    month_days = []
    for item in json_df['days']:
        month_days.append(item)
    
    month_df = pd.DataFrame.from_dict(month_days, orient='columns')
    weather_df = pd.concat([weather_df, month_df], ignore_index=True)

weather_df

Unnamed: 0,datetime,tempmax,tempmin,feelslike,humidity,precip,precipprob,windgust,windspeed,winddir,...,solarradiation,windspeedmax,windspeedmean,sunrise,sunset,moonphase,conditions,description,preciptype,snowdepth
0,2022-10-01,20.2,11.4,15.8,65.7,0.000,0.0,37.9,26.0,19.2,...,67.8,26.0,14.6,06:47:29,18:32:10,0.21,Clear,Clear conditions throughout the day.,,
1,2022-10-02,19.3,13.7,15.9,69.7,0.000,0.0,36.5,24.4,35.7,...,27.6,24.4,16.5,06:48:33,18:30:27,0.25,Partially cloudy,Partly cloudy throughout the day.,,
2,2022-10-03,19.9,9.3,14.6,57.9,0.000,0.0,,13.9,73.3,...,61.5,13.9,6.2,06:49:38,18:28:45,0.28,Clear,Clear conditions throughout the day.,,
3,2022-10-04,23.5,8.2,15.5,55.5,0.000,0.0,,11.5,201.1,...,40.3,11.5,3.9,06:50:43,18:27:03,0.32,Partially cloudy,Partly cloudy throughout the day.,,
4,2022-10-05,24.7,10.4,17.8,43.2,0.000,0.0,31.7,15.4,227.1,...,64.0,15.4,6.2,06:51:48,18:25:22,0.35,Partially cloudy,Partly cloudy throughout the day.,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
390,2023-09-26,22.8,18.1,19.9,81.3,8.080,100.0,33.5,24.2,108.5,...,18.5,24.2,15.5,06:41:55,18:41:14,0.41,"Rain, Partially cloudy",Partly cloudy throughout the day with rain.,[rain],
391,2023-09-27,20.9,17.8,19.1,83.4,4.453,100.0,31.7,21.1,105.0,...,28.2,21.1,14.1,06:42:59,18:39:30,0.44,"Rain, Overcast",Cloudy skies throughout the day with rain.,[rain],
392,2023-09-28,20.5,17.8,19.0,79.9,0.098,100.0,,16.2,53.1,...,33.1,16.2,11.6,06:44:02,18:37:46,0.48,"Rain, Partially cloudy",Partly cloudy throughout the day with rain in ...,[rain],
393,2023-09-29,23.4,14.8,18.9,79.3,0.000,0.0,,16.3,116.3,...,42.1,16.3,6.8,06:45:06,18:36:02,0.50,Partially cloudy,Partly cloudy throughout the day.,,


## Step 2
The next step I will take is converting the Temperatures from Celsius to Farenheit.

In [31]:
def convert_temp(temp):
    temp = (temp * 9/5) + 35
    return(float(temp))

In [32]:
temp_cols = ['tempmax', 'tempmin', 'feelslike']
for col in temp_cols:
    weather_df.loc[:,f'{col}_F'] = weather_df.loc[:,col].apply(convert_temp)

weather_df

Unnamed: 0,datetime,tempmax,tempmin,feelslike,humidity,precip,precipprob,windgust,windspeed,winddir,...,sunrise,sunset,moonphase,conditions,description,preciptype,snowdepth,tempmax_F,tempmin_F,feelslike_F
0,2022-10-01,20.2,11.4,15.8,65.7,0.000,0.0,37.9,26.0,19.2,...,06:47:29,18:32:10,0.21,Clear,Clear conditions throughout the day.,,,71.36,55.52,63.44
1,2022-10-02,19.3,13.7,15.9,69.7,0.000,0.0,36.5,24.4,35.7,...,06:48:33,18:30:27,0.25,Partially cloudy,Partly cloudy throughout the day.,,,69.74,59.66,63.62
2,2022-10-03,19.9,9.3,14.6,57.9,0.000,0.0,,13.9,73.3,...,06:49:38,18:28:45,0.28,Clear,Clear conditions throughout the day.,,,70.82,51.74,61.28
3,2022-10-04,23.5,8.2,15.5,55.5,0.000,0.0,,11.5,201.1,...,06:50:43,18:27:03,0.32,Partially cloudy,Partly cloudy throughout the day.,,,77.30,49.76,62.90
4,2022-10-05,24.7,10.4,17.8,43.2,0.000,0.0,31.7,15.4,227.1,...,06:51:48,18:25:22,0.35,Partially cloudy,Partly cloudy throughout the day.,,,79.46,53.72,67.04
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
390,2023-09-26,22.8,18.1,19.9,81.3,8.080,100.0,33.5,24.2,108.5,...,06:41:55,18:41:14,0.41,"Rain, Partially cloudy",Partly cloudy throughout the day with rain.,[rain],,76.04,67.58,70.82
391,2023-09-27,20.9,17.8,19.1,83.4,4.453,100.0,31.7,21.1,105.0,...,06:42:59,18:39:30,0.44,"Rain, Overcast",Cloudy skies throughout the day with rain.,[rain],,72.62,67.04,69.38
392,2023-09-28,20.5,17.8,19.0,79.9,0.098,100.0,,16.2,53.1,...,06:44:02,18:37:46,0.48,"Rain, Partially cloudy",Partly cloudy throughout the day with rain in ...,[rain],,71.90,67.04,69.20
393,2023-09-29,23.4,14.8,18.9,79.3,0.000,0.0,,16.3,116.3,...,06:45:06,18:36:02,0.50,Partially cloudy,Partly cloudy throughout the day.,,,77.12,61.64,69.02


## Step 3
Now I am going to convert the precipitation columns from mm to inches

In [33]:
def convert_precip(mm):
    inches = mm / 25.4
    return(float(inches))

In [34]:
weather_df.columns

Index(['datetime', 'tempmax', 'tempmin', 'feelslike', 'humidity', 'precip',
       'precipprob', 'windgust', 'windspeed', 'winddir', 'cloudcover',
       'visibility', 'solarradiation', 'windspeedmax', 'windspeedmean',
       'sunrise', 'sunset', 'moonphase', 'conditions', 'description',
       'preciptype', 'snowdepth', 'tempmax_F', 'tempmin_F', 'feelslike_F'],
      dtype='object')

In [35]:
precip_cols = ["precip", 'snowdepth']
for col in precip_cols:
    weather_df.loc[:,f'{col}_in'] = weather_df.loc[:,col].apply(convert_precip)

weather_df

Unnamed: 0,datetime,tempmax,tempmin,feelslike,humidity,precip,precipprob,windgust,windspeed,winddir,...,moonphase,conditions,description,preciptype,snowdepth,tempmax_F,tempmin_F,feelslike_F,precip_in,snowdepth_in
0,2022-10-01,20.2,11.4,15.8,65.7,0.000,0.0,37.9,26.0,19.2,...,0.21,Clear,Clear conditions throughout the day.,,,71.36,55.52,63.44,0.000000,
1,2022-10-02,19.3,13.7,15.9,69.7,0.000,0.0,36.5,24.4,35.7,...,0.25,Partially cloudy,Partly cloudy throughout the day.,,,69.74,59.66,63.62,0.000000,
2,2022-10-03,19.9,9.3,14.6,57.9,0.000,0.0,,13.9,73.3,...,0.28,Clear,Clear conditions throughout the day.,,,70.82,51.74,61.28,0.000000,
3,2022-10-04,23.5,8.2,15.5,55.5,0.000,0.0,,11.5,201.1,...,0.32,Partially cloudy,Partly cloudy throughout the day.,,,77.30,49.76,62.90,0.000000,
4,2022-10-05,24.7,10.4,17.8,43.2,0.000,0.0,31.7,15.4,227.1,...,0.35,Partially cloudy,Partly cloudy throughout the day.,,,79.46,53.72,67.04,0.000000,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
390,2023-09-26,22.8,18.1,19.9,81.3,8.080,100.0,33.5,24.2,108.5,...,0.41,"Rain, Partially cloudy",Partly cloudy throughout the day with rain.,[rain],,76.04,67.58,70.82,0.318110,
391,2023-09-27,20.9,17.8,19.1,83.4,4.453,100.0,31.7,21.1,105.0,...,0.44,"Rain, Overcast",Cloudy skies throughout the day with rain.,[rain],,72.62,67.04,69.38,0.175315,
392,2023-09-28,20.5,17.8,19.0,79.9,0.098,100.0,,16.2,53.1,...,0.48,"Rain, Partially cloudy",Partly cloudy throughout the day with rain in ...,[rain],,71.90,67.04,69.20,0.003858,
393,2023-09-29,23.4,14.8,18.9,79.3,0.000,0.0,,16.3,116.3,...,0.50,Partially cloudy,Partly cloudy throughout the day.,,,77.12,61.64,69.02,0.000000,


## Step 4 
Now I need to convert the wind speed columns from kph to mph. 

In [36]:
def convert_speed(kmh):
    mph = kmh / 1.609344
    return float(mph)

In [37]:
wind_cols = ['windgust', 'windspeed', 'windspeedmax', 'windspeedmean']
for col in wind_cols:
    weather_df.loc[:,f'{col}_mph'] = weather_df.loc[:,col].apply(convert_speed)
    
weather_df

Unnamed: 0,datetime,tempmax,tempmin,feelslike,humidity,precip,precipprob,windgust,windspeed,winddir,...,snowdepth,tempmax_F,tempmin_F,feelslike_F,precip_in,snowdepth_in,windgust_mph,windspeed_mph,windspeedmax_mph,windspeedmean_mph
0,2022-10-01,20.2,11.4,15.8,65.7,0.000,0.0,37.9,26.0,19.2,...,,71.36,55.52,63.44,0.000000,,23.549968,16.155651,16.155651,9.072019
1,2022-10-02,19.3,13.7,15.9,69.7,0.000,0.0,36.5,24.4,35.7,...,,69.74,59.66,63.62,0.000000,,22.680049,15.161457,15.161457,10.252625
2,2022-10-03,19.9,9.3,14.6,57.9,0.000,0.0,,13.9,73.3,...,,70.82,51.74,61.28,0.000000,,,8.637060,8.637060,3.852501
3,2022-10-04,23.5,8.2,15.5,55.5,0.000,0.0,,11.5,201.1,...,,77.30,49.76,62.90,0.000000,,,7.145769,7.145769,2.423348
4,2022-10-05,24.7,10.4,17.8,43.2,0.000,0.0,31.7,15.4,227.1,...,,79.46,53.72,67.04,0.000000,,19.697467,9.569116,9.569116,3.852501
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
390,2023-09-26,22.8,18.1,19.9,81.3,8.080,100.0,33.5,24.2,108.5,...,,76.04,67.58,70.82,0.318110,,20.815935,15.037183,15.037183,9.631253
391,2023-09-27,20.9,17.8,19.1,83.4,4.453,100.0,31.7,21.1,105.0,...,,72.62,67.04,69.38,0.175315,,19.697467,13.110932,13.110932,8.761334
392,2023-09-28,20.5,17.8,19.0,79.9,0.098,100.0,,16.2,53.1,...,,71.90,67.04,69.20,0.003858,,,10.066213,10.066213,7.207906
393,2023-09-29,23.4,14.8,18.9,79.3,0.000,0.0,,16.3,116.3,...,,77.12,61.64,69.02,0.000000,,,10.128350,10.128350,4.225324


In [38]:
# Drop the metric columns
changed_cols = temp_cols+precip_cols+wind_cols
for col in changed_cols:
    del weather_df[col]
    
weather_df

Unnamed: 0,datetime,humidity,precipprob,winddir,cloudcover,visibility,solarradiation,sunrise,sunset,moonphase,...,preciptype,tempmax_F,tempmin_F,feelslike_F,precip_in,snowdepth_in,windgust_mph,windspeed_mph,windspeedmax_mph,windspeedmean_mph
0,2022-10-01,65.7,0.0,19.2,13.8,15.9,67.8,06:47:29,18:32:10,0.21,...,,71.36,55.52,63.44,0.000000,,23.549968,16.155651,16.155651,9.072019
1,2022-10-02,69.7,0.0,35.7,36.3,16.0,27.6,06:48:33,18:30:27,0.25,...,,69.74,59.66,63.62,0.000000,,22.680049,15.161457,15.161457,10.252625
2,2022-10-03,57.9,0.0,73.3,12.4,16.0,61.5,06:49:38,18:28:45,0.28,...,,70.82,51.74,61.28,0.000000,,,8.637060,8.637060,3.852501
3,2022-10-04,55.5,0.0,201.1,29.4,16.0,40.3,06:50:43,18:27:03,0.32,...,,77.30,49.76,62.90,0.000000,,,7.145769,7.145769,2.423348
4,2022-10-05,43.2,0.0,227.1,73.7,15.9,64.0,06:51:48,18:25:22,0.35,...,,79.46,53.72,67.04,0.000000,,19.697467,9.569116,9.569116,3.852501
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
390,2023-09-26,81.3,100.0,108.5,86.6,15.3,18.5,06:41:55,18:41:14,0.41,...,[rain],76.04,67.58,70.82,0.318110,,20.815935,15.037183,15.037183,9.631253
391,2023-09-27,83.4,100.0,105.0,96.8,14.0,28.2,06:42:59,18:39:30,0.44,...,[rain],72.62,67.04,69.38,0.175315,,19.697467,13.110932,13.110932,8.761334
392,2023-09-28,79.9,100.0,53.1,87.2,15.9,33.1,06:44:02,18:37:46,0.48,...,[rain],71.90,67.04,69.20,0.003858,,,10.066213,10.066213,7.207906
393,2023-09-29,79.3,0.0,116.3,54.7,12.5,42.1,06:45:06,18:36:02,0.50,...,,77.12,61.64,69.02,0.000000,,,10.128350,10.128350,4.225324


## Step 5
Next I want to calculate the number of daylight hours to see if there is any significance between the number of daylight hours and crimes that occur. 

In [39]:
weather_df.loc[:,'sunrise'] = weather_df.loc[:,['datetime','sunrise']].apply(lambda row: ' '.join(row.values.astype(str)), axis=1)
weather_df.loc[:,'sunset'] = weather_df.loc[:,['datetime','sunset']].apply(lambda row: ' '.join(row.values.astype(str)), axis=1)
#weather_df['daylight_hours'] = 
weather_df.loc[:,'sunrise'] = pd.to_datetime(weather_df.loc[:,'sunrise'])
weather_df.loc[:,'sunset'] = pd.to_datetime(weather_df.loc[:,'sunset'])
weather_df['daylight_hours'] = weather_df.loc[:, 'sunset'] - weather_df.loc[:, 'sunrise']
weather_df

Unnamed: 0,datetime,humidity,precipprob,winddir,cloudcover,visibility,solarradiation,sunrise,sunset,moonphase,...,tempmax_F,tempmin_F,feelslike_F,precip_in,snowdepth_in,windgust_mph,windspeed_mph,windspeedmax_mph,windspeedmean_mph,daylight_hours
0,2022-10-01,65.7,0.0,19.2,13.8,15.9,67.8,2022-10-01 06:47:29,2022-10-01 18:32:10,0.21,...,71.36,55.52,63.44,0.000000,,23.549968,16.155651,16.155651,9.072019,0 days 11:44:41
1,2022-10-02,69.7,0.0,35.7,36.3,16.0,27.6,2022-10-02 06:48:33,2022-10-02 18:30:27,0.25,...,69.74,59.66,63.62,0.000000,,22.680049,15.161457,15.161457,10.252625,0 days 11:41:54
2,2022-10-03,57.9,0.0,73.3,12.4,16.0,61.5,2022-10-03 06:49:38,2022-10-03 18:28:45,0.28,...,70.82,51.74,61.28,0.000000,,,8.637060,8.637060,3.852501,0 days 11:39:07
3,2022-10-04,55.5,0.0,201.1,29.4,16.0,40.3,2022-10-04 06:50:43,2022-10-04 18:27:03,0.32,...,77.30,49.76,62.90,0.000000,,,7.145769,7.145769,2.423348,0 days 11:36:20
4,2022-10-05,43.2,0.0,227.1,73.7,15.9,64.0,2022-10-05 06:51:48,2022-10-05 18:25:22,0.35,...,79.46,53.72,67.04,0.000000,,19.697467,9.569116,9.569116,3.852501,0 days 11:33:34
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
390,2023-09-26,81.3,100.0,108.5,86.6,15.3,18.5,2023-09-26 06:41:55,2023-09-26 18:41:14,0.41,...,76.04,67.58,70.82,0.318110,,20.815935,15.037183,15.037183,9.631253,0 days 11:59:19
391,2023-09-27,83.4,100.0,105.0,96.8,14.0,28.2,2023-09-27 06:42:59,2023-09-27 18:39:30,0.44,...,72.62,67.04,69.38,0.175315,,19.697467,13.110932,13.110932,8.761334,0 days 11:56:31
392,2023-09-28,79.9,100.0,53.1,87.2,15.9,33.1,2023-09-28 06:44:02,2023-09-28 18:37:46,0.48,...,71.90,67.04,69.20,0.003858,,,10.066213,10.066213,7.207906,0 days 11:53:44
393,2023-09-29,79.3,0.0,116.3,54.7,12.5,42.1,2023-09-29 06:45:06,2023-09-29 18:36:02,0.50,...,77.12,61.64,69.02,0.000000,,,10.128350,10.128350,4.225324,0 days 11:50:56


## Step 6
Now I would like to reorganize the columns so the dataset makes a little more sense

In [40]:
reorg_cols = ['datetime','tempmax_F','tempmin_F','feelslike_F','humidity',
              'conditions','cloudcover','description','visibility','windspeed_mph',
              'windgust_mph','windspeedmax_mph','windspeedmean_mph','preciptype','precip_in',
              'snowdepth_in','moonphase','daylight_hours','solarradiation',
              'precipprob','winddir','sunrise','sunset']
weather_df = weather_df.reindex(columns=reorg_cols)

In [41]:
weather_df

Unnamed: 0,datetime,tempmax_F,tempmin_F,feelslike_F,humidity,conditions,cloudcover,description,visibility,windspeed_mph,...,preciptype,precip_in,snowdepth_in,moonphase,daylight_hours,solarradiation,precipprob,winddir,sunrise,sunset
0,2022-10-01,71.36,55.52,63.44,65.7,Clear,13.8,Clear conditions throughout the day.,15.9,16.155651,...,,0.000000,,0.21,0 days 11:44:41,67.8,0.0,19.2,2022-10-01 06:47:29,2022-10-01 18:32:10
1,2022-10-02,69.74,59.66,63.62,69.7,Partially cloudy,36.3,Partly cloudy throughout the day.,16.0,15.161457,...,,0.000000,,0.25,0 days 11:41:54,27.6,0.0,35.7,2022-10-02 06:48:33,2022-10-02 18:30:27
2,2022-10-03,70.82,51.74,61.28,57.9,Clear,12.4,Clear conditions throughout the day.,16.0,8.637060,...,,0.000000,,0.28,0 days 11:39:07,61.5,0.0,73.3,2022-10-03 06:49:38,2022-10-03 18:28:45
3,2022-10-04,77.30,49.76,62.90,55.5,Partially cloudy,29.4,Partly cloudy throughout the day.,16.0,7.145769,...,,0.000000,,0.32,0 days 11:36:20,40.3,0.0,201.1,2022-10-04 06:50:43,2022-10-04 18:27:03
4,2022-10-05,79.46,53.72,67.04,43.2,Partially cloudy,73.7,Partly cloudy throughout the day.,15.9,9.569116,...,,0.000000,,0.35,0 days 11:33:34,64.0,0.0,227.1,2022-10-05 06:51:48,2022-10-05 18:25:22
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
390,2023-09-26,76.04,67.58,70.82,81.3,"Rain, Partially cloudy",86.6,Partly cloudy throughout the day with rain.,15.3,15.037183,...,[rain],0.318110,,0.41,0 days 11:59:19,18.5,100.0,108.5,2023-09-26 06:41:55,2023-09-26 18:41:14
391,2023-09-27,72.62,67.04,69.38,83.4,"Rain, Overcast",96.8,Cloudy skies throughout the day with rain.,14.0,13.110932,...,[rain],0.175315,,0.44,0 days 11:56:31,28.2,100.0,105.0,2023-09-27 06:42:59,2023-09-27 18:39:30
392,2023-09-28,71.90,67.04,69.20,79.9,"Rain, Partially cloudy",87.2,Partly cloudy throughout the day with rain in ...,15.9,10.066213,...,[rain],0.003858,,0.48,0 days 11:53:44,33.1,100.0,53.1,2023-09-28 06:44:02,2023-09-28 18:37:46
393,2023-09-29,77.12,61.64,69.02,79.3,Partially cloudy,54.7,Partly cloudy throughout the day.,12.5,10.128350,...,,0.000000,,0.50,0 days 11:50:56,42.1,0.0,116.3,2023-09-29 06:45:06,2023-09-29 18:36:02


## Step 7
I want to use the moon phase column to create categorical labels for the different phases of the moon.

In [42]:
def get_phase(moon_phase):
    moon_phases = {"New Moon" : 0.125, "Waxing Crescent" : 0.25, "First Quarter": 0.375, 
                   "Waxing Gibbous": 0.5, "Full Moon": 0.625,"Waning Gibbous": 0.75, 
                   "Last Quarter": 0.825, "Waning Crescent": 1} 
    if moon_phase < moon_phases["New Moon"]:
        moon_str = "New Moon"
    elif moon_phase < moon_phases["Waxing Crescent"]:
        moon_str = "Waxing Crescent"
    elif moon_phase < moon_phases["First Quarter"]:
        moon_str = "First Quarter"
    elif moon_phase < moon_phases["Waxing Gibbous"]:
        moon_str = "Waxing Gibbous"
    elif moon_phase < moon_phases["Full Moon"]:
        moon_str = "Full Moon"
    elif moon_phase < moon_phases["Waning Gibbous"]:
        moon_str = "Waning Gibbous"
    elif moon_phase < moon_phases["Last Quarter"]:
        moon_str = "Last Quarter"
    elif moon_phase < moon_phases["Waning Crescent"]:
        moon_str = "Waning Crescent"
    return moon_str

In [43]:
weather_df['moonphase'] = weather_df['moonphase'].apply(get_phase)
weather_df

Unnamed: 0,datetime,tempmax_F,tempmin_F,feelslike_F,humidity,conditions,cloudcover,description,visibility,windspeed_mph,...,preciptype,precip_in,snowdepth_in,moonphase,daylight_hours,solarradiation,precipprob,winddir,sunrise,sunset
0,2022-10-01,71.36,55.52,63.44,65.7,Clear,13.8,Clear conditions throughout the day.,15.9,16.155651,...,,0.000000,,Waxing Crescent,0 days 11:44:41,67.8,0.0,19.2,2022-10-01 06:47:29,2022-10-01 18:32:10
1,2022-10-02,69.74,59.66,63.62,69.7,Partially cloudy,36.3,Partly cloudy throughout the day.,16.0,15.161457,...,,0.000000,,First Quarter,0 days 11:41:54,27.6,0.0,35.7,2022-10-02 06:48:33,2022-10-02 18:30:27
2,2022-10-03,70.82,51.74,61.28,57.9,Clear,12.4,Clear conditions throughout the day.,16.0,8.637060,...,,0.000000,,First Quarter,0 days 11:39:07,61.5,0.0,73.3,2022-10-03 06:49:38,2022-10-03 18:28:45
3,2022-10-04,77.30,49.76,62.90,55.5,Partially cloudy,29.4,Partly cloudy throughout the day.,16.0,7.145769,...,,0.000000,,First Quarter,0 days 11:36:20,40.3,0.0,201.1,2022-10-04 06:50:43,2022-10-04 18:27:03
4,2022-10-05,79.46,53.72,67.04,43.2,Partially cloudy,73.7,Partly cloudy throughout the day.,15.9,9.569116,...,,0.000000,,First Quarter,0 days 11:33:34,64.0,0.0,227.1,2022-10-05 06:51:48,2022-10-05 18:25:22
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
390,2023-09-26,76.04,67.58,70.82,81.3,"Rain, Partially cloudy",86.6,Partly cloudy throughout the day with rain.,15.3,15.037183,...,[rain],0.318110,,Waxing Gibbous,0 days 11:59:19,18.5,100.0,108.5,2023-09-26 06:41:55,2023-09-26 18:41:14
391,2023-09-27,72.62,67.04,69.38,83.4,"Rain, Overcast",96.8,Cloudy skies throughout the day with rain.,14.0,13.110932,...,[rain],0.175315,,Waxing Gibbous,0 days 11:56:31,28.2,100.0,105.0,2023-09-27 06:42:59,2023-09-27 18:39:30
392,2023-09-28,71.90,67.04,69.20,79.9,"Rain, Partially cloudy",87.2,Partly cloudy throughout the day with rain in ...,15.9,10.066213,...,[rain],0.003858,,Waxing Gibbous,0 days 11:53:44,33.1,100.0,53.1,2023-09-28 06:44:02,2023-09-28 18:37:46
393,2023-09-29,77.12,61.64,69.02,79.3,Partially cloudy,54.7,Partly cloudy throughout the day.,12.5,10.128350,...,,0.000000,,Full Moon,0 days 11:50:56,42.1,0.0,116.3,2023-09-29 06:45:06,2023-09-29 18:36:02


In [44]:
# I am going to save the dataset now, so that it is easy to import on the next Milestone
weather_df_out = "weather_data.csv"
weather_df.to_csv(weather_df_out)

## Ethical Considerations
I didn't do a lot of manipulations with this dataset that could have ethical implications. The biggest liberty I took with the changes I made was defining the phases of the moon into their named phases instead of a numeric representation. There are 8 phases of the moon and I just went ahead and used .125 increments to build up a function to match them wherever they fall within the range. Other than that, I just used formulas I found online to perform the conversions from metric to US Standard (or Imperial) measurements. These included temperatures, wind speeds, and amounts of precipitation. 