## Import Libraries and Files

In [209]:
# Import Libraries 
import pandas as pd
import numpy as np 
import os 
import requests
import json
from datetime import datetime
from dotenv import load_dotenv

In [211]:
# Import bikesharing data
# Relative path of the folder that holds the raw bike-sharing files read below
folderpath = r'../Data/Original'

In [213]:
# Build the full path of every data file in the raw-data folder.
# os.listdir() returns entries in arbitrary order and also lists hidden files
# and sub-folders (e.g. .DS_Store, .ipynb_checkpoints). The names are therefore
# sorted for a reproducible row order, and anything that is not a regular,
# non-hidden file is skipped before it can reach pd.read_csv().
filepath = [os.path.join(folderpath, name)
            for name in sorted(os.listdir(folderpath))
            if not name.startswith('.')
            and os.path.isfile(os.path.join(folderpath, name))]

os.listdir(folderpath) returns a list with the names of the entries in the folderpath directory. os.path.join() combines the directory path with each file name, creating a full path. Finally, the list comprehension loops over the names and applies os.path.join() to each one.

In [215]:
# Read each path in `filepath` with pd.read_csv and stack the resulting
# frames into a single table with a fresh, continuous row index
bike = pd.concat(map(pd.read_csv, filepath), ignore_index=True)

In [216]:
# Import weather data
# The .env file is loaded first so that the API token can be read with os.getenv() in the next cell
load_dotenv() # Load the .env file

True

In [219]:
# API token for the weather service, taken from the 'APItoken' environment
# variable (None when the variable is not set)
token = os.environ.get('APItoken')

In [221]:
# Get Data: request the 2022 daily records (dataset GHCND, datatype TAVG,
# station GHCND:USW00014732, up to 1000 rows) from the NOAA CDO web API.
# The URL is split over several adjacent literals only for readability; the
# concatenated string is the same address as before.
r = requests.get(
    'https://www.ncdc.noaa.gov/cdo-web/api/v2/data'
    '?datasetid=GHCND&datatypeid=TAVG&limit=1000'
    '&stationid=GHCND:USW00014732'
    '&startdate=2022-01-01&enddate=2022-12-31',
    headers={'token': token},
    timeout=60,  # without a timeout a stalled connection would block the notebook forever
)
# Fail here with a clear HTTP error (e.g. missing/invalid token) instead of a
# confusing KeyError later when the payload has no 'results' key
r.raise_for_status()

In [222]:
# Load the API response as JSON: parse the response body text into Python objects.
# The following cells read the records from d['results'].
d = json.loads(r.text)

## Data Wrangling

In [225]:
# Keep only the result records whose 'datatype' field is TAVG
avg_temps = list(filter(lambda record: record['datatype'] == 'TAVG', d['results']))

In [227]:
# Collect the 'date' field of every average-temperature reading
data_temp = [reading['date'] for reading in avg_temps]

In [229]:
# Collect the 'value' field (the measurement) of every average-temperature reading
temps = [reading['value'] for reading in avg_temps]

In [231]:
# Put lists in a DataFrame: start from an empty frame; its columns are assigned in the following cells
df_temp = pd.DataFrame()

In [233]:
# Convert date format: parse every timestamp string (YYYY-MM-DDTHH:MM:SS)
# into a datetime object and store the result as the 'date' column
df_temp['date'] = [
    datetime.strptime(stamp, "%Y-%m-%dT%H:%M:%S")
    for stamp in data_temp
]

In [235]:
# Convert temperature format: cast each raw value to float and divide it by 10
# NOTE(review): presumably the API reports tenths of a degree — confirm against the dataset documentation
df_temp['avgtemp'] = [
    float(raw) / 10.0
    for raw in temps
]

In [237]:
# Convert bike format to datetime, then derive the calendar date used as the join key
# NOTE(review): dayfirst=True only affects ambiguous day/month strings — confirm it matches the raw CSV format
bike['started_at'] = pd.to_datetime(bike['started_at'], dayfirst=True)
# started_at was converted on the line above, so the date part can be taken
# directly; re-parsing it with format='%Y-%m-%d' was redundant and the format
# did not describe the column's content anyway
bike['date'] = bike['started_at'].dt.date

In [239]:
# Match date format
# bike['date'] already holds plain date objects (derived from started_at in the
# previous cell), so it is not recomputed here; only the weather dates still
# need to be reduced to the same type for the join.
# pd.to_datetime() is kept so that re-running this cell, after 'date' has
# already been turned into date objects, should still work.
df_temp['date'] = pd.to_datetime(df_temp['date'], format='%Y-%m-%d').dt.date

## Merge Datasets

In [241]:
# Merge datasets: attach the daily average temperature to every trip with a
# left join on 'date'.
# validate='many_to_one' makes pandas raise a MergeError if a date occurs more
# than once in df_temp, instead of silently duplicating trips.
# indicator=True adds the '_merge' column that is inspected in the next cell.
df_merged = bike.merge(
    df_temp,
    how='left',
    on='date',
    indicator=True,
    validate='many_to_one',
)

In [243]:
# Check Merge: count the rows per '_merge' category; with the left join above,
# 'left_only' rows would be trips without a matching weather date
df_merged['_merge'].value_counts(dropna = False)

both          895485
left_only          0
right_only         0
Name: _merge, dtype: int64

## Save File

In [245]:
# Write the merged table to disk as a pickle file
# NOTE(review): the '_merge' indicator column added by the merge is still part of df_merged here
df_merged.to_pickle(r'../Data/Prepared/bike_final.pkl')