# Data Scraper 
This notebook contains the code used to fill the `data/` folder. 

In [1]:
import requests 
import pandas as pd 
import os
from dotenv import load_dotenv
load_dotenv()


True

### Weather Data
The next set of cells will scrape weather data from X to Y and store it under `data/weather_data.csv`.

In [2]:
## Constants

BASE_WEATHER_API = "https://archive-api.open-meteo.com/v1/archive"
LATITUDE = 41.5868
LONGITUDE = 93.6250
START_DATE = "2000-03-24"
END_DATE = "2023-04-07"
DAILY = ["temperature_2m_max", "temperature_2m_min", "temperature_2m_mean", "precipitation_sum"]
TIMEZONE = "GMT"
TEMPERATURE_UNITS = "celsius"
WINDSPEED_UNITS = "kmh"
PRECIPITATION_UNIT = "mm"
TIME_FORMAT = "iso8601"
# https://archive-api.open-meteo.com/v1/archive?daily=precipitation_sum&timezone=GMT&temperature_unit=celsius&windspeed_unit=kmh&precipitation_unit=mm&timeformat=iso8601

In [3]:
query_params = f"latitude={LATITUDE}&longitude={LONGITUDE}&start_date={START_DATE}&end_date={END_DATE}&{'&'.join(f'daily={val}' for val in DAILY)}&timezone={TIMEZONE}&temperature_unit={TEMPERATURE_UNITS}&windspeed_unit={WINDSPEED_UNITS}&precipitation_unit={PRECIPITATION_UNIT}&timeformat={TIME_FORMAT}"
resp = requests.get(f"{BASE_WEATHER_API}?{query_params}")

json = resp.json() 

In [4]:
print(json["daily"].keys())

dict_keys(['time', 'temperature_2m_max', 'temperature_2m_min', 'temperature_2m_mean', 'precipitation_sum'])


As suggested by the code cell above, the API returned JSON with keys for each of the variables of interest; the values of each key are a list of values, one for each date. We now reformat the data to make it more usable. 

In [5]:
daily = json["daily"]

data = {}

for key in daily: 
    for i, val in enumerate(daily[key]): 
        if i not in data: data[i] = {}
        data[i][key] = val

# Showing results of transformation: 
for i in range(10): 
    print(data[i])

{'time': '2000-03-24', 'temperature_2m_max': 12.6, 'temperature_2m_min': -6.6, 'temperature_2m_mean': 5.7, 'precipitation_sum': 0.0}
{'time': '2000-03-25', 'temperature_2m_max': 15.0, 'temperature_2m_min': -1.0, 'temperature_2m_mean': 8.7, 'precipitation_sum': 0.0}
{'time': '2000-03-26', 'temperature_2m_max': 18.7, 'temperature_2m_min': 2.4, 'temperature_2m_mean': 11.2, 'precipitation_sum': 0.0}
{'time': '2000-03-27', 'temperature_2m_max': 22.9, 'temperature_2m_min': 1.5, 'temperature_2m_mean': 14.8, 'precipitation_sum': 0.0}
{'time': '2000-03-28', 'temperature_2m_max': 22.8, 'temperature_2m_min': 7.2, 'temperature_2m_mean': 14.8, 'precipitation_sum': 0.0}
{'time': '2000-03-29', 'temperature_2m_max': 20.1, 'temperature_2m_min': 6.2, 'temperature_2m_mean': 13.2, 'precipitation_sum': 0.0}
{'time': '2000-03-30', 'temperature_2m_max': 17.7, 'temperature_2m_min': 3.3, 'temperature_2m_mean': 11.8, 'precipitation_sum': 0.0}
{'time': '2000-03-31', 'temperature_2m_max': 14.7, 'temperature_2m_mi

In [6]:
weather_df = pd.DataFrame.from_dict(data=data, orient="index")
weather_df.tail()

Unnamed: 0,time,temperature_2m_max,temperature_2m_min,temperature_2m_mean,precipitation_sum
8410,2023-04-03,6.0,-4.5,0.9,1.2
8411,2023-04-04,2.2,-6.6,-1.5,1.8
8412,2023-04-05,8.5,-7.0,2.3,0.0
8413,2023-04-06,,,,
8414,2023-04-07,,,,


In [7]:
weather_df.to_csv(path_or_buf="../data/weather_data.csv")

### Crop Data
The next set of cells will scrape crop price data from X to Y and store it under `data/crop_price_data.csv`.

In [8]:
## Constants

API_KEY = os.environ["CROP_DATA_API_KEY"]
BASE_CROP_PRICE_API = "https://commodities-api.com/api"
SYMBOL = "CORN"

In [9]:
# TODO

# data = {}

# for year_delta in range(22): 
#     start_date = f"{2000 + year_delta}-03-24"
#     end_date = f"{2000 + year_delta}-12-31"
#     resp = requests.get(f"{BASE_CROP_PRICE_API}/timeseries?access_key={API_KEY}&symbols={SYMBOL}&start_date={start_date}&end_date={end_date}")
#     data[2000+year_delta] = resp.json()

In [10]:
# data