In [None]:
"""
 Exercise:   https://www.bambooweekly.com/egg-prices/
 Solution:   https://www.bambooweekly.com/egg-prices-solution/
 Repository: https://github.com/JoergEm/Bamboo-Weekly/tree/main
"""

In [None]:
from IPython.display import FileLink, Markdown
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
from pathlib import Path
import seaborn as sns
# Visible confirmation that the cell ran to its last line, i.e. no import above raised.
display(Markdown("Imports ✅"))

In [None]:
try:
    import lxml
    import openpyxl
except ImportError:
    !pip install lxml
    !pip install openpyxl
    import lxml
    import openpyxl
finally:
    display(Markdown("Installs ✅"))

In [None]:
def create_folders(folders: list[str]) -> bool:
    """Create every folder in ``folders`` below the current working directory.

    Folders that already exist are left untouched.

    Args:
        folders: Folder names (or relative paths) to create under
            ``os.getcwd()``.

    Returns:
        ``True`` when all folders exist afterwards (a "Folders ✅" notice is
        displayed); ``False`` as soon as one folder cannot be created, in
        which case the failing path and the OS error are printed.
    """
    for folder in folders:
        folderpath: str = os.path.join(os.getcwd(), folder)
        try:
            # exist_ok=True already tolerates existing directories, so no
            # separate os.path.exists() check is needed. It still raises when
            # the path exists but is a regular file, which is reported below.
            os.makedirs(folderpath, exist_ok=True)
        except OSError as err:
            # Only file-system errors are handled; anything else should surface.
            print(f"Folder {folderpath} could not be created: {err}")
            return False
    display(Markdown("Folders ✅"))
    return True

In [None]:
def get_excel(year: int) -> pd.DataFrame:
    """Fetch the USDA Market News egg report (NW_PY041) for one year.

    The request covers 01/01 through 12/31 of ``year`` and asks for
    ``format=excel``; the response is parsed with ``pd.read_html`` and the
    first table found is returned.

    Args:
        year: Calendar year inserted into the ``repDate`` and ``endDate``
            query parameters.

    Returns:
        The first table of the parsed response.
    """
    endpoint = "https://marketnews.usda.gov/mnp/py-report?"
    # NOTE(review): repYear=2020 and endYear=2023 are fixed regardless of
    # `year`; presumably the service goes by repDate/endDate — confirm.
    query_parts = [
        "",  # the original query string starts with "&"
        f"repDate=01%2F01%2F{year}",
        "regionsDesc=",
        f"endDate=12%2F31%2F{year}",
        "repMonth=1",
        "run=Run",
        "_producttypefrom=1",
        "endYear=2023",
        "producttypefrom=",
        "repYear=2020",
        "categoryDesc=Egg",
        "frequency=Daily",
        "datatype=None+Selected",
        "previouscls=Breaking+Stock",
        "_producttype=1",
        "report=NW_PY041",
        "category=Egg",
        "subcategory=Breaking+Stock",
        "endMonth=1",
        "commodityDesc=Breaking+Stock",
        "runReport=true",
        "format=excel",
    ]
    report_url = endpoint + "&".join(query_parts)
    tables = pd.read_html(report_url)
    return tables[0]

In [None]:
def get_data(years: list) -> pd.DataFrame:
    """Download the report for each year and stack the results.

    Args:
        years: Years passed one by one to ``get_excel``.

    Returns:
        All yearly frames concatenated in the order of ``years``; each frame
        keeps its own index, so index values may repeat across years.
    """
    yearly_frames: list[pd.DataFrame] = []
    for year in years:
        yearly_frames.append(get_excel(year))
    return pd.concat(yearly_frames)

In [None]:
# Configuration: years to download and where the combined data is cached.
years: list[int] = [2018, 2019, 2020, 2021, 2022, 2023]
filename: str = 'USDA2018_2023DailyEggPrices.xlsx'
folders: list[str] = ['data', 'results']
filepath: str = os.path.join(folders[0], filename)
create_folders(folders)

if not os.path.exists(filepath):
    # No cached copy yet: download every year.
    try:
        data = get_data(years)
    except Exception as err:
        # Without data the rest of the notebook cannot run, so show the cause
        # and stop here instead of failing later with a NameError on `data`.
        display(Markdown(f"Error ❌ {type(err).__name__}: {err}"))
        raise
    # Caching is best effort: the downloaded frame stays usable in memory
    # even when the file cannot be written.
    try:
        data.to_excel(filepath)
    except Exception as err:
        display(Markdown(f"Error ❌ {type(err).__name__}: {err}"))
    else:
        display(Markdown("Data ✅"))
else:
    # to_excel() above stores the index as the first column; read it back as
    # the index so the cached frame has the same columns as a fresh download
    # (otherwise an extra numeric "Unnamed: 0" column appears).
    data = pd.read_excel(filepath, index_col=0)
    display(Markdown("Data loaded from existing file ✅"))

if os.path.exists(filepath):
    display(FileLink(filepath))

In [None]:
# Given URLs for each of the last five years of egg prices, create a single data frame.
# The combined frame is held in `data`; show its first rows as the cell output.
data.head()

In [None]:
# What was the average low price for eggs in each of the years o…
# Index the frame by date. The guard keeps the cell re-runnable: once "Date"
# has become the index it is no longer a column, and data["Date"] would raise
# a KeyError on a second run.
if "Date" in data.columns:
    data = data.assign(Date=pd.to_datetime(data["Date"])).set_index("Date")
# Restrict to numeric columns so mean() is only applied to numbers.
data_numeric = data.select_dtypes(include=['int64', 'float64'])
# Mean of every numeric column per calendar year of the date index.
data_numeric.groupby(data_numeric.index.year).mean()