In [1]:
# !pip install --upgrade pip
# !pip install pandas
# !pip install sodapy



In [2]:
import pandas as pd
from sodapy import Socrata
import json
from datetime import datetime, timedelta

In [3]:
with open('config.json') as f:
  config = json.load(f)

AppToken = config['app_token']
UserName = config['user_name']
Password = config["password"]

In [4]:
client = Socrata("data.iowa.gov",
                 AppToken,
                 username = UserName,
                 password = Password,
                 timeout=30)

In [5]:
col_selected = 'invoice_line_no, date, store, name, city, zipcode, county, category, category_name, vendor_no, vendor_name, itemno, im_desc, state_bottle_cost, state_bottle_retail, sale_bottles, sale_dollars'
col_list = ['invoice_line_no', 'date', 'store', 'name', 'city', 'zipcode', 'county', 'category', 'category_name', 'itemno', 'im_desc', 'state_bottle_cost', 'state_bottle_retail', 'sale_bottles', 'sale_dollars']
len(col_list)

15

In [7]:
start_year = 2021
current_year = datetime.now().year

for year in range(start_year, current_year + 1):
    start_date = f"{year}-01-01T00:00:00.000"
    end_date = f"{year + 1}-01-01T00:00:00.000"

    total_rows = []  # Store all rows for the year
    limit = 5000  # Set the limit per page
    offset = 0  
    more_data = True  

    while more_data:
        results = client.get("m3tr-qhgy",
                             select=col_selected, 
                             where=f"LOWER(name) LIKE '%hy-vee%' AND date >= '{start_date}' AND date < '{end_date}'", 
                             limit=limit, 
                             offset=offset)
        total_rows.extend(results)  

        if len(results) < limit:
            more_data = False
        else:
            offset += limit 

    df = pd.DataFrame.from_records(total_rows)
    print(f"Rows for {year}: {df.shape[0]}")  

    # Save to CSV
    df.to_csv(f'data/hyvee_{year}.csv', index=False)
    print(f"Dataset for {year} is saved.")


Rows for 2019: 762568
Rows for 2020: 814746
Rows for 2021: 772734
Rows for 2022: 733503
Rows for 2023: 643450


In [8]:
df.head()

Unnamed: 0,invoice_line_no,date,store,name,city,zipcode,county,category,category_name,vendor_no,vendor_name,itemno,im_desc,state_bottle_cost,state_bottle_retail,sale_bottles,sale_dollars
0,INV-54554300002,2023-01-02T00:00:00.000,2699,HY-VEE C-STORE #2 - ANKENY,ANKENY,50023,POLK,1031100,AMERICAN VODKAS,301,FIFTH GENERATION INC,38176,TITOS HANDMADE VODKA,10.0,15.0,24,360.0
1,INV-54555100014,2023-01-02T00:00:00.000,2636,HY-VEE WINE AND SPIRITS / HUBBELL,DES MOINES,50317,POLK,1012100,CANADIAN WHISKIES,260,DIAGEO AMERICAS,11297,CROWN ROYAL,19.99,29.99,12,359.88
2,INV-54554200012,2023-01-02T00:00:00.000,2666,HY-VEE FOOD STORE #2 / STATE ANKENY,ANKENY,50023,POLK,1011200,STRAIGHT BOURBON WHISKIES,260,DIAGEO AMERICAS,17086,BULLEIT BOURBON,17.49,26.24,12,314.88
3,INV-54555100017,2023-01-02T00:00:00.000,2636,HY-VEE WINE AND SPIRITS / HUBBELL,DES MOINES,50317,POLK,1031100,AMERICAN VODKAS,421,SAZERAC COMPANY INC,35317,BARTON VODKA,3.97,5.96,12,71.52
4,INV-54554900003,2023-01-02T00:00:00.000,2633,HY-VEE #3 / BDI / DES MOINES,DES MOINES,50320,POLK,1011200,STRAIGHT BOURBON WHISKIES,619,CAMPARI AMERICA,22157,WILD TURKEY 101,17.5,26.25,17,446.25
