In [None]:
import pandas as pd

In [None]:
from dotenv import load_dotenv
import os

# Load environment variables from a .env file (load_dotenv is called with its default arguments).
# Presumably this is where NNDSS_APP_TOKEN, read in the next cell, is expected to come from.
load_dotenv()

In [None]:
# App token used in the `$$app_token` query parameter of the data.cdc.gov requests below.
# The value is None when NNDSS_APP_TOKEN is not present in the environment.
nndss_app_token = os.environ.get('NNDSS_APP_TOKEN')

In [None]:
from datetime import datetime, timedelta

def get_year_week_from_date(date):
    """Return the ``(year, week)`` pair reported by ``date.isocalendar()``.

    Args:
        date: Any object exposing ``isocalendar()`` (e.g. ``datetime`` or ``date``).

    Returns:
        A 2-tuple ``(iso_year, iso_week)``. The ISO year can differ from
        ``date.year`` for days close to 1 January.
    """
    iso_parts = tuple(date.isocalendar())
    return iso_parts[0], iso_parts[1]

# Reference point for all of the date arithmetic below
current_date = datetime.now()

# Stepping one day back from the 1st of the current month lands on the
# final day of the previous month; resetting that day to 1 gives its start.
first_day_current_month = current_date.replace(day=1)
last_day_last_month = first_day_current_month + timedelta(days=-1)
first_day_last_month = last_day_last_month.replace(day=1)

# (year, week) for both ends of the previous month
(start_year, start_week), (end_year, end_week) = map(
    get_year_week_from_date,
    (first_day_last_month, last_day_last_month),
)

# Report the year/week range covered by the previous month
print(f"Start of Last Month: Year = {start_year}, Week = {start_week}")
print(f"End of Last Month: Year = {end_year}, Week = {end_week}")


In [None]:
# MMWR week and year for the last day of the previous month.
# MMWR weeks run Sunday-Saturday, and week 1 is the first week with at least four days in the
# calendar year. That is the ISO-8601 week rule shifted by one day (ISO weeks run Monday-Sunday),
# so the ISO calendar of (date + 1 day) gives the MMWR year and week. strftime('%U') does not
# follow this rule, and the calendar year can differ from the MMWR year around 1 January.
last_month = datetime.now().replace(day=1) - timedelta(days=1)
_mmwr_calendar = tuple((last_month + timedelta(days=1)).isocalendar())
# Kept as strings, as before, because they are only interpolated into the query text.
# NOTE(review): the week is not zero-padded here; confirm how the dataset stores week values.
mmwr_year = str(_mmwr_calendar[0])
mmwr_week = str(_mmwr_calendar[1])
# Construct the API URL for that MMWR week and year.
# The filter uses the `week` and `year` fields, matching the other queries in this notebook
# against the same dataset.
url = f"https://data.cdc.gov/resource/x9gk-5huc.json?$$app_token={nndss_app_token}&$where=week='{mmwr_week}' AND year='{mmwr_year}'"

#print(url)

In [None]:
# Filter values for the example query.
# NOTE: `week` is assigned here but is not part of the $where clause below.
year, week = 2023, 2
state, label = 'CALIFORNIA', 'Chlamydia trachomatis infection'

# Adjacent f-string literals are concatenated into one URL string.
query_url = (
    f"https://data.cdc.gov/resource/x9gk-5huc.json?$$app_token={nndss_app_token}"
    f"&$where=year='{year}' AND states='{state}' AND label='{label}'"
)

#print(query_url)

In [None]:
import requests


In [None]:
# Make the API request.
# requests applies no timeout by default, so set one to keep the cell from hanging forever.
response = requests.get(query_url, timeout=60)
# Stop here with the HTTP error instead of failing later, when an error payload
# would be indexed as if it were a list of rows.
response.raise_for_status()
data = response.json()

In [None]:
# Display the first element of the parsed response to inspect one record
data[0]

In [None]:
# Build a DataFrame from the parsed JSON response and preview its first rows
df = pd.DataFrame(data)
df.head()

In [None]:
# df 

In [None]:
base_url = "https://data.cdc.gov/resource/x9gk-5huc.json"

# Pagination state
limit = 50000  # Rows requested per page; adjust to what the API allows
offset = 0
data = []

while True:
    # `$order=:id` gives the rows a fixed order so consecutive $offset pages
    # neither overlap nor skip records.
    query_url = f"{base_url}?$$app_token={nndss_app_token}&$order=:id&$limit={limit}&$offset={offset}"
    # requests applies no timeout by default; bound each page request.
    response = requests.get(query_url, timeout=120)

    if response.status_code != 200:
        # Best-effort: report and stop. `data` then holds only the pages fetched so far.
        print(f"Failed to fetch data: {response.status_code}")
        break

    batch = response.json()

    if not batch:
        # An empty page means the end of the dataset has been reached
        break

    data.extend(batch)
    # Advance by the rows actually received, so paging stays correct even if
    # the server returns fewer rows than requested.
    offset += len(batch)
    print(f"Fetched {len(batch)} rows, total: {len(data)}")



In [None]:
# Turn the accumulated records into a DataFrame and report how many rows it holds
df = pd.DataFrame(data)
print(f"Total rows fetched: {df.shape[0]}")

In [None]:
# Preview the first rows of df
df.head()

In [None]:
import requests
import json

base_url = "https://data.cdc.gov/resource/x9gk-5huc.json"

# Defined up front so later cells can use `latest_week_data` even when a request
# below fails or returns nothing (it then stays an empty list).
latest_week_data = []

# Fetch the single most recent record by year and week, where location1 is not null.
# NOTE(review): if `year`/`week` are stored as text, `week DESC` sorts lexicographically
# ('9' before '52') - confirm the column types.
query_url = f"{base_url}?$$app_token={nndss_app_token}&$order=year DESC, week DESC&$where=location1 IS NOT NULL&$limit=1"

# requests applies no timeout by default; bound the request.
response = requests.get(query_url, timeout=60)

if response.status_code == 200:
    latest_record = response.json()
    if latest_record:
        # Extract the year and week from the most recent record
        latest_year = latest_record[0]['year']
        latest_week = latest_record[0]['week']

        # Fetch all records for that year and week, where location1 is not null.
        # An explicit $limit is required: without it the API returns only its default
        # page size (1000 rows) and the week's data would be silently truncated.
        week_data_query_url = f"{base_url}?$$app_token={nndss_app_token}&$where=year='{latest_year}' AND week='{latest_week}' AND location1 IS NOT NULL&$limit=50000"
        week_data_response = requests.get(week_data_query_url, timeout=60)

        if week_data_response.status_code == 200:
            latest_week_data = week_data_response.json()
            if latest_week_data:
                # Print only the first record as a sample of the week's data
                print(f"Data for the most recent week of year {latest_year}, week {latest_week}, where location1 is not null: {json.dumps(latest_week_data[0], indent=2)}")
            else:
                # Guard against indexing an empty result
                print(f"No rows returned for year {latest_year}, week {latest_week}.")
        else:
            print(f"Failed to fetch data for the latest week: {week_data_response.status_code}")
    else:
        print("No recent data found.")
else:
    print(f"Failed to fetch the latest data: {response.status_code}")


In [None]:
# Display the records fetched for the most recent year/week as a DataFrame (not assigned to a name)
pd.DataFrame(latest_week_data)

In [None]:
# Save df as a pickle file; the path is relative to the current working directory
df.to_pickle("../data/raw/NNDSS.pkl")