In [58]:
import requests
import pandas as pd

# Constants
BASE_URL = "https://datasets-server.huggingface.co/rows"
DATASET = "hari560/health"
CONFIG = "default"
SPLIT = "train"
BATCH_SIZE = 100  # Maximum per request
TOTAL_RECORDS = 1003
REQUEST_TIMEOUT = 30  # Seconds; requests applies no timeout unless one is passed

# Fetch all records in batches.
# Each request asks for BATCH_SIZE rows starting at `offset`; the "row" payload
# of every returned entry is accumulated into `all_records`.
all_records = []

# A single Session reuses the underlying connection across the batch requests.
with requests.Session() as session:
    for offset in range(0, TOTAL_RECORDS, BATCH_SIZE):
        print(f"Fetching records {offset} to {offset + BATCH_SIZE}...")
        response = session.get(
            BASE_URL,
            # Passing `params` lets requests URL-encode the values (DATASET
            # contains a "/") instead of splicing them into the URL by hand.
            params={
                "dataset": DATASET,
                "config": CONFIG,
                "split": SPLIT,
                "offset": offset,
                "length": BATCH_SIZE,
            },
            timeout=REQUEST_TIMEOUT,
        )
        # Fail loudly on HTTP errors rather than parsing an error body as data.
        response.raise_for_status()
        page = response.json()

        records = [entry['row'] for entry in page['rows']]
        if not records:
            # No rows came back at this offset, so later offsets would be
            # empty as well; stop instead of issuing further requests.
            break
        all_records.extend(records)

# Surface a short download instead of silently writing a truncated file.
if len(all_records) != TOTAL_RECORDS:
    print(f"⚠️ Expected {TOTAL_RECORDS} records but fetched {len(all_records)}")

# Convert to DataFrame (one row per fetched record)
df = pd.DataFrame(all_records)

# Save to CSV without the positional index column
df.to_csv("health_dataset_full.csv", index=False)

print(f"✅ Fetched {len(df)} records and saved to health_dataset_full.csv")

Fetching records 0 to 100...
Fetching records 100 to 200...
Fetching records 200 to 300...
Fetching records 300 to 400...
Fetching records 400 to 500...
Fetching records 500 to 600...
Fetching records 600 to 700...
Fetching records 700 to 800...
Fetching records 800 to 900...
Fetching records 900 to 1000...
Fetching records 1000 to 1100...
✅ Fetched 1003 records and saved to health_dataset_full.csv


In [65]:
# Reload the exported CSV and show the "text" value stored under index label 0.
data = pd.read_csv("health_dataset_full.csv")
# One label-based lookup (row 0, column "text") instead of chained indexing.
column = data.loc[0, "text"]
column

"###Input :My daughter ( F, 18 y/o, 5'5', 165lbs) has been feeling poorly for a 6-8 months. She had COVID a couple of months ago and symptoms have are much worse in the last month or so. Symptoms seem POTS-like. She feels light headed, breathless, dizzy, HR goes from ~65 lying down to ~155-160 on standing. Today she tells me HR has been around 170 all day and she feels really lousy. (She using an OTC pulse ox to measure.) She has a cardiology appt but not until March and a PCP appt but not until April since she's at school and it's a new provider. What to do? Is this a on call nurse sort of issue? Or a trip to the ED? Or wait till tomorrow and try for an early appt? Try a couple of Valsalvas? Wait it out until her cardio appt? Or? She's away at school if Boston, what to do? Thank you ###Output :If she actually has a HR of 170 that is accurate, ongoing and persistent, she needs to be seen in the ED immediately."

In [77]:
# Load the processed dataset and pull out the two columns to inspect.
processed = pd.read_csv("health-data-processed.csv")
col, tags = processed["interpretation"], processed["functional_tags"]
# Print both Series with a separator line between them; sep="\n" puts each
# argument on its own line, matching three consecutive print() calls.
print(col, "=================>", tags, sep="\n")

0       An 18-year-old female has been feeling poorly ...
1       A 37-year-old transgender man with pre-diabete...
2       A 35-year-old physically active male, previous...
3       The patient reports that all lab tests returne...
4       A 32-year-old female with asthma and a history...
                              ...                        
998     A 33-year-old overweight female from Canada pr...
999     A female patient reports macrohematuria, with ...
1000    A 28-year-old male presents with 2 days of kid...
1001    A 33-year-old male received a concerning lette...
1002    The patient describes a mobile mass in the bla...
Name: interpretation, Length: 1003, dtype: object
0                      cardiology, diagnostic_uncertainty
1       cardiology, pharmacology, patient_education, d...
2       diagnostic_uncertainty, cardiology, neurology,...
3                                  diagnostic_uncertainty
4                           cardiology, patient_education
                      