In [None]:
import pandas as pd
import random
from datetime import datetime

# Seed records for the synthetic dataset.
# Field names are listed once; every row below supplies its values in this
# exact order, and the two are zipped together into one dict per condition.
_TERM_FIELDS = (
    "Term",
    "Definition",
    "Causes",
    "Symptoms",
    "Diagnosis",
    "Treatments",
    "Medications",
    "Prevention",
    "Prognosis",
    "Complications",
    "References",
)

_TERM_ROWS = (
    (
        "Hypertension",
        "A condition in which the force of the blood against the artery walls is too high.",
        "Genetics, poor diet, lack of exercise, stress",
        "Headaches, dizziness, shortness of breath",
        "Blood pressure tests",
        "Lifestyle changes, medications",
        "ACE inhibitors, beta blockers, calcium channel blockers",
        "Regular exercise, healthy diet, stress management",
        "Good with treatment, risk of stroke or heart attack if untreated",
        "Heart disease, stroke, kidney damage",
        "American Heart Association",
    ),
    (
        "Diabetes",
        "A group of diseases that result in too much sugar in the blood.",
        "Genetics, obesity, lack of exercise",
        "Frequent urination, excessive thirst, extreme hunger",
        "Blood sugar tests, HbA1c test",
        "Insulin therapy, medications, lifestyle changes",
        "Metformin, insulin, sulfonylureas",
        "Healthy diet, regular exercise, maintaining a healthy weight",
        "Manageable with treatment, risk of complications if untreated",
        "Heart disease, nerve damage, kidney failure",
        "American Diabetes Association",
    ),
    (
        "Asthma",
        "A condition in which your airways narrow and swell and produce extra mucus.",
        "Allergens, pollution, respiratory infections",
        "Wheezing, shortness of breath, chest tightness",
        "Spirometry, peak flow test",
        "Inhalers, long-term control medications, quick-relief inhalers",
        "Albuterol, corticosteroids",
        "Avoiding triggers, taking preventive medication",
        "Variable, can be managed with treatment",
        "Severe asthma attacks, respiratory failure",
        "Mayo Clinic",
    ),
)

# One dict per condition, keyed by field name (same key order as _TERM_FIELDS).
base_terms = [dict(zip(_TERM_FIELDS, row)) for row in _TERM_ROWS]

# Function to generate more entries based on the base set
def generate_entries(base_terms, num_entries, seed=None):
    """Build ``num_entries`` rows by sampling randomly from ``base_terms``.

    Each row is a list holding a 1-based ID, the sampled term's name
    suffixed with that ID (so every "Term" value is unique), and then the
    remaining fields of the sampled term copied unchanged.

    Args:
        base_terms: Sequence of dicts, each providing the keys "Term",
            "Definition", "Causes", "Symptoms", "Diagnosis", "Treatments",
            "Medications", "Prevention", "Prognosis", "Complications" and
            "References".
        num_entries: Number of rows to produce. Zero or a negative value
            yields an empty list.
        seed: Optional seed. When given, sampling uses a private
            ``random.Random(seed)``, so the same seed always produces the
            same rows and the global ``random`` state is not touched. When
            ``None`` (the default) the module-level ``random.choice`` is
            used, exactly as before.

    Returns:
        A list of 12-element lists ordered as: ID, term name, definition,
        causes, symptoms, diagnosis, treatments, medications, prevention,
        prognosis, complications, references.

    Raises:
        IndexError: If ``base_terms`` is empty and ``num_entries`` > 0.
        KeyError: If a sampled term is missing one of the expected keys.
    """
    # Fields copied verbatim after the ID and the uniquified term name.
    copied_fields = (
        "Definition",
        "Causes",
        "Symptoms",
        "Diagnosis",
        "Treatments",
        "Medications",
        "Prevention",
        "Prognosis",
        "Complications",
        "References",
    )

    # `is None` (not truthiness) so that seed=0 is honoured as a real seed.
    choose = random.choice if seed is None else random.Random(seed).choice

    data = []
    for i in range(1, num_entries + 1):
        term = choose(base_terms)
        data.append(
            [i, f"{term['Term']} {i}", *(term[field] for field in copied_fields)]
        )
    return data

# Build 100,000 synthetic rows sampled from the base terms
entries = generate_entries(base_terms, 100000)

# Wrap the rows in a DataFrame; the labels follow the order in which
# generate_entries lays out each row
df = pd.DataFrame(
    data=entries,
    columns=[
        "TermID",
        "Term",
        "Definition",
        "Causes",
        "Symptoms",
        "Diagnosis",
        "Treatments",
        "Medications",
        "Prevention",
        "Prognosis",
        "Complications",
        "References",
    ],
)

# Write the table to disk without the DataFrame index, then report success
df.to_csv("medical_terms_and_cures_large.csv", index=False)
print("Dataset generated and saved as 'medical_terms_and_cures_large.csv'")


Dataset generated and saved as 'medical_terms_and_cures_large.csv'
