# SQL Fun

In the following block we will seed a database with some data. We will use this data to practice our SQL queries. We will also need to install Faker for this assignment (`pip install Faker`). To start, create a new Postgres database on Railway, update the variables in the next block, and run the block following it.

In [1]:
import os

# Connection settings for the Postgres instance. Each value can be overridden
# by the environment variable of the same name, so real credentials do not
# have to be edited into (and committed with) the notebook. The literals are
# only fallbacks that keep the previous behavior when nothing is exported.
# NOTE(review): the fallback password looks like a real secret checked into
# the file -- rotate it and drop the default once PGPASSWORD is set externally.
PGDATABASE = os.environ.get("PGDATABASE", "railway")
PGHOST = os.environ.get("PGHOST", "containers-us-west-26.railway.app")
PGPASSWORD = os.environ.get("PGPASSWORD", "8HSAlP7s9zPHPETp9L0R")
PGPORT = os.environ.get("PGPORT", "6207")
PGUSER = os.environ.get("PGUSER", "postgres")

In [2]:
import psycopg2
from faker import Faker
import random
from datetime import datetime, timedelta

# Faker supplies the synthetic names, addresses and dates for the seed data
fake = Faker()

# Gather the PG* settings into one mapping, then open the database session
connection_settings = {
    "host": PGHOST,
    "database": PGDATABASE,
    "port": PGPORT,
    "user": PGUSER,
    "password": PGPASSWORD,
}
conn = psycopg2.connect(**connection_settings)

# Single cursor through which the statements in this cell are executed
cur = conn.cursor()

# Start from a clean slate: remove any of the four tables left by an earlier
# run (CASCADE also drops objects that depend on them)
drop_tables_sql = """
    DROP TABLE IF EXISTS Diseases CASCADE;
    DROP TABLE IF EXISTS Patients CASCADE;
    DROP TABLE IF EXISTS Health_Checks CASCADE;
    DROP TABLE IF EXISTS Patient_Diseases CASCADE;
"""
cur.execute(drop_tables_sql)

# Recreate the schema: Diseases and Patients are the parent tables;
# Health_Checks and Patient_Diseases reference them through foreign keys
create_tables_sql = """
CREATE TABLE Diseases (
    disease_id SERIAL PRIMARY KEY,
    name TEXT NOT NULL,
    description TEXT
);

CREATE TABLE Patients (
    patient_id SERIAL PRIMARY KEY,
    name TEXT NOT NULL,
    age INTEGER NOT NULL,
    gender TEXT NOT NULL,
    address TEXT NOT NULL
);

CREATE TABLE Health_Checks (
    check_id SERIAL PRIMARY KEY,
    patient_id INTEGER REFERENCES Patients(patient_id),
    check_date DATE NOT NULL,
    weight DECIMAL NOT NULL,
    height DECIMAL NOT NULL,
    blood_pressure TEXT NOT NULL,
    heart_rate INTEGER NOT NULL
);

CREATE TABLE Patient_Diseases (
    pd_id SERIAL PRIMARY KEY,
    patient_id INTEGER REFERENCES Patients(patient_id),
    disease_id INTEGER REFERENCES Diseases(disease_id),
    diagnosis_date DATE NOT NULL,
    recovery_date DATE
);
"""
cur.execute(create_tables_sql)


# Reference data for the Diseases table as (name, description) pairs
diseases = [
    ("Hypertension", "Long-term medical condition in which the blood pressure in the arteries is persistently elevated."),
    ("Diabetes", "Chronic disease that occurs either when the pancreas does not produce enough insulin or when the body cannot effectively use the insulin it produces."),
    ("Asthma", "Common long-term inflammatory disease of the airways of the lungs."),
    ("Arthritis", "Inflammation of one or more joints, causing pain and stiffness that can worsen with age."),
    ("Cancer", "Diseases involving abnormal cell growth with the potential to invade or spread to other parts of the body."),
    ("Common cold", "Mild viral infectious disease of the upper respiratory system (nose and throat)."),
    ("Dementia", "Broad category of brain diseases that cause a long-term and often gradual decrease in the ability to think and remember that is severe enough to affect daily functioning."),
]

# Run the same parameterized INSERT once per (name, description) pair
cur.executemany("INSERT INTO Diseases (name, description) VALUES (%s, %s)", diseases)

conn.commit()

# Read back the generated primary keys so patients can be linked to diseases
cur.execute("SELECT disease_id FROM Diseases")
disease_ids = [disease_id for (disease_id,) in cur.fetchall()]

# Create data for 100 patients. Each patient gets a random batch of health
# checks and, with 50% probability, one diagnosed disease.
# Everything runs inside a single transaction: if any statement fails, the
# transaction is rolled back so the connection is not left in an aborted state
# (which would make every later statement on it fail until a rollback).
try:
    for _ in range(100):
        name = fake.name()
        age = random.randint(20, 80)
        gender = random.choice(["Male", "Female"])
        # Faker addresses are multi-line; flatten them onto one line
        address = fake.address().replace("\n", ", ")

        # RETURNING hands back the generated key needed for the child rows
        cur.execute("INSERT INTO Patients (name, age, gender, address) VALUES (%s, %s, %s, %s) RETURNING patient_id", (name, age, gender, address))
        patient_id = cur.fetchone()[0]

        # Create health checks for each patient
        for _check in range(random.randint(5, 20)):  # 5-20 health checks per patient
            check_date = fake.date_between(start_date='-1y', end_date='today')
            weight = round(random.uniform(50, 100), 1)
            height = round(random.uniform(1.5, 2), 2)
            blood_pressure = f"{random.randint(80, 130)}/{random.randint(60, 90)}"
            heart_rate = random.randint(60, 100)

            cur.execute("INSERT INTO Health_Checks (patient_id, check_date, weight, height, blood_pressure, heart_rate) VALUES (%s, %s, %s, %s, %s, %s)",
                        (patient_id, check_date, weight, height, blood_pressure, heart_rate))

        # Assign diseases to some patients
        if random.choice([True, False]):  # 50% chance of having a disease
            disease_id = random.choice(disease_ids)
            diagnosis_date = fake.date_between(start_date='-1y', end_date='today')
            # Recovery, when present, is drawn from the diagnosis date onwards
            recovery_date = fake.date_between(start_date=diagnosis_date, end_date='today') if random.choice([True, False]) else None  # 50% chance of recovery

            cur.execute("INSERT INTO Patient_Diseases (patient_id, disease_id, diagnosis_date, recovery_date) VALUES (%s, %s, %s, %s)",
                        (patient_id, disease_id, diagnosis_date, recovery_date))
except Exception:
    # Discard the partial seed and reset the transaction before re-raising
    conn.rollback()
    raise

# Save the changes. The connection and cursor are NOT closed here --
# presumably they are reused by the query cells below; call cur.close() and
# conn.close() once you are finished with them.
conn.commit()

1. Get a list of all patients' names and ages.

In [None]:
# Your code here ❌
# *         # fieldname,fieldname

2. Get a list of all diseases, along with their descriptions.

In [None]:
# Your code here ❌

3. Find the number of health checks a specific patient (by their ID) has had.

[COUNT](https://www.postgresqltutorial.com/postgresql-aggregate-functions/postgresql-count-function/)

In [None]:
# Your code here ❌

4. Get a list of patients (by name) diagnosed with "Diabetes".

In [None]:
# Your code here ❌

5. Get the list of diseases a specific patient (by their ID) was diagnosed with.

In [None]:
# Your code here ❌

6. Find the average age of patients.

In [None]:
# Your code here ❌

7. Get a list of all patients' names, and the count of health checks they have had.

In [None]:
# Your code here ❌