# API Project

Data source (NHS Scotland open data, weekly accident and emergency activity and waiting times): https://www.opendata.nhs.scot/dataset/weekly-accident-and-emergency-activity-and-waiting-times/resource/a5f7ca94-c810-41b5-a7c9-25c18d43e5a4

### Import Libraries

In [24]:
import json
import os
from urllib.parse import quote  # For percent-encoding the database password in the connection URL

import openpyxl  # For reading and writing Excel (.xlsx) files
import pandas as pd
import requests # For sending the API request
from sqlalchemy import create_engine  # For connecting to a PostgreSQL database (or other databases)

### Make the API request

In [4]:
# "datastore_search" endpoint of the NHS Scotland open-data API
url = "https://www.opendata.nhs.scot/api/3/action/datastore_search"

# Query parameters:
#   resource_id - the weekly A&E activity and waiting times resource
#   limit       - maximum number of records requested in this single call
#   filters     - JSON-encoded filter; only rows whose TreatmentLocation is "S308H"
# NOTE(review): if more than 1300 rows ever match the filter, this single call
# will presumably return only the first 1300 - confirm, and paginate if needed.
params = {
    "resource_id": "a5f7ca94-c810-41b5-a7c9-25c18d43e5a4",
    "limit": 1300,
    "filters": json.dumps({"TreatmentLocation": "S308H"})
}

# Send the request. Without a timeout, requests waits indefinitely on a stalled
# server, which would hang the kernel (or a scheduled run); 30 seconds is a
# generous upper bound for one call.
response = requests.get(url, params=params, timeout=30)

### Interpret the result

In [5]:
# Report whether the API call succeeded (HTTP 200) or which status code came back instead
print(
    f"Request was a success! Status code: {response.status_code}"
    if response.status_code == 200
    else f"Error: {response.status_code}"
)

Request was a success! Status code: 200


### Extract the records

In [6]:
# Stop here with a clear message if the request failed. Otherwise `records`
# would be left undefined (or stale from an earlier run) and the next cell
# would fail confusingly or silently work on old data.
if response.status_code != 200:
    raise RuntimeError(f"API request failed with status code {response.status_code}")

# Parse the JSON body and pull out the list of row dictionaries;
# fall back to an empty list when the payload has no "result"/"records" entry.
data = response.json()
records = data.get("result", {}).get("records", [])

### Add records to a data frame

In [7]:
# Convert the list of record dictionaries into a DataFrame (one row per record).
# pandas is already imported as `pd` in the import cell at the top of the notebook.
df = pd.DataFrame(records)

# Show the last few rows
df.tail()

Unnamed: 0,_id,WeekEndingDate,Country,HBT,TreatmentLocation,DepartmentType,AttendanceCategory,NumberOfAttendancesEpisode,NumberWithin4HoursEpisode,NumberOver4HoursEpisode,PercentageWithin4HoursEpisode,NumberOver8HoursEpisode,PercentageOver8HoursEpisode,NumberOver12HoursEpisode,PercentageOver12HoursEpisode
1099,36029,20250323,S92000003,S08000024,S308H,Type 1,Unplanned,1070,683,387,63.8,106,9.9,45,4.2
1100,36030,20250323,S92000003,S08000024,S308H,Type 1,All,1248,861,387,69.0,106,8.5,45,3.6
1101,36117,20250330,S92000003,S08000024,S308H,Type 1,New planned,182,182,0,100.0,0,0.0,0,0.0
1102,36118,20250330,S92000003,S08000024,S308H,Type 1,Unplanned,1049,647,402,61.7,121,11.5,59,5.6
1103,36119,20250330,S92000003,S08000024,S308H,Type 1,All,1231,829,402,67.3,121,9.8,59,4.8


# Transformation

In [8]:
# Normalise the column names to lower case
df.columns = df.columns.str.lower()

# Remove the country and health-board code columns, which are not loaded into
# the database. errors="ignore" keeps the cell safe to re-run: on a second run
# the columns are already gone and a plain drop would raise a KeyError.
df = df.drop(columns=["country", "hbt"], errors="ignore")
df.head()

Unnamed: 0,_id,weekendingdate,treatmentlocation,departmenttype,attendancecategory,numberofattendancesepisode,numberwithin4hoursepisode,numberover4hoursepisode,percentagewithin4hoursepisode,numberover8hoursepisode,percentageover8hoursepisode,numberover12hoursepisode,percentageover12hoursepisode
0,53,20150222,S308H,Type 1,Unplanned,972,903,69,92.9,2,0.2,0,0.0
1,54,20150222,S308H,Type 1,All,972,903,69,92.9,2,0.2,0,0.0
2,117,20150301,S308H,Type 1,Unplanned,1011,917,94,90.7,4,0.4,0,0.0
3,118,20150301,S308H,Type 1,All,1011,917,94,90.7,4,0.4,0,0.0
4,181,20150308,S308H,Type 1,Unplanned,1040,981,59,94.3,5,0.5,0,0.0


# Add data to the database

### Create connection to the database

In [9]:
# Connection settings for the local PostgreSQL instance
db_user = "postgres"
# Read the password from the environment so it is never stored in the notebook
# (notebooks are easily shared or committed together with their contents).
db_password = os.environ.get("PGPASSWORD")
if not db_password:
    raise RuntimeError("Set the PGPASSWORD environment variable to the database password.")
db_host = "localhost"
db_port = "5432"
db_name = "A&E Attendances"

# Percent-encode the password so characters such as "@", ":" or "/" cannot
# be misread as URL delimiters when the connection string is parsed.
engine = create_engine(
    f"postgresql://{db_user}:{quote(db_password, safe='')}@{db_host}:{db_port}/{db_name}"
)

# Open (and immediately close) a connection to confirm the settings work
with engine.connect() as conn:
    print("Connected to PostgreSQL successfully!")

Connected to PostgreSQL successfully!


### Load data into the database

In [10]:
# Fetch the ids already stored in the table so that only unseen rows are inserted
existing_ids = pd.read_sql("SELECT _id FROM a_and_e_attendances", con=engine)["_id"]

# Keep the rows of df whose _id is not yet in the table
new_data = df.loc[~df["_id"].isin(existing_ids)]

if new_data.empty:
    print("Data already present within table.")
else:
    # Append the new rows; index=False keeps the DataFrame index out of the table
    new_data.to_sql("a_and_e_attendances", con=engine, if_exists="append", index=False)
    print(f"{len(new_data)} row(s) have been added to the database.")

print("All data processed successfully!")

3 row(s) have been added to the database.
All data processed successfully!


Next Steps

- Check whether the API `limit` can be handled dynamically (e.g. by paging through the results) or whether it needs to stay hardcoded.
- Create a dashboard - potentially pull all information in and add lookup tables to SQL for health board and hospital, or add in data on demographics and/or waits.
- Repeat code to ensure I understand it



Done
- Automate API calls (e.g., run the notebook on a schedule and store the data in a database). To convert the notebook to a script first: `jupyter nbconvert --to script your_notebook.ipynb`

In [1]:
import pandas as pd

# Small hand-made employee table to practise on
data = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David', 'Eva'],
    Age=[25, 32, 30, 29, 22],
    Department=['HR', 'IT', 'Finance', 'IT', 'HR'],
    Salary=[45000, 60000, 52000, 58000, 47000],
    # Parse the date strings so the column holds datetimes rather than text
    StartDate=pd.to_datetime(['2020-01-10', '2019-03-15', '2021-06-01', '2020-09-23', '2022-02-17']),
)

# One row per employee, one column per key of `data`
df = pd.DataFrame(data)
print(df)

## Try the PDA assignment again. Then try data cleaning/transformation with a dummy data set.

      Name  Age Department  Salary  StartDate
0    Alice   25         HR   45000 2020-01-10
1      Bob   32         IT   60000 2019-03-15
2  Charlie   30    Finance   52000 2021-06-01
3    David   29         IT   58000 2020-09-23
4      Eva   22         HR   47000 2022-02-17
