In [1]:
# Dependencies
import os
import re
from urllib.parse import quote, urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup
from geopy.extra.rate_limiter import RateLimiter
from geopy.geocoders import Nominatim

In [2]:
# Function to scrape GP data
def scrape_data(url, timeout=10):
    """Scrape the GP listings from a results page.

    Args:
        url: URL of the results page to fetch.
        timeout: Seconds to wait for the server before the request is
            abandoned.

    Returns:
        A list with one dict per ``div.results__details`` block. Each dict has
        the keys ``Name``, ``Address`` and ``Phone`` (stripped text, or ``""``
        when the element is not present in the block) plus three booleans,
        ``Accepting New Patients``, ``Accepts Out of Area Registrations`` and
        ``Online Registration Available``, which are True when the matching
        tag text appears in the block.

    Raises:
        requests.RequestException: If the page cannot be fetched or the server
            answers with an error status.
    """

    def _text_by_id_prefix(block, tag_name, prefix):
        """Return the stripped text of the first matching element, or ''."""
        element = block.find(tag_name, {'id': lambda x: x and x.startswith(prefix)})
        # Not every listing carries every field; a missing element must not
        # abort the whole scrape.
        return element.text.strip() if element is not None else ''

    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of silently parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    gp_data = []
    for block in soup.find_all('div', {'class': 'results__details'}):
        tags = block.find_all('strong', {'id': lambda x: x and x.startswith('result_item_')})
        tags_text = {tag.text.strip() for tag in tags}
        gp_data.append({
            'Name': _text_by_id_prefix(block, 'h2', 'orgname_'),
            'Address': _text_by_id_prefix(block, 'p', 'address_'),
            'Phone': _text_by_id_prefix(block, 'p', 'phone_'),
            'Accepting New Patients': 'Accepting new patients' in tags_text,
            'Accepts Out of Area Registrations': 'Accepts out of area registrations' in tags_text,
            'Online Registration Available': 'Online registration available' in tags_text
        })
    return gp_data

# Function to scrape initial GP URLs
def scrape_gp_links(url, timeout=10):
    """Collect the GP profile links from a results page.

    Args:
        url: URL of the results page to fetch.
        timeout: Seconds to wait for the server before the request is
            abandoned.

    Returns:
        A list of absolute URLs taken from the ``a.nhsapp-open-in-webview``
        anchors, in page order. Anchors without an ``href``, and ``javascript``
        or ``#`` pseudo-links, are skipped.

    Raises:
        requests.RequestException: If the page cannot be fetched or the server
            answers with an error status.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    gp_links = []
    for link in soup.find_all('a', {'class': 'nhsapp-open-in-webview'}):
        href = link.get('href')
        # .get() returns None for anchors that have no href attribute.
        if not href or href.startswith(('javascript', '#')):
            continue
        # Resolve relative links against the page URL; absolute links pass
        # through unchanged.
        gp_links.append(urljoin(url, href))
    return gp_links

# Scrape Reviews Function
def scrape_reviews(gp_links, timeout=10):
    """Scrape the review texts from each GP's ratings-and-reviews page.

    Args:
        gp_links: Iterable of GP profile URLs.
        timeout: Seconds to wait for each page before the request is
            abandoned.

    Returns:
        A list of ``{'GPName': ..., 'ReviewText': ...}`` dicts, one per review
        that has a ``p.comment-text`` element. Pages that cannot be fetched, or
        that have no ``<h1>`` to take the name from, are reported on stdout and
        skipped.
    """
    reviews_data = []
    for gp_url in gp_links:
        # Avoid a double slash when the profile link already ends with '/'.
        review_url = f"{gp_url.rstrip('/')}/ratings-and-reviews"
        try:
            response = requests.get(review_url, timeout=timeout)
            response.raise_for_status()
        except requests.RequestException as e:
            # RequestException also covers timeouts and connection errors, so
            # one unreachable page no longer aborts the whole run.
            print(f"Could not fetch reviews for {gp_url}: {e}")
            continue

        soup = BeautifulSoup(response.text, 'html.parser')
        heading = soup.find('h1')
        if heading is None:
            print(f"Could not find a practice name on {review_url}; skipping.")
            continue
        # Keep only the first line of the heading text.
        gp_name = heading.text.strip().split('\n')[0].strip()

        for block in soup.find_all('div', {'class': 'org-review'}):
            review_text_block = block.find('p', class_='comment-text')
            if review_text_block:
                review_text = review_text_block.get_text(strip=True)
                reviews_data.append({'GPName': gp_name, 'ReviewText': review_text})

    return reviews_data

In [3]:
# Function for Geocoding Addresses
def geocode_addresses(gp_data):
    """Add ``Latitude`` and ``Longitude`` to each GP record via Nominatim.

    Args:
        gp_data: List of dicts, each with an ``Address`` entry. The dicts are
            updated in place.

    Returns:
        The same ``gp_data`` list. Every record gains ``Latitude`` and
        ``Longitude`` keys; both are ``None`` when the address is blank, has
        no match, or geocoding fails.
    """
    geolocator = Nominatim(user_agent="GP_Finder_App", timeout=10)
    # Space requests at least one second apart. swallow_exceptions=False lets
    # a failure (after the limiter's own retries) reach the handler below so
    # it is still reported.
    geocode = RateLimiter(
        geolocator.geocode,
        min_delay_seconds=1,
        swallow_exceptions=False,
    )

    for gp in gp_data:
        # Default to "not found"; overwritten only on a successful lookup.
        gp['Latitude'] = None
        gp['Longitude'] = None

        address = gp.get('Address')
        if not address:
            # Nothing to look up; do not spend a request on a blank address.
            continue

        try:
            location = geocode(address)
        except Exception as e:
            # Best effort: one bad address must not stop the remaining ones.
            print(f"Error geocoding {address}: {e}")
            continue

        if location:
            gp['Latitude'] = location.latitude
            gp['Longitude'] = location.longitude
    return gp_data

In [4]:
# Function to Export Data to CSV
def export_to_csv(data, filename, output_dir="../data"):
    """Write records to a CSV file, creating the target directory if needed.

    Args:
        data: Anything ``pd.DataFrame`` accepts, e.g. a list of dicts.
        filename: Name of the CSV file to write.
        output_dir: Directory to write into. Defaults to ``../data``.

    Returns:
        None. The file is written without the index column and a confirmation
        line is printed.
    """
    path = f"{output_dir}/{filename}"
    directory = os.path.dirname(path)
    if directory:
        # makedirs('') raises, so only create a directory when there is one.
        os.makedirs(directory, exist_ok=True)
    pd.DataFrame(data).to_csv(path, index=False)
    print(f"Data exported to {path} successfully.")

In [5]:
# Main Execution
if __name__ == "__main__":
    # Ask the user for a postcode; drop the stray whitespace that is easy to
    # type around it.
    postcode = input("Enter a postcode: ").strip()

    if not postcode:
        # An empty postcode would request the bare results URL.
        print("No postcode entered; nothing to scrape.")
    else:
        # Percent-encode the input so spaces or characters such as '/', '?'
        # and '#' cannot alter the structure of the URL.
        url = f"https://www.nhs.uk/service-search/find-a-gp/results/{quote(postcode, safe='')}"

        # Scrape the listings, the profile links and each profile's reviews
        gp_data = scrape_data(url)
        gp_links = scrape_gp_links(url)
        reviews_data = scrape_reviews(gp_links)

        # Geocode the addresses in the GP data
        gp_data_geocoded = geocode_addresses(gp_data)

        # Export GP data to CSV in the 'data' folder
        export_to_csv(gp_data_geocoded, 'gp_info.csv')

        # Export Reviews data to CSV in the 'data' folder
        export_to_csv(reviews_data, 'gp_reviews.csv')

        print("Data scraping and export completed.")

Enter a postcode: sw129lq
Data exported to ../data/gp_info.csv successfully.
Data exported to ../data/gp_reviews.csv successfully.
Data scraping and export completed.
