# Obtain open data

In [None]:
# Load packages
from dotenv import load_dotenv
import osmnx as ox
import uuid
import pandas as pd
import os
import requests
import pycountry

In [None]:
# Load environment variables from a .env file, so that settings such as the
# OpenCellID API key can be read from the environment further down
load_dotenv()

In [None]:
# Function to generate a unique identifier for point of interest or cell site
def generate_uuids(n):
    """Return a list of ``n`` freshly generated random (version 4) UUID strings."""
    identifiers = []
    for _ in range(n):
        identifiers.append(str(uuid.uuid4()))
    return identifiers

In [None]:
# Map a country name to its ISO-3 code
def country_name_to_iso3(country_name):
    """Return the ISO-3 (alpha-3) code for ``country_name``, or None if the lookup fails."""
    try:
        iso3_code = pycountry.countries.lookup(country_name).alpha_3
    except LookupError:
        # pycountry could not match the given name
        iso3_code = None
    return iso3_code

## Country

In [None]:
# Enter the country name; it is used to look up the ISO-3 code that names the output folder
country_name = 'Switzerland'

In [None]:
# Fetch the ISO-3 country code (None if the country name is not recognised)
country_code = country_name_to_iso3(country_name)

In [None]:
# Show which ISO-3 code was found for the country name (prints None if the lookup failed)
print(f"The ISO-3 country code for {country_name} is {country_code}")

## Points of interest

In [None]:
# Points of interest to download: schools in Carouge (Geneva)
# Location to search in
place = "Carouge, Geneva, Switzerland"
# OpenStreetMap tag to filter on; see https://wiki.openstreetmap.org/ for the available tags
tags = {"amenity": "school"}
# Download the matching features from OpenStreetMap and keep only the first three
schools = ox.features_from_place(place, tags).head(3)

In [None]:
# Convert the schools to a DataFrame
# The centroids are computed once, in a projected (metric) CRS, and then converted
# back to WGS84: centroids taken directly on lat/lon geometries are inaccurate and
# make GeoPandas emit a "geographic CRS" warning.
centroids = (
    schools.geometry
    .to_crs(schools.estimate_utm_crs())
    .centroid
    .to_crs(epsg=4326)
)
schools_df = pd.DataFrame({
    "poi_id": generate_uuids(len(schools)),
    "lat": centroids.y,
    "lon": centroids.x,
}).reset_index(drop=True)

In [None]:
# Inspect the first rows of the points-of-interest table (poi_id, lat, lon)
schools_df.head()

## Cell sites

In [None]:
# Get the bounding box of the points of interest (with a 100m buffer around it)
# The buffer is applied in the local UTM zone, where one unit is one metre on the ground.
# Web Mercator (EPSG:3857) stretches distances away from the equator, so a buffer of
# 100 units there is noticeably less than 100 m at Swiss latitudes.
local_metric_crs = schools.estimate_utm_crs()
poi_bounding_box = schools.to_crs(local_metric_crs).buffer(100).to_crs(epsg=4326).total_bounds

In [None]:
# Read the OpenCellID API key from the environment
# (alternatively, assign it directly: opencellid_key = "your_key_here")
opencellid_key = os.environ.get("OPENCELLIDKEY")

In [None]:
# Set up the API URL
# total_bounds is ordered (min lon, min lat, max lon, max lat), whereas the BBOX
# parameter is filled in as min lat, min lon, max lat, max lon
min_lon, min_lat, max_lon, max_lat = poi_bounding_box
# Use HTTPS so that the API key is not sent over the network in clear text
api_url = (
    "https://www.opencellid.org/cell/getInArea"
    f"?key={opencellid_key}"
    f"&BBOX={min_lat},{min_lon},{max_lat},{max_lon}"
    "&format=json"
)

In [None]:
# Make the API request; the timeout keeps an unresponsive server from blocking the notebook forever
response = requests.get(api_url, timeout=30)

# Cell records returned by the API; stays empty if anything goes wrong
cells = []

# Check if the request was successful
if response.status_code == 200:
    try:
        # Parse the JSON response
        data = response.json()
    except ValueError:
        data = None
        print("Failed to retrieve data: the response is not valid JSON")
    else:
        # NOTE(review): presumably the API can report problems (e.g. an invalid key)
        # in the body of a 200 response - confirm against the OpenCellID documentation
        if isinstance(data, dict) and 'cells' in data:
            cells = data['cells']
        else:
            print(f"Failed to retrieve data: no 'cells' entry in the response: {data}")
else:
    print(f"Failed to retrieve data: {response.status_code}")

# Convert the cell records to a DataFrame. It is always defined, so the following
# cells do not fail with a NameError when the request was unsuccessful.
cell_sites_df = pd.DataFrame(cells)

# Make sure the columns used by the following cells exist even when no cell sites were returned
missing_columns = [column for column in ("lat", "lon", "radio") if column not in cell_sites_df.columns]
if missing_columns:
    cell_sites_df = cell_sites_df.reindex(columns=[*cell_sites_df.columns, *missing_columns])

In [None]:
# Keep only the 4G (LTE) cell sites and the columns needed downstream
is_lte = cell_sites_df["radio"] == "LTE"
lte_sites = cell_sites_df.loc[is_lte, ["lat", "lon", "radio"]]
cell_sites_df = lte_sites.rename(columns={"radio": "radio_type"})
# Give every remaining cell site its own unique identifier
cell_sites_df["ict_id"] = generate_uuids(len(cell_sites_df))

In [None]:
# Inspect the first rows of the cell-site table (lat, lon, radio_type, ict_id)
cell_sites_df.head()

## Save data

In [None]:
# Locate the project's data directory: two levels above the current working directory
current_dir = os.getcwd()
parent_dir = os.path.dirname(current_dir)
root_dir = os.path.dirname(parent_dir)
data_dir = os.path.join(root_dir, 'data')

In [None]:
# Save the data we obtained to a csv file in the data/input_data/<country-code> directory
# Fail early with a clear message: os.path.join() raises an opaque TypeError when
# country_code is None, i.e. when the country name could not be resolved.
if country_code is None:
    raise ValueError(f"No ISO-3 country code found for {country_name!r}; cannot build the output directory")
# Reuse the data directory computed earlier instead of rebuilding the path from os.getcwd()
subfolder_path = os.path.join(data_dir, 'input_data', country_code)
os.makedirs(subfolder_path, exist_ok=True)
schools_df.to_csv(os.path.join(subfolder_path, "carouge-schools.csv"), index=False)
cell_sites_df.to_csv(os.path.join(subfolder_path, "carouge-cell-sites.csv"), index=False)