# Connectivity data

**Objective:** <br> 
The following notebook gathers connectivity information from Ookla's speedtest API based on [Ookla's Open Data Initiative](https://github.com/teamookla/ookla-open-data) GitHub repository and the [tutorials](https://github.com/teamookla/ookla-open-data/blob/master/tutorials/aggregate_by_county_py.ipynb) provided. <br>

Author(s): 
* María Reyes Retana, Consultant SCL, mariarey@iadb.org <br>
* Laura Goyeneche, Consultant SPH, lauragoy@iadb.org <br> 

Created: April 1, 2023

## 1. Basics

In [15]:
%%capture
# Libraries
import os
import dotenv
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely import wkt
import matplotlib.pyplot as plt

# Modules
from utils import *

In [16]:
%%capture 
# Working environments
dotenv.load_dotenv('/home/ec2-user/SageMaker/.env')
sclbucket   = os.getenv("sclbucket")
scldatalake = os.getenv("scldatalake")

## 2. Inputs

In [3]:
# Administrative shapefiles
# One boundary layer per administrative level (0, 1 and 2), loaded in that
# order through get_country_shp (not defined in this notebook; presumably
# provided by the star import of utils).
adm0, adm1, adm2 = (get_country_shp(level = lvl) for lvl in (0, 1, 2))

## 3. Connectivity

### 3.1. Extract tiles 

In [5]:
# Inputs
# Year and quarter of the data to process; both values are used for the
# tile URLs and for the names of every exported file.
year, quarter = 2023, 2

In [6]:
# URL
fixed  = get_tile_url("fixed" , year, quarter)
mobile = get_tile_url("mobile", year, quarter)

In [7]:
# Extract data
fixed  = gpd.read_file(fixed)
mobile = gpd.read_file(mobile)

In [8]:
# Export world-level data 
# Writes the unfiltered fixed and mobile tile layers to disk, one .shp file
# per layer, named after the selected year and quarter.
# NOTE(review): these file names use `{year}_{quarter}` whereas the other
# exports in this notebook use `{year}_q{quarter}` — confirm whether the
# difference is intentional before changing either pattern.
fixed .to_file(f"../data/3-connectivity/tiles-level/world/fixed_connectivity_{year}_{quarter}.shp" )
mobile.to_file(f"../data/3-connectivity/tiles-level/world/mobile_connectivity_{year}_{quarter}.shp")

### 3.2. LAC-26 tiles

In [None]:
# Select points in LAC-26
# Spatial join (geopandas defaults) of each tile layer against the adm0
# polygons: only tiles matching a polygon are kept, and the adm0 attributes
# are attached to them.
fixed_  = gpd.sjoin(left_df = fixed , right_df = adm0)
mobile_ = gpd.sjoin(left_df = mobile, right_df = adm0)

In [None]:
%%capture
# Create lat-lon 
fixed_ ['lon'] = fixed_ .centroid.x
fixed_ ['lat'] = fixed_ .centroid.y
mobile_['lon'] = mobile_.centroid.x
mobile_['lat'] = mobile_.centroid.y

In [None]:
# Change units
# Add Mbps versions of the average download (d) and upload (u) speeds to
# both layers by dividing the kbps columns by 1000.
for tiles in (fixed_, mobile_):
    tiles['avg_d_mbps'] = tiles['avg_d_kbps'] / 1000
    tiles['avg_u_mbps'] = tiles['avg_u_kbps'] / 1000

In [None]:
# Drop index_right
# Remove the `index_right` column from both layers before they are
# spatially joined again further down (presumably it was added by the
# adm0 join and would clash with the next one — TODO confirm).
fixed_  = fixed_ .drop("index_right", axis = "columns")
mobile_ = mobile_.drop("index_right", axis = "columns")

In [None]:
%%capture 
# Export data
# Writes the LAC-26 fixed and mobile tile layers, including the lon/lat and
# Mbps columns added above, to one .shp file per layer named after the
# selected year and quarter. Cell output is silenced by %%capture.
fixed_ .to_file(f"../data/3-connectivity/tiles-level/lac-26/fixed_connectivity_{year}_q{quarter}.shp" )
mobile_.to_file(f"../data/3-connectivity/tiles-level/lac-26/mobile_connectivity_{year}_q{quarter}.shp")

### 3.3. Average admin-level 1

In [18]:
# Select points in admin level 1
# Spatial join (geopandas defaults) of the LAC-26 tiles against the adm1
# polygons, attaching the adm1 attributes to every matching tile.
fix_adm1 = gpd.sjoin(left_df = fixed_ , right_df = adm1)
mob_adm1 = gpd.sjoin(left_df = mobile_, right_df = adm1)

In [22]:
# Remove columns
# Discard the join bookkeeping column and the `_right`-suffixed copies of
# the country-level columns that the adm1 join produced.
fix_adm1 = fix_adm1.drop(["index_right","ADM0_EN_right","ADM0_PCODE_right"], axis = "columns")
mob_adm1 = mob_adm1.drop(["index_right","ADM0_EN_right","ADM0_PCODE_right"], axis = "columns")

In [24]:
# Calculate fixed averages
# Summarise the fixed tiles per ADM1_PCODE with calculate_stats (helper not
# defined in this notebook — presumably it uses `tests` in the aggregation;
# TODO confirm) and discard the `tests` column from its result.
fix_adm1 = calculate_stats(
    fix_adm1[["ADM1_PCODE","avg_d_mbps","avg_u_mbps","tests"]],
    ["ADM1_PCODE"],
).drop(columns = "tests")
# Left-merge onto the adm1 attribute table (geometry removed) so every
# admin-1 unit is kept, including those without a matching row.
fix_adm1 = adm1.drop(columns = "geometry").merge(fix_adm1, how = "left", on = "ADM1_PCODE")

In [26]:
# Calculate mobile averages
# Summarise the mobile tiles per ADM1_PCODE with calculate_stats (helper
# not defined in this notebook — presumably it uses `tests` in the
# aggregation; TODO confirm) and discard the `tests` column from its result.
mob_adm1 = calculate_stats(
    mob_adm1[["ADM1_PCODE","avg_d_mbps","avg_u_mbps","tests"]],
    ["ADM1_PCODE"],
).drop(columns = "tests")
# Left-merge onto the adm1 attribute table (geometry removed) so every
# admin-1 unit is kept, including those without a matching row.
mob_adm1 = adm1.drop(columns = "geometry").merge(mob_adm1, how = "left", on = "ADM1_PCODE")

In [27]:
# Export data
# Writes the admin-level-1 fixed and mobile tables to CSV, named after the
# selected year and quarter.
# NOTE(review): to_csv is called with its defaults, so the DataFrame index
# is written as an extra first column — confirm downstream readers expect it.
fix_adm1.to_csv(f"../data/3-connectivity/admin-level/fixed_connectivity_adm1_{year}_q{quarter}.csv")
mob_adm1.to_csv(f"../data/3-connectivity/admin-level/mobile_connectivity_adm1_{year}_q{quarter}.csv")

### 3.4. Average admin-level 2

In [28]:
# Select points in admin level 2
# Spatial join (geopandas defaults) of the LAC-26 tiles against the adm2
# polygons, attaching the adm2 attributes to every matching tile.
fix_adm2 = gpd.sjoin(left_df = fixed_ , right_df = adm2)
mob_adm2 = gpd.sjoin(left_df = mobile_, right_df = adm2)

In [30]:
# Remove columns
# Discard the join bookkeeping column and the `_right`-suffixed copies of
# the country-level columns that the adm2 join produced.
fix_adm2 = fix_adm2.drop(["index_right","ADM0_EN_right","ADM0_PCODE_right"], axis = "columns")
mob_adm2 = mob_adm2.drop(["index_right","ADM0_EN_right","ADM0_PCODE_right"], axis = "columns")

In [34]:
# Calculate fixed averages
# Summarise the fixed tiles per ADM2_PCODE with calculate_stats (helper not
# defined in this notebook — presumably it uses `tests` in the aggregation;
# TODO confirm) and discard the `tests` column from its result.
fix_adm2 = calculate_stats(
    fix_adm2[["ADM2_PCODE","avg_d_mbps","avg_u_mbps","tests"]],
    ["ADM2_PCODE"],
).drop(columns = "tests")
# Left-merge onto the adm2 attribute table (geometry removed) so every
# admin-2 unit is kept, including those without a matching row.
fix_adm2 = adm2.drop(columns = "geometry").merge(fix_adm2, how = "left", on = "ADM2_PCODE")

In [36]:
# Calculate mobile averages
# Summarise the mobile tiles per ADM2_PCODE with calculate_stats (helper
# not defined in this notebook — presumably it uses `tests` in the
# aggregation; TODO confirm) and discard the `tests` column from its result.
mob_adm2 = calculate_stats(
    mob_adm2[["ADM2_PCODE","avg_d_mbps","avg_u_mbps","tests"]],
    ["ADM2_PCODE"],
).drop(columns = "tests")
# Left-merge onto the adm2 attribute table (geometry removed) so every
# admin-2 unit is kept, including those without a matching row.
mob_adm2 = adm2.drop(columns = "geometry").merge(mob_adm2, how = "left", on = "ADM2_PCODE")

In [37]:
# Export data
# Writes the admin-level-2 fixed and mobile tables to CSV, named after the
# selected year and quarter.
# NOTE(review): to_csv is called with its defaults, so the DataFrame index
# is written as an extra first column — confirm downstream readers expect it.
fix_adm2.to_csv(f"../data/3-connectivity/admin-level/fixed_connectivity_adm2_{year}_q{quarter}.csv")
mob_adm2.to_csv(f"../data/3-connectivity/admin-level/mobile_connectivity_adm2_{year}_q{quarter}.csv")