# Fietsdata interpolatie
Verder invullen van de fietsintensiteiten op het fietsnetwerk op basis van nabijgelegen datapunten.
Wordt vooralsnog voor slechts 1 dag uitgerekend, als pilot
## Voorspelling van fietsintensiteiten op basis van nabijgelegen fietstelpunten
Interpolatie op basis van een gewogen gemiddelde van alle fietstelpunten binnen N wegsegmenten (pgr_drivingDistance). Hiervoor wordt een Dijkstra kortste-pad-algoritme gebruikt. Voor het Dijkstra-algoritme zijn nodes/edges-tabellen nodig, die gemaakt worden uit het NWB (of een subset ervan, voor een focusgebied).
https://docs.pgrouting.org/latest/en/pgr_drivingDistance.html

In [ ]:
import psycopg2
# Open the PostgreSQL connection and create the cursor `cur` that the later
# cells use for every SQL statement. The password is fetched through
# TokenLibrary.getSecret instead of being stored in the notebook.
try:
    postgres_pw = TokenLibrary.getSecret("redacted", "redacted", "redacted")
    conn = psycopg2.connect(dbname="rdt_dev",user="redacted",host="redacted",password=postgres_pw)
    # Autocommit: each execute() is committed immediately, so the cells below
    # never call conn.commit().
    conn.autocommit = True
    cur=conn.cursor()
    print("Database connected")
except Exception as exc:
    # Show the underlying cause and stop here: without `conn`/`cur` the
    # following cells would only fail later with an unrelated NameError.
    # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit through.
    print("I am unable to connect to the database")
    print(f"Reason: {exc!r}")
    raise

## Preparing nodes/vertices needed for Dijkstra algorithm

In [ ]:
# Convert the road file into a directed graph table with the columns
# (id, source, target, frc, cost, geom):
# - rows with rijrichtng in ('B','H','O') yield an edge from jte_id_beg to
#   jte_id_end with id = -wvk_id;
# - rows with rijrichtng in ('B','O') additionally yield the reversed edge
#   (source/target swapped, geometry flipped with st_reverse) with id = +wvk_id.
# Every edge gets cost 1.
# NOTE(review): rows whose rijrichtng is none of the listed values end up in
# neither branch and are therefore absent from the graph — confirm this is intended.
sql='''drop table if exists prd_fietsdata.doorfietsroute_directed;
CREATE TABLE prd_fietsdata.doorfietsroute_directed
AS (
    select w.wvk_id*-1 as id,
        w.jte_id_beg  as source,
        w.jte_id_end as target,
        frc as frc,
        1.0::float as cost,
        geom as geom
    from prd_fietsdata.doorfietsroute_cleaned AS w
    where rijrichtng in ('B','H','O') 
    union all 
    select w.wvk_id as id,
        w.jte_id_end   as source, -- dit is bewust omgedraaid 
        w.jte_id_beg   as target, -- dit is bewust omgedraaid
        frc as frc, 
        1 as cost,
        st_reverse(geom) as geom -- dit is bewust omgedraaid
    from prd_fietsdata.doorfietsroute_cleaned AS w
    where rijrichtng in ('B','O')
    );'''
cur.execute(sql)

In [ ]:
# Derive the nodes of the network from the directed graph; they are needed
# for visual checks and for the shortest-path algorithm.
# Each directed edge contributes two rows: its source id with the start point
# of its line-merged geometry, and its target id with the end point.
# Because of UNION ALL a node is listed once per edge end that touches it,
# so the table is not unique on `node`.
sql='''drop table if exists prd_fietsdata.doorfietsroute_nodes;
CREATE TABLE prd_fietsdata.doorfietsroute_nodes
AS (
    select source as node,
        st_startpoint(st_linemerge(geom)) as geom
    from prd_fietsdata.doorfietsroute_directed
    union all 
    select  target as node,
        st_endpoint(st_linemerge(geom)) as geom
    from prd_fietsdata.doorfietsroute_directed
    )'''
cur.execute(sql)

## Run Dijkstra
- Make a function
- Call that function for every fietstelling
  - Instead of a FOR loop, use lateral
  - SELECT * FROM input_table, LATERAL the_function(here_you_can_access_parameters_from_the_input_table)
  - This constitutes a FOR loop (one iteration per row in the input table) where in each loop iteration you can access the parameters from that row

In [ ]:
# Create the SQL function rdt_fietstellingen_drivingdistance(road_id, N).
# It runs pgr_drivingDistance on the directed graph, starting from both end
# nodes of the given road segment, and inserts every result row into
# int_fietsdata.fietstellingen_drivingdistance as
#   road_id_source = the input road_id, road_id = reached edge, weight = agg_cost.
# That table must exist before the function is called; the function returns void.
# N is the maximum distance. Every edge has cost 1, so N counts segments, but it
# is declared FLOAT to support a possible distance metric in meters.
# Edge ids in the directed graph are signed (-wvk_id for the forward edge,
# +wvk_id for the reversed edge). The segment is therefore looked up on its
# absolute id; otherwise segments that only have a forward edge are never
# found and produce an empty start array.
sql='''DROP FUNCTION IF EXISTS rdt_fietstellingen_drivingdistance;
CREATE FUNCTION rdt_fietstellingen_drivingdistance(road_id INT, N FLOAT) RETURNS void AS $$
    BEGIN
        INSERT INTO int_fietsdata.fietstellingen_drivingdistance (road_id_source,road_id,weight)
        SELECT road_id as road_id_source, d.edge as road_id, d.agg_cost as weight
        FROM pgr_drivingDistance(
            'select id, source, target, cost from prd_fietsdata.doorfietsroute_directed',
            ARRAY(
                -- match on abs(id): graph ids are signed per direction;
                -- UNION keeps each end node only once in the start array
                SELECT source as s
                from prd_fietsdata.doorfietsroute_directed
                WHERE abs(id)=abs(road_id)
                UNION
                SELECT target as s
                from prd_fietsdata.doorfietsroute_directed
                WHERE abs(id)=abs(road_id)
            ),
            N) as d;
    END;
$$ LANGUAGE plpgsql;'''
cur.execute(sql)

In [14]:
# Combine the 24h counts of both input tables into one measurements table.
# The literal `source` column ('ndw' / 'pnh') records which input table a row
# came from; the remaining columns are copied as they are.
# UNION (not UNION ALL) is used, so fully identical rows are collapsed into one.
sql='''DROP TABLE IF EXISTS int_fietsdata.fietstellingen_24h_road_id_measurements;
CREATE TABLE int_fietsdata.fietstellingen_24h_road_id_measurements AS (
    SELECT 'ndw' as source
        , n.beschrijving, n.measurementdate, n.counts_BothWaysSummed, n.road_id
        ,n.geom
    from prd_fietsdata.ndw_fietstellingen_measured_data_24h_road_id as n
    union
    SELECT 'pnh' as source
        , p.beschrijving, p.measurementdate, p.counts_BothWaysSummed, p.road_id
        ,p.geom
    from prd_fietsdata.pnh_fietstellingen_measured_data_24h_road_id as p
);'''
cur.execute(sql)

In [15]:
# (Re)create the empty table that collects the driving-distance results, then
# run rdt_fietstellingen_drivingdistance once per distinct abs(road_id) found
# in the measurements table.
# The LATERAL call is evaluated for every row of the sub-select `i`, which
# takes the place of a FOR loop. The function returns void and writes its rows
# into the table itself, so the result set of the SELECT is not read here.
# 10.0 is the maximum distance N handed to the function.
sql='''drop TABLE if EXISTS int_fietsdata.fietstellingen_drivingdistance;
create TABLE int_fietsdata.fietstellingen_drivingdistance (
    id SERIAL PRIMARY KEY,
    road_id_source INT,
    road_id INT,
    weight FLOAT
);
SELECT *
FROM (
    SELECT DISTINCT abs(road_id) as road_id
    from int_fietsdata.fietstellingen_24h_road_id_measurements
 ) as i
,LATERAL rdt_fietstellingen_drivingdistance(i.road_id::int,10.0) AS result;'''
cur.execute(sql)

## Interpolation
- afstand=weight
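- sum(weights)=totalweight, count(weights)=n
- (totalweight-weight)/(totalweight*(n-1))*fietstelling voor elke fietstelling binnen afstand N, gesommeerd per wegsegment; deze factoren tellen op tot 1 (bij n=1 wordt de fietstelling direct overgenomen)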
- Dit geeft een gewogen gemiddelde van alle tellingen binnen afstand N

In [16]:
# Clean the raw driving-distance results:
# - store the reached edge as an unsigned road_id (abs);
# - drop rows whose edge is -1 (presumably the rows pgr_drivingDistance emits
#   for the start vertices themselves — verify against the pgRouting docs);
# - drop road segments that have a measurement of their own, so only segments
#   without a counter are left to be interpolated.
# The NOT IN list excludes NULL road_ids explicitly: a single NULL in the list
# would make NOT IN evaluate to NULL for every row and leave the table empty.
sql='''drop table if exists int_fietsdata.fietstellingen_drivingdistance_cleaned;
CREATE TABLE int_fietsdata.fietstellingen_drivingdistance_cleaned
AS (
    select abs(road_id) as road_id, road_id_source, weight
    from int_fietsdata.fietstellingen_drivingdistance
    WHERE 
        road_id != -1
        AND abs(road_id) NOT IN (
            SELECT DISTINCT abs(road_id)
            from int_fietsdata.fietstellingen_24h_road_id_measurements
            -- a NULL in this list would make NOT IN reject every row
            WHERE road_id IS NOT NULL
        )
);'''
cur.execute(sql)

In [17]:
# For every (target road_id, source counter) pair of the cleaned table, attach
# the count measured at the source on the pilot date (2022-09-09) and the
# per-road_id totals that the interpolation step needs:
#   totalweight         = sum of the weights of the usable sources
#   interpolationvalues = number of usable sources
# A source is usable only when it has a count on the pilot date. Restricting
# the totals to those rows keeps the interpolation factors summing to 1;
# counting sources without a value would bias the weighted average low.
# Rows without a count are kept (LEFT JOIN) with counts_bothwayssummed = NULL.
# The measurement road_id is compared as abs(road_id), because road_id_source
# was produced from abs(road_id) when the driving-distance function was called.
sql='''drop table if exists int_fietsdata.fietstellingen_drivingdistance_totalweight;
CREATE TABLE int_fietsdata.fietstellingen_drivingdistance_totalweight
AS (
    SELECT dd.road_id, dd.weight, f.counts_bothwayssummed,
        -- Calculate totalweight per road_id, over rows that carry a count only
        sum(dd.weight) FILTER (WHERE f.counts_bothwayssummed IS NOT NULL)
            OVER (PARTITION BY dd.road_id) as totalweight,
        count(dd.weight) FILTER (WHERE f.counts_bothwayssummed IS NOT NULL)
            OVER (PARTITION BY dd.road_id) as interpolationvalues
    FROM int_fietsdata.fietstellingen_drivingdistance_cleaned as dd
    LEFT JOIN (
        -- Replace the 'road_id_source' value by its fietstelling value
        SELECT abs(road_id) as road_id,counts_bothwayssummed
        FROM int_fietsdata.fietstellingen_24h_road_id_measurements
        WHERE measurementdate=to_timestamp('2022-09-09','YYYY-MM-DD')
    ) as f
    ON f.road_id=dd.road_id_source
    ORDER BY road_id
);'''
cur.execute(sql)

In [18]:
# Build the interpolated count per road segment and attach the segment geometry.
# Per road_id the rows of the totalweight table are combined as
#   round(sum((totalweight - weight) / (totalweight * (interpolationvalues - 1)) * count))
# so a row's factor shrinks as its weight (distance) grows.
# When interpolationvalues = 1 the count is taken over unchanged; the general
# formula would divide by zero in that case.
# NOTE(review): the SQL comment inside the CASE mentions a "0 value" flag, but
# the condition tests interpolationvalues=1 — the comment looks outdated.
# Output rows get source = 'interpolation', an empty beschrijving and the fixed
# measurement date 2022-09-09. The geometry comes from a LEFT JOIN on
# doorfietsroute_cleaned.wvk_id, so a road_id without a matching segment keeps
# a NULL geom.
sql='''drop table if exists prd_fietsdata.fietstellingen_24h_road_id_interpolated;
CREATE TABLE prd_fietsdata.fietstellingen_24h_road_id_interpolated
AS (
    SELECT 'interpolation' as source, '' as beschrijving, to_timestamp('2022-09-09','YYYY-MM-DD') as measurementdate
        ,tw_group.counts_bothwayssummed, tw_group.road_id
        ,w.geom
    FROM (
        SELECT 
            road_id,
            round(sum(CASE
                -- 0 value works as a flag -> these values should not be interpolated but used directly
                WHEN tw.interpolationvalues=1 THEN tw.counts_bothwayssummed
                ELSE (tw.totalweight-tw.weight)/(tw.totalweight*(tw.interpolationvalues-1))*tw.counts_bothwayssummed
            END)) as counts_bothwayssummed
        FROM int_fietsdata.fietstellingen_drivingdistance_totalweight as tw
        GROUP BY road_id
        ORDER BY road_id
    ) as tw_group
    left JOIN prd_fietsdata.doorfietsroute_cleaned AS w
    on tw_group.road_id = w.wvk_id
);'''
cur.execute(sql)