This notebook snaps substations to lines.

In [None]:
import os
import matplotlib.pyplot as plt 
import geopandas as gpd
#import geoplot
import pandas as pd
import numpy as np
from shapely.geometry import LineString
from shapely.geometry import Point

import sys
# Put the parent directory on the import path so the `scripts` package can be found.
sys.path.append('../')  # to import helpers
from scripts._helpers import _sets_path_to_root
# NOTE(review): presumably makes the "pypsa-africa" repository root the working
# directory so that the relative "data/..." paths used below resolve - confirm
# against scripts/_helpers.
_sets_path_to_root("pypsa-africa")

# Africa shape data

In [None]:
# Country polygons from the low-resolution Natural Earth dataset that is
# located through geopandas' dataset helper.
naturalearth_path = gpd.datasets.get_path('naturalearth_lowres')
world = gpd.read_file(naturalearth_path)

# Continent-wide and Nigeria-only subsets of the world table.
africa = world.query('continent == "Africa"')
nigeriaplot = world.query('name == "Nigeria"')
#world

# Load data

In [None]:
# Generators are not required in base_network, so only substations and lines
# are loaded; the generator lines are kept commented out for reference.
# pathg = os.path.join(os.path.realpath("data"), "africa_all_generators.geojson")

# Build the input paths with os.path.join instead of concatenating a
# hard-coded "/" so they stay valid on every platform.
clean_dir = os.path.realpath("data/clean")
paths = os.path.join(clean_dir, "africa_all_substations.geojson")
pathl = os.path.join(clean_dir, "africa_all_lines.geojson")

# Tag both tables as EPSG:4326. set_crs already returns the frame, so
# inplace=True on the temporary result of read_file is not needed.
# generators = gpd.read_file(pathg).set_crs(epsg=4326)
substations = gpd.read_file(paths).set_crs(epsg=4326)
lines = gpd.read_file(pathl).set_crs(epsg=4326)

In [None]:
# Display the lines table as loaded from the GeoJSON file.
lines

In [None]:
# Show the absolute path that the relative "data/clean" directory resolves to.
print(os.path.realpath("data/clean"))

In [None]:
# Keep only the Nigerian rows (country code "NG"). The copies make sure that
# columns added later do not write back into the continent-wide tables.
is_ng_line = lines["country"] == "NG"
is_ng_substation = substations["country"] == "NG"
lines_ng = lines.loc[is_ng_line].copy()
substations_ng = substations.loc[is_ng_substation].copy()

In [None]:
# Display the Nigeria-only lines table.
lines_ng

In [None]:
# Assign to every line a start and an end point.

# The boundary of a line geometry is the MultiPoint holding its end points.
lines_ng["bounds"] = lines_ng["geometry"].boundary

# Extract the two end points. `.geoms` is used because multi-part geometries
# can no longer be indexed directly (`mp[0]`) in Shapely 2.
line_start = lines_ng["bounds"].apply(lambda mp: mp.geoms[0])
line_end = lines_ng["bounds"].apply(lambda mp: mp.geoms[1])

# Read lon/lat from the end points themselves. The bounding box of the
# MultiPoint (minx, miny, maxx, maxy) must not be used here: for a line whose
# start lies north-west of its end it mixes coordinates of both end points and
# yields bus positions that do not match bus_0_coors / bus_1_coors.
lines_ng["bus0_lon"] = line_start.apply(lambda point: point.x)
lines_ng["bus0_lat"] = line_start.apply(lambda point: point.y)
lines_ng["bus1_lon"] = line_end.apply(lambda point: point.x)
lines_ng["bus1_lat"] = line_end.apply(lambda point: point.y)

# Point geometries of the two line ends (column order kept as before).
lines_ng['bus_0_coors'] = line_start
lines_ng['bus_1_coors'] = line_end

In [None]:
# Display the Nigerian lines including the bounds / bus0_* / bus1_* columns added above.
lines_ng

In [None]:
#lines_ng[lines_ng["length"] > 10000].sort_values("length").describe()
#substations_ng

# Case 1: Create bus dataframe from line.csv (not using existing substation dataframe)

### Create bus_id for each line ending

In [None]:
# Two empty bus tables that share the column layout of the substation table:
# one for line start points (bus_s) and one for line end points (bus_e).
bus_columns = substations_ng.columns
bus_s = gpd.GeoDataFrame(columns=bus_columns)
bus_e = gpd.GeoDataFrame(columns=bus_columns)

In [None]:
# Fill the bus tables from the line table: every line contributes one bus for
# its start point and one bus for its end point.
bus_s[["voltage", "lon", "lat", "geometry", "country"]] = lines_ng[["voltage", "bus0_lon", "bus0_lat", "bus_0_coors", "country"]] # line start points
bus_e[["voltage", "lon", "lat", "geometry", "country"]] = lines_ng[["voltage", "bus1_lon", "bus1_lat", "bus_1_coors", "country"]] # line end points

# Stack start buses on top of end buses with a fresh 0..N-1 index.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
bus_all = pd.concat([bus_s, bus_e], ignore_index=True)

In [None]:
# Use the running row index of the stacked bus table as bus_id, so every line
# end gets its own identifier.
bus_all.loc[:,"bus_id"] = bus_all.index
# Display the resulting bus table.
bus_all

### V1. Create station_id at same bus location

- We saw that buses are not connected exactly at one point; they are usually connected to a substation "area" (analysed on maps).
- Creating a station_id only for buses at exactly the same location might therefore not always be correct.
- As you can see below, though, this is still sometimes the case. Examples are **station 4** (2 lines with the same voltage connect at the same point) and **station 23** (4 lines with two different voltages connect at the same point).

#TODO: Filter out the generator lines - defined as going from a generator to the next station which is connected to a load. Excluding generator lines probably makes sense because they are not relevant for transmission expansion. For now we simplify and include generator lines.


In [None]:
# Buses that share exactly the same (lon, lat) pair form one station.
# Note: bus_all_v1 is another name for bus_all, not a copy.
bus_all_v1 = bus_all
location_groups = bus_all_v1.groupby(["lon","lat"])
bus_all_v1["station_id"] = location_groups.ngroup()

## INFO, case nigeria
# bus_id 0:899
# station_id 0:791
# i.e. only a few bus_id's sit at exactly the same location
bus_all_v1

In [None]:
# Attach the bus_id of both ends to every line. bus_all holds all start buses
# in its first half and all end buses in its second half, in line order.
n_row = len(bus_all) // 2  # number of lines
lines_ng_v1 = lines_ng.reset_index(drop=True)
all_bus_ids = bus_all["bus_id"]
lines_ng_v1['bus0'] = all_bus_ids.iloc[:n_row]
lines_ng_v1['bus1'] = all_bus_ids.iloc[n_row:].reset_index(drop=True)

In [None]:
# display(lines_ng_v1)
# display(bus_all_v1)

In [None]:
# Replace the NaN in the attribute columns with constant placeholders.
# Note that these five columns receive the *string* "False", whereas
# substation_lv is a real boolean.
for placeholder_column in ("dc", "symbol", "under_construction", "tag_substation", "tag_area"):
    bus_all_v1[placeholder_column] = "False"
bus_all_v1["substation_lv"] = True


In [None]:
import shapely
from shapely.validation import make_valid

# Show the installed Shapely version. It is kept as the last expression of the
# cell because only the last expression is rendered as cell output.
shapely.__version__

## Decide on LV_Substation

In [None]:
# Every bus whose station_id occurs more than once, ordered by station and,
# within a station, by voltage.
shares_station = bus_all_v1["station_id"].duplicated(keep=False)
bus_with_stations_duplicates = bus_all_v1[shares_station].sort_values(by=["station_id","voltage"])
# bus_with_stations_duplicates

In [None]:
# For every station with several buses keep only the first row after sorting
# by voltage, i.e. the bus with the lowest voltage at that station.
in_shared_station = bus_all_v1["station_id"].duplicated(keep=False)
shared_station_buses = bus_all_v1[in_shared_station].sort_values(by=["station_id","voltage"])
lv_bus_at_station_duplicates = shared_station_buses.drop_duplicates(subset=["station_id"])
# lv_bus_at_station_duplicates

In [None]:
# Order matters: first clear the flag for every bus in a shared station,
# then set it again for the single lowest-voltage bus of each such station.
shared_station_rows = bus_with_stations_duplicates.index
lowest_voltage_rows = lv_bus_at_station_duplicates.index
bus_all_v1.loc[shared_station_rows, "substation_lv"] = False
bus_all_v1.loc[lowest_voltage_rows, "substation_lv"] = True

In [None]:
# Check whether any bus carries the country code "TN".
has_country_code = bus_all_v1["country"] == "TN"
bus_all_v1.loc[has_country_code]

In [None]:
# Show the buses that are flagged as the low-voltage bus of their station.
bus_all_v1.loc[bus_all_v1["substation_lv"].eq(True)]

In [None]:
# Spot check: list all buses that belong to station 20.
bus_all_v1.loc[bus_all_v1["station_id"].eq(20)]

- If several buses share the same station id,
- then find the bus with the minimum voltage.
- If the minimum is shared by two buses or is NA,
- then pick the first of the duplicates.
- Action: set substation_lv = True

In [None]:

## Generate Files for base_network (CSV)

def _export_base_network_csv(frame, component):
    """Write ``frame`` to data/base_network/africa_all_<component>_build_network.csv.

    The target directory is resolved relative to the current working directory
    and is created if it does not exist yet.

    Parameters
    ----------
    frame : DataFrame-like
        Table to export; only its ``to_csv`` method is used.
    component : str
        Name fragment placed in the file name, e.g. "lines" or "buses".
    """
    output_dir = os.path.join(os.getcwd(), "data", "base_network")
    # exist_ok=True makes a separate existence check unnecessary. The check is
    # done on the directory itself, not on the extension-less file stem.
    os.makedirs(output_dir, exist_ok=True)
    output_file = os.path.join(output_dir, f"africa_all_{component}_build_network.csv")
    frame.to_csv(output_file)


### lines
_export_base_network_csv(lines_ng_v1, "lines")

#### buses
_export_base_network_csv(bus_all_v1, "buses")

# Inference (Quick and Dirty) - No Guarantee

In [None]:
# Reprojected copy of the bus table in EPSG:3857; the distance threshold of the
# inference below is evaluated in the units of this CRS.
cdf = bus_all_v1.to_crs("EPSG:3857")

In [None]:
from itertools import combinations

series = cdf.geometry
max_distance = 100  # threshold in the units of cdf's CRS

# Pull the geometries out once so the pairwise loop does not pay for a Series
# lookup on every comparison.
bus_points = list(series)

# Collect every pair of buses that are closer than max_distance but not at the
# very same location (distance 0 is already covered by the station_id grouping).
# The range must span ALL positions; stopping at len - 1 would silently leave
# the last bus out of every comparison.
points_within = []
for i1, i2 in combinations(range(len(bus_points)), 2):  # all unordered index pairs
    if 0 < bus_points[i1].distance(bus_points[i2]) <= max_distance:
        points_within.append([i1, i2])

In [None]:
# points_within
# For every close pair, hand the station_id currently held by the first bus
# over to the second bus.
for first_bus, second_bus in points_within:
    bus_all_v1.at[second_bus, 'station_id'] = bus_all_v1.iloc[first_bus]['station_id']


    

In [None]:
# Display the bus table after the station_id values of close buses were merged.
bus_all_v1

In [None]:
# Number of non-null entries per column for every station_id.
bus_all_v1.groupby('station_id').count()

# Inference (Probably a better method) - For the Archives

This method will be implemented once the cleaning is completed. See the method in Powermap for how it should be done correctly.

In [None]:
# Buffer every bus by 100 units of cdf's CRS.
x = cdf.buffer(100)
# NOTE(review): geopandas' intersection appears to work row by row (bus i with
# buffer i), which would return each bus itself rather than its neighbours -
# confirm; a spatial join is probably what is needed here.
neighbours = cdf.intersection(x)
display(neighbours)
# print all the nearby points
# display(x)