In [14]:
import pandas as pd
import pandana as pdna
import geopandas as gpd
from pathlib import Path
import networkx as nx
import momepy
from itertools import combinations
from shapely.geometry import Point


gdf_roads = gpd.read_file('../data/UKR_networks_fixed.gpkg', layer="roads")

def make_graph(gdf: gpd.GeoDataFrame, nodes: str = "nodes", edges: str = "edges") -> nx.Graph:
    G = momepy.gdf_to_nx(gdf, approach="primal")
    nodes, edges, sw = momepy.nx_to_gdf(G, points=True, lines=True, spatial_weights=True)
    #nodes.to_file(out_gpkg, layer=nodes)
    #edges.to_file(out_gpkg, layer=edges)
    G = nx.MultiGraph()
    G.add_nodes_from(nodes.nodeID.unique().tolist())
    for index, row in edges.iterrows():
        G.add_edge(row.node_start, row.node_end, weight=row.length)
    return G, nodes, edges, sw

G, nodes, edges, _ = make_graph(gdf_roads)
display(nodes.head())
display(edges.head())
net = pdna.Network(nodes.geometry.x, nodes.geometry.y, edges.node_start, edges.node_end, edges[["length"]])
net.precompute(3000)

 There are 218 disconnected components.


Unnamed: 0,nodeID,geometry
0,0,POINT (550265.396 5599118.127)
1,1,POINT (550329.890 5598885.909)
2,2,POINT (559190.887 5593222.167)
3,3,POINT (559370.266 5592902.639)
4,4,POINT (547937.874 5603826.640)


Unnamed: 0,id,length,geometry,mm_len,node_start,node_end
0,1,243.672813,"LINESTRING (550265.396 5599118.127, 550269.327...",243.672813,0,1
1,1,44.894422,"LINESTRING (550233.796 5599149.983, 550250.715...",44.894422,0,536
2,1,21.242812,"LINESTRING (550329.890 5598885.909, 550333.474...",21.242812,1,84404
3,1,369.206205,"LINESTRING (559190.887 5593222.167, 559196.552...",369.206205,2,3
4,1,72.954659,"LINESTRING (559190.887 5593222.167, 559202.941...",72.954659,2,118425


In [17]:
gdf = gpd.read_file("../data/GEODATA.gpkg").to_crs(6383)
gdf = gdf[gdf.admin_level == 1]


# Assume gdf is your GeoDataFrame
gdf['centroid'] = gdf['geometry'].centroid
gdf = gpd.GeoDataFrame(gdf[["pcode", "centroid"]], geometry="centroid")
gdf.head()

Unnamed: 0,pcode,centroid
0,UA01,POINT (876621.954 5046094.525)
1,UA05,POINT (423787.269 5422017.154)
2,UA07,POINT (151274.690 5675382.053)
3,UA12,POINT (881673.277 5381967.276)
4,UA14,POINT (1095549.609 5378690.306)


In [21]:
import numpy as np
from scipy.spatial import cKDTree
def ckdnearest(gdf_centroids, gdf_nodes):
    """Returns nearest neighbours between 1 gdf and another
    Function derived from posting @ https://gis.stackexchange.com/questions/222315/geopandas-find-nearest-point-in-other-dataframe with thanks to user JHuw
    """
    gdf_nodes = gdf_nodes.rename(columns={'geometry': 'geometry_dst'})
    try:
        nA = np.array(list(gdf_centroids.geometry.apply(lambda x: (x.x, x.y))))
        nB = np.array(list(gdf_nodes.geometry_dst.apply(lambda x: (x.x, x.y))))
        btree = cKDTree(nB)
        dist, idx = btree.query(nA, k=1)
        gdB_nearest = gdf_nodes.iloc[idx].reset_index(drop=True).nodeID
        gdf = pd.concat(
            [
                gdf_centroids.reset_index(drop=True),
                gdB_nearest,
                #pd.Series(dist, name='dist')
            ], 
            axis=1)
    except ValueError:
        gdf = gpd.GeoDataFrame({'y': [], 'x': [], 'band': [], 'spatial_ref': [], 'data': [], 'geometry': [], 'data_dst': [], 'geometry_dst': [], 'dist': []})
    return gdf[["pcode", "nodeID", "centroid"]]

In [22]:
df_nearest = ckdnearest(gdf, nodes)
df_nearest

Unnamed: 0,pcode,nodeID,centroid
0,UA01,24133,POINT (876621.954 5046094.525)
1,UA05,12582,POINT (423787.269 5422017.154)
2,UA07,144185,POINT (151274.690 5675382.053)
3,UA12,113699,POINT (881673.277 5381967.276)
4,UA14,91065,POINT (1095549.609 5378690.306)
5,UA18,133181,POINT (404670.304 5612707.727)
6,UA21,131951,POINT (24633.584 5370169.231)
7,UA23,54425,POINT (958394.687 5273745.315)
8,UA26,5990,POINT (125022.789 5399133.605)
9,UA32,126470,POINT (546697.837 5579949.035)


In [37]:
def calc_euc_distance(row):
    src = row.from_centroid
    dst = row.to_centroid
    distance = src.distance(dst)
    return distance

row_combinations = list(combinations(df_nearest[['pcode', 'nodeID', 'centroid']].values, 2))
df = pd.DataFrame(row_combinations, columns=['from', 'to'])
df[['from_pcode', 'from_nodeID', 'from_centroid']] = pd.DataFrame(df['from'].tolist(), index=df.index)
df[['to_pcode', 'to_nodeID', 'to_centroid']] = pd.DataFrame(df['to'].tolist(), index=df.index)
df = df.drop(columns=['from', 'to'])
df["euclidean_dist"] = df.apply(calc_euc_distance, axis=1)
shortest_path_nodes = net.shortest_paths(df.from_nodeID, df.to_nodeID)
df["shortest_path_nodes"] = shortest_path_nodes
df["shortest_path_length"] = net.shortest_path_lengths(df.from_nodeID, df.to_nodeID)
df['list_length'] = df['shortest_path_nodes'].apply(len)
df



Unnamed: 0,from_pcode,from_nodeID,from_centroid,to_pcode,to_nodeID,to_centroid,euclidean_dist,shortest_path_nodes,shortest_path_length,list_length
0,UA01,24133,POINT (876621.9538866108 5046094.524828508),UA05,12582,POINT (423787.2687308593 5422017.1540261395),588538.083069,"[24133, 24132, 24139, 23825, 23824, 23858, 238...",954515.842,919
1,UA01,24133,POINT (876621.9538866108 5046094.524828508),UA07,144185,POINT (151274.68971481133 5675382.052816165),960276.755171,"[24133, 24132, 24139, 23825, 23824, 23858, 238...",1331315.168,1553
2,UA01,24133,POINT (876621.9538866108 5046094.524828508),UA12,113699,POINT (881673.2768812476 5381967.275597983),335910.733043,"[24133, 24132, 24139, 23825, 23824, 23858, 238...",461371.135,489
3,UA01,24133,POINT (876621.9538866108 5046094.524828508),UA14,91065,POINT (1095549.6090582262 5378690.306332936),398182.460781,"[24133, 24132, 24139, 23825, 23824, 23858, 238...",495079.191,285
4,UA01,24133,POINT (876621.9538866108 5046094.524828508),UA18,133181,POINT (404670.30445587693 5612707.726622723),737420.422722,"[24133, 24132, 24139, 23825, 23824, 23858, 238...",1061099.609,1517
...,...,...,...,...,...,...,...,...,...,...
346,UA73,35235,POINT (223930.7711454259 5348968.44251406),UA80,83962,POINT (552012.8820844525 5596896.8395627495),411225.438879,"[35235, 35234, 2178, 2179, 2177, 2175, 8208, 8...",501377.218,639
347,UA73,35235,POINT (223930.7711454259 5348968.44251406),UA85,107083,POINT (828971.3479945756 4959641.241947325),719478.817433,"[35235, 35234, 2178, 2179, 2177, 2175, 8208, 8...",1311997.879,1282
348,UA74,73281,POINT (648524.6282913171 5703271.774074502),UA80,83962,POINT (552012.8820844525 5596896.8395627495),143631.973628,"[73281, 63810, 63809, 63808, 63815, 97324, 635...",191995.262,282
349,UA74,73281,POINT (648524.6282913171 5703271.774074502),UA85,107083,POINT (828971.3479945756 4959641.241947325),765210.681423,"[73281, 63810, 63809, 63808, 63815, 63574, 635...",1026877.847,1097


## This is where to fix isolated nodes or remove them from the graph

In [35]:
#  G = momepy.gdf_to_nx(gdf_roads, approach="primal")

# # Find the disconnected components
# components = list(nx.connected_components(G))

# # Print the number of disconnected components
# print(f"Number of disconnected components: {len(components)}")

# # Identify the largest component (main graph)
# largest_component = max(components, key=len)

# # Identify the nodes in the isolated parts of the graph
# isolated_nodes = [node for component in components if component != largest_component for node in component]
# print(f"Nodes in isolated parts of the graph: {isolated_nodes}")

26

In [38]:
df = df[df.list_length > 0]
df

Unnamed: 0,from_pcode,from_nodeID,from_centroid,to_pcode,to_nodeID,to_centroid,euclidean_dist,shortest_path_nodes,shortest_path_length,list_length
0,UA01,24133,POINT (876621.9538866108 5046094.524828508),UA05,12582,POINT (423787.2687308593 5422017.1540261395),588538.083069,"[24133, 24132, 24139, 23825, 23824, 23858, 238...",954515.842,919
1,UA01,24133,POINT (876621.9538866108 5046094.524828508),UA07,144185,POINT (151274.68971481133 5675382.052816165),960276.755171,"[24133, 24132, 24139, 23825, 23824, 23858, 238...",1331315.168,1553
2,UA01,24133,POINT (876621.9538866108 5046094.524828508),UA12,113699,POINT (881673.2768812476 5381967.275597983),335910.733043,"[24133, 24132, 24139, 23825, 23824, 23858, 238...",461371.135,489
3,UA01,24133,POINT (876621.9538866108 5046094.524828508),UA14,91065,POINT (1095549.6090582262 5378690.306332936),398182.460781,"[24133, 24132, 24139, 23825, 23824, 23858, 238...",495079.191,285
4,UA01,24133,POINT (876621.9538866108 5046094.524828508),UA18,133181,POINT (404670.30445587693 5612707.726622723),737420.422722,"[24133, 24132, 24139, 23825, 23824, 23858, 238...",1061099.609,1517
...,...,...,...,...,...,...,...,...,...,...
346,UA73,35235,POINT (223930.7711454259 5348968.44251406),UA80,83962,POINT (552012.8820844525 5596896.8395627495),411225.438879,"[35235, 35234, 2178, 2179, 2177, 2175, 8208, 8...",501377.218,639
347,UA73,35235,POINT (223930.7711454259 5348968.44251406),UA85,107083,POINT (828971.3479945756 4959641.241947325),719478.817433,"[35235, 35234, 2178, 2179, 2177, 2175, 8208, 8...",1311997.879,1282
348,UA74,73281,POINT (648524.6282913171 5703271.774074502),UA80,83962,POINT (552012.8820844525 5596896.8395627495),143631.973628,"[73281, 63810, 63809, 63808, 63815, 97324, 635...",191995.262,282
349,UA74,73281,POINT (648524.6282913171 5703271.774074502),UA85,107083,POINT (828971.3479945756 4959641.241947325),765210.681423,"[73281, 63810, 63809, 63808, 63815, 63574, 635...",1026877.847,1097


In [73]:
#edge_dict = {(row["node_start"], row["node_end"]): row["geometry"] for _, row in edges.iterrows()}
edges_dict = {}
for row in edges.itertuples():
    edges_dict[(row.node_start, row.node_end)] = row.Index
    edges_dict[(row.node_end, row.node_start)] = row.Index

In [75]:
for key, item in edges_dict.items():
    print(key)
    print(item)
    break
print(edges_dict[(24133, 24132)])
print(edges_dict[(24132, 24139)])
print(edges_dict[(24139, 24132)])
display(edges.iloc[41811])

(0, 1)
0
41810
41811
41811


id                                                            1
length                                              2192.103958
geometry      LINESTRING (871016.4604510551 5039513.55663293...
mm_len                                              2192.103958
node_start                                                24132
node_end                                                  24139
Name: 41811, dtype: object

In [81]:
from shapely.geometry import MultiLineString
def nodes_to_edges(row):
    nodes = row.shortest_path_nodes
    return [edges_dict[(nodes[i-1], nodes[i])] for i in range(1, len(nodes))]
df = df.copy()
df.loc[:, 'edge_geometries_ids'] = df.apply(nodes_to_edges, axis=1)

In [84]:
def edge_geometries(row):
    gdf = edges.loc[row.edge_geometries_ids]
    geom = gdf.geometry.unary_union
    return geom

df.loc[:, "edge_geometries"] = df.apply(edge_geometries, axis=1)
df

Unnamed: 0,from_pcode,from_nodeID,from_centroid,to_pcode,to_nodeID,to_centroid,euclidean_dist,shortest_path_nodes,shortest_path_length,list_length,edge_geometries_ids,edge_geometries
0,UA01,24133,POINT (876621.9538866108 5046094.524828508),UA05,12582,POINT (423787.2687308593 5422017.1540261395),588538.083069,"[24133, 24132, 24139, 23825, 23824, 23858, 238...",954515.842,919,"[41810, 41811, 41349, 41347, 41348, 41398, 413...",MULTILINESTRING ((873187.8137576623 5039213.48...
1,UA01,24133,POINT (876621.9538866108 5046094.524828508),UA07,144185,POINT (151274.68971481133 5675382.052816165),960276.755171,"[24133, 24132, 24139, 23825, 23824, 23858, 238...",1331315.168,1553,"[41810, 41811, 41349, 41347, 41348, 41398, 413...",MULTILINESTRING ((873187.8137576623 5039213.48...
2,UA01,24133,POINT (876621.9538866108 5046094.524828508),UA12,113699,POINT (881673.2768812476 5381967.275597983),335910.733043,"[24133, 24132, 24139, 23825, 23824, 23858, 238...",461371.135,489,"[41810, 41811, 41349, 41347, 41348, 41398, 413...",MULTILINESTRING ((873187.8137576623 5039213.48...
3,UA01,24133,POINT (876621.9538866108 5046094.524828508),UA14,91065,POINT (1095549.6090582262 5378690.306332936),398182.460781,"[24133, 24132, 24139, 23825, 23824, 23858, 238...",495079.191,285,"[41810, 41811, 41349, 41347, 41348, 41398, 413...",MULTILINESTRING ((873187.8137576623 5039213.48...
4,UA01,24133,POINT (876621.9538866108 5046094.524828508),UA18,133181,POINT (404670.30445587693 5612707.726622723),737420.422722,"[24133, 24132, 24139, 23825, 23824, 23858, 238...",1061099.609,1517,"[41810, 41811, 41349, 41347, 41348, 41398, 413...",MULTILINESTRING ((873187.8137576623 5039213.48...
...,...,...,...,...,...,...,...,...,...,...,...,...
346,UA73,35235,POINT (223930.7711454259 5348968.44251406),UA80,83962,POINT (552012.8820844525 5596896.8395627495),411225.438879,"[35235, 35234, 2178, 2179, 2177, 2175, 8208, 8...",501377.218,639,"[58335, 4332, 4330, 4329, 4325, 4326, 15498, 1...",MULTILINESTRING ((223390.62899299423 5348726.1...
347,UA73,35235,POINT (223930.7711454259 5348968.44251406),UA85,107083,POINT (828971.3479945756 4959641.241947325),719478.817433,"[35235, 35234, 2178, 2179, 2177, 2175, 8208, 8...",1311997.879,1282,"[58335, 4332, 4330, 4329, 4325, 4326, 15498, 1...",MULTILINESTRING ((223390.62899299423 5348726.1...
348,UA74,73281,POINT (648524.6282913171 5703271.774074502),UA80,83962,POINT (552012.8820844525 5596896.8395627495),143631.973628,"[73281, 63810, 63809, 63808, 63815, 97324, 635...",191995.262,282,"[97766, 97765, 97763, 97764, 97770, 97483, 974...",MULTILINESTRING ((659310.6625829777 5701723.76...
349,UA74,73281,POINT (648524.6282913171 5703271.774074502),UA85,107083,POINT (828971.3479945756 4959641.241947325),765210.681423,"[73281, 63810, 63809, 63808, 63815, 63574, 635...",1026877.847,1097,"[97766, 97765, 97763, 97764, 97465, 97464, 974...",MULTILINESTRING ((659310.6625829777 5701723.76...


In [92]:
incidents_df = pd.read_csv("../data/2022-02-01-2024-02-21-Europe-Ukraine.csv")
print(incidents_df.columns)
incidents_df = incidents_df[[
    "event_id_cnty",
    "event_date",
    "year",
    "disorder_type",
    "event_type",
    "sub_event_type",
    "latitude",
    "longitude",
    "fatalities",
    "timestamp"
]]
incidents_df['date'] = pd.to_datetime(incidents_df['timestamp'], unit='s')
incidents_df["geometry"] = incidents_df.apply(lambda row: Point(row.longitude, row.latitude), axis=1)
incidents_gdf = gpd.GeoDataFrame(incidents_df, geometry="geometry", crs=4326).to_crs(6383)
incidents_gdf

Index(['event_id_cnty', 'event_date', 'year', 'time_precision',
       'disorder_type', 'event_type', 'sub_event_type', 'actor1',
       'assoc_actor_1', 'inter1', 'actor2', 'assoc_actor_2', 'inter2',
       'interaction', 'civilian_targeting', 'iso', 'region', 'country',
       'admin1', 'admin2', 'admin3', 'location', 'latitude', 'longitude',
       'geo_precision', 'source', 'source_scale', 'notes', 'fatalities',
       'tags', 'timestamp', 'population_best'],
      dtype='object')


Unnamed: 0,event_id_cnty,event_date,year,disorder_type,event_type,sub_event_type,latitude,longitude,fatalities,timestamp,date,geometry
0,UKR148678,16 February 2024,2024,Political violence,Battles,Armed clash,48.5008,37.9680,0,1708472500,2024-02-20 23:41:40,POINT (1110034.329 5432627.752)
1,UKR148679,16 February 2024,2024,Political violence,Battles,Armed clash,48.1394,37.7497,0,1708472500,2024-02-20 23:41:40,POINT (1099625.644 5390216.200)
2,UKR148680,16 February 2024,2024,Political violence,Battles,Armed clash,48.1936,37.6405,0,1708472500,2024-02-20 23:41:40,POINT (1090673.143 5395089.751)
3,UKR148681,16 February 2024,2024,Political violence,Battles,Armed clash,48.9259,38.2467,0,1708472500,2024-02-20 23:41:40,POINT (1123534.704 5482796.087)
4,UKR148682,16 February 2024,2024,Political violence,Explosions/Remote violence,Air/drone strike,51.1483,34.3008,0,1708472500,2024-02-20 23:41:40,POINT (810688.210 5694097.057)
...,...,...,...,...,...,...,...,...,...,...,...,...
97560,UKR51024,01 February 2022,2022,Political violence,Explosions/Remote violence,Shelling/artillery/missile attack,47.9512,37.5492,0,1680636707,2023-04-04 19:31:47,POINT (1087626.453 5367244.643)
97561,UKR51026,01 February 2022,2022,Political violence,Explosions/Remote violence,Shelling/artillery/missile attack,47.2972,37.9076,0,1680636707,2023-04-04 19:31:47,POINT (1124678.890 5298421.403)
97562,UKR51028,01 February 2022,2022,Political violence,Explosions/Remote violence,Shelling/artillery/missile attack,47.1745,37.8110,0,1680636707,2023-04-04 19:31:47,POINT (1119293.861 5283772.580)
97563,UKR51087,01 February 2022,2022,Political violence,Battles,Armed clash,48.0669,37.6664,0,1691529761,2023-08-08 21:22:41,POINT (1094566.752 5381299.564)


In [94]:
incidents_gdf.to_file("../data/ACLED_010222_210224.gpkg", driver="GPKG")

In [100]:
routes_df = gpd.GeoDataFrame(df[['from_pcode', 'to_pcode', 'euclidean_dist', "shortest_path_length", "edge_geometries"]], geometry="edge_geometries", crs=6383)
routes_df.head()

Unnamed: 0,from_pcode,to_pcode,euclidean_dist,shortest_path_length,edge_geometries
0,UA01,UA05,588538.083069,954515.842,"MULTILINESTRING ((873187.814 5039213.485, 8732..."
1,UA01,UA07,960276.755171,1331315.168,"MULTILINESTRING ((873187.814 5039213.485, 8732..."
2,UA01,UA12,335910.733043,461371.135,"MULTILINESTRING ((873187.814 5039213.485, 8732..."
3,UA01,UA14,398182.460781,495079.191,"MULTILINESTRING ((873187.814 5039213.485, 8732..."
4,UA01,UA18,737420.422722,1061099.609,"MULTILINESTRING ((873187.814 5039213.485, 8732..."


In [101]:
import dask_geopandas as dg
from shapely.geometry import Point

d_incidents_gdf = dg.from_geopandas(incidents_gdf, npartitions=4)
d_routes = dg.from_geopandas(routes_df, npartitions=4)
d_routes

Unnamed: 0_level_0,from_pcode,to_pcode,euclidean_dist,shortest_path_length,edge_geometries
npartitions=4,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,object,object,float64,float64,geometry
86,...,...,...,...,...
171,...,...,...,...,...
257,...,...,...,...,...
350,...,...,...,...,...


In [102]:
d_incidents_gdf["geometry"] =d_incidents_gdf["geometry"].buffer(1000)
gdf_joined = dg.sjoin(d_incidents_gdf, d_routes, how="inner", op="intersects")
print("COMPUTING")
result = gdf_joined.compute()
display(result)

  gdf_joined = dg.sjoin(d_incidents_gdf, d_routes, how="inner", op="intersects")


COMPUTING


Unnamed: 0,event_id_cnty,event_date,year,disorder_type,event_type,sub_event_type,latitude,longitude,fatalities,timestamp,date,geometry,index_right,from_pcode,to_pcode,euclidean_dist,shortest_path_length
4,UKR148682,16 February 2024,2024,Political violence,Explosions/Remote violence,Air/drone strike,51.1483,34.3008,0,1708472500,2024-02-20 23:41:40,"POLYGON ((811688.210 5694097.057, 811683.395 5...",16,UA01,UA59,646252.893172,842940.236
82,UKR148760,16 February 2024,2024,Political violence,Explosions/Remote violence,Air/drone strike,47.5079,35.4622,0,1708472501,2024-02-20 23:41:41,"POLYGON ((938391.033 5298652.536, 938386.218 5...",16,UA01,UA59,646252.893172,842940.236
118,UKR148586,15 February 2024,2024,Political violence,Explosions/Remote violence,Shelling/artillery/missile attack,51.1483,34.3008,0,1708472500,2024-02-20 23:41:40,"POLYGON ((811688.210 5694097.057, 811683.395 5...",16,UA01,UA59,646252.893172,842940.236
159,UKR148627,15 February 2024,2024,Political violence,Violence against civilians,Abduction/forced disappearance,46.8489,35.3653,0,1708472500,2024-02-20 23:41:40,"POLYGON ((938993.993 5224655.514, 938989.178 5...",16,UA01,UA59,646252.893172,842940.236
220,UKR148468,14 February 2024,2024,Political violence,Explosions/Remote violence,Shelling/artillery/missile attack,51.1483,34.3008,0,1708472500,2024-02-20 23:41:40,"POLYGON ((811688.210 5694097.057, 811683.395 5...",16,UA01,UA59,646252.893172,842940.236
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95808,UKR52591,03 March 2022,2022,Political violence,Battles,Armed clash,51.0480,31.8869,0,1646783636,2022-03-08 23:53:56,"POLYGON ((643728.123 5668919.124, 643723.308 5...",342,UA71,UA74,237616.976449,332229.570
96221,UKR54919,27 February 2022,2022,Political violence,Explosions/Remote violence,Shelling/artillery/missile attack,50.5952,31.6702,0,1649784779,2022-04-12 17:32:59,"POLYGON ((631730.751 5617593.917, 631725.936 5...",342,UA71,UA74,237616.976449,332229.570
96450,UKR68053,24 February 2022,2022,Political violence,Explosions/Remote violence,Shelling/artillery/missile attack,51.0480,31.8869,0,1663096258,2022-09-13 19:10:58,"POLYGON ((643728.123 5668919.124, 643723.308 5...",342,UA71,UA74,237616.976449,332229.570
94275,UKR54321,20 March 2022,2022,Political violence,Explosions/Remote violence,Shelling/artillery/missile attack,50.3273,30.5678,0,1648582626,2022-03-29 19:37:06,"POLYGON ((555144.935 5583462.688, 555140.120 5...",271,UA48,UA80,347291.532131,411478.941


In [104]:
agg_result = pd.DataFrame(result[["from_pcode", "to_pcode", "euclidean_dist", "shortest_path_length", "event_id_cnty", "event_date", "year", "disorder_type", "event_type", "fatalities"]])
agg_result

Unnamed: 0,from_pcode,to_pcode,euclidean_dist,shortest_path_length,event_id_cnty,event_date,year,disorder_type,event_type,fatalities
4,UA01,UA59,646252.893172,842940.236,UKR148682,16 February 2024,2024,Political violence,Explosions/Remote violence,0
82,UA01,UA59,646252.893172,842940.236,UKR148760,16 February 2024,2024,Political violence,Explosions/Remote violence,0
118,UA01,UA59,646252.893172,842940.236,UKR148586,15 February 2024,2024,Political violence,Explosions/Remote violence,0
159,UA01,UA59,646252.893172,842940.236,UKR148627,15 February 2024,2024,Political violence,Violence against civilians,0
220,UA01,UA59,646252.893172,842940.236,UKR148468,14 February 2024,2024,Political violence,Explosions/Remote violence,0
...,...,...,...,...,...,...,...,...,...,...
95808,UA71,UA74,237616.976449,332229.570,UKR52591,03 March 2022,2022,Political violence,Battles,0
96221,UA71,UA74,237616.976449,332229.570,UKR54919,27 February 2022,2022,Political violence,Explosions/Remote violence,0
96450,UA71,UA74,237616.976449,332229.570,UKR68053,24 February 2022,2022,Political violence,Explosions/Remote violence,0
94275,UA48,UA80,347291.532131,411478.941,UKR54321,20 March 2022,2022,Political violence,Explosions/Remote violence,0


In [107]:
df_grouped = agg_result.groupby(['from_pcode', 'to_pcode', 'euclidean_dist', 'shortest_path_length', 'event_date', 'disorder_type']).agg(
    fatalities_sum=('fatalities', 'sum')
).reset_index()
df_grouped

Unnamed: 0,from_pcode,to_pcode,euclidean_dist,shortest_path_length,event_date,disorder_type,fatalities_sum
0,UA01,UA05,588538.083069,954515.842,01 April 2022,Strategic developments,0
1,UA01,UA05,588538.083069,954515.842,01 April 2023,Political violence,0
2,UA01,UA05,588538.083069,954515.842,01 August 2022,Political violence,0
3,UA01,UA05,588538.083069,954515.842,01 August 2023,Political violence,5
4,UA01,UA05,588538.083069,954515.842,01 December 2022,Political violence,0
...,...,...,...,...,...,...,...
104932,UA80,UA85,694838.606103,980783.310,31 March 2022,Political violence,0
104933,UA80,UA85,694838.606103,980783.310,31 March 2023,Political violence,0
104934,UA80,UA85,694838.606103,980783.310,31 May 2022,Strategic developments,0
104935,UA80,UA85,694838.606103,980783.310,31 May 2023,Political violence,0


In [110]:
df_grouped.fatalities_sum.mean()
len(df_grouped.event_date.unique())

746