In this notebook, the land-use area (i.e. the exact polygon) is added to each mine included in the production dataset.

In [1]:
import os

import geopandas as gpd
import pandas as pd



## Read Data

In [2]:
# Load both inputs from their GeoPackage files: the production facilities
# first, then the polygons they will be joined against.
facilities, polygons = (
    gpd.read_file(gpkg_path)
    for gpkg_path in (
        "data/production/facilities.gpkg",
        "data/polygons/polygons.gpkg",
    )
)

## Preprocessing of facilities

In [3]:
# Restrict the facility table to the rows that can take part in the spatial join.

# only keep facilities, not sub-sites (i.e. rows without a sub_site_name)
is_main_facility = facilities["sub_site_name"].isnull()

# only keep facilities that have a usable geometry: `is_empty` is False for
# missing (None) geometries, so those have to be excluded explicitly as well
has_geometry = facilities.geometry.notna() & ~facilities.is_empty

facilities = facilities[is_main_facility & has_geometry]

## View the data

In [4]:
# Optional manual inspection of the two inputs; uncomment a line to run it.
# display(facilities)
# display(polygons)
# facilities.explore()
# polygons.explore()

In [10]:
### TEST: what happens if we intersect a multipoint with polygons?
# Select the single facility used for this test by its id and show it.
is_test_facility = facilities["facility_id"].eq("COM00007.00")
test_facilities = facilities.loc[is_test_facility]
display(test_facilities)

# Inner spatial join of that facility with the polygons; being the last
# expression, the joined frame is rendered as the cell output.
gpd.sjoin(
    test_facilities,
    polygons,
    how="inner",
    predicate="intersects",
    lsuffix="left",
    rsuffix="right",
)

Unnamed: 0,facility_id,facility_name,facility_other_names,sub_site_name,sub_site_other_names,facility_type,primary_commodity,commodities_products,facility_equipment,production_start,...,concession_area_sq_km,country,GID_0,GID_1,GID_2,GID_3,GID_4,source_id,comment,geometry
6,COM00007.00,AGA Mineracao,,,,Mine,Gold,Gold,"Underground, Open pit, Heap leaching plant",,...,,Brazil,BRA,BRA.13_1,BRA.13.662_1 ; BRA.13.672_1,BRA.13.662.2_1 ; BRA.13.662.4_1 ; BRA.13.672.1...,,det_1057,,"MULTIPOINT (-43.73907 -19.86773, -43.76980 -19..."


Unnamed: 0,facility_id,facility_name,facility_other_names,sub_site_name,sub_site_other_names,facility_type,primary_commodity,commodities_products,facility_equipment,production_start,...,GID_2,GID_3,GID_4,source_id,comment,geometry,index_right,ISO3_CODE,COUNTRY_NAME,AREA
6,COM00007.00,AGA Mineracao,,,,Mine,Gold,Gold,"Underground, Open pit, Heap leaching plant",,...,BRA.13.662_1 ; BRA.13.672_1,BRA.13.662.2_1 ; BRA.13.662.4_1 ; BRA.13.672.1...,,det_1057,,"MULTIPOINT (-43.73907 -19.86773, -43.76980 -19...",3230,BRA,Brazil,0.246638
6,COM00007.00,AGA Mineracao,,,,Mine,Gold,Gold,"Underground, Open pit, Heap leaching plant",,...,BRA.13.662_1 ; BRA.13.672_1,BRA.13.662.2_1 ; BRA.13.662.4_1 ; BRA.13.672.1...,,det_1057,,"MULTIPOINT (-43.73907 -19.86773, -43.76980 -19...",3263,BRA,Brazil,0.172577
6,COM00007.00,AGA Mineracao,,,,Mine,Gold,Gold,"Underground, Open pit, Heap leaching plant",,...,BRA.13.662_1 ; BRA.13.672_1,BRA.13.662.2_1 ; BRA.13.662.4_1 ; BRA.13.672.1...,,det_1057,,"MULTIPOINT (-43.73907 -19.86773, -43.76980 -19...",3302,BRA,Brazil,0.830338


Conclusion of the test: a spatial join of a multipoint with polygons returns one row for every polygon that intersects any point of the multipoint.

## Join the data
We want two dataframes:
- One containing all mines included in the production data, including the mine coordinates and the total area of all polygons that intersect the mine's point(s)
- Another one with just the polygons intersecting with point coordinates, that we can then add as a layer to the geopackage. 

For now, we only match polygons that intersect the mine points directly. Then, we check how many intersections we get.
Later, however, we also want to test intersecting a radius (buffer) around the points with the polygons.

In [18]:
# produce the intersection: an inner join keeps only facilities that intersect
# at least one polygon, with one row per intersecting polygon
df = gpd.sjoin(facilities, polygons, how='inner', predicate='intersects', lsuffix='left', rsuffix='right')

display(df)
# for the mines that are represented with multipoints, we have to add up the mining area.
# Select the column before aggregating: `sum("AREA")` would hand "AREA" to the
# positional `numeric_only` parameter instead of choosing the column to sum.
area = df.groupby("facility_id")["AREA"].sum()
# TODO: merge `area` back onto the facilities
# pd.merge(facilities, )

Unnamed: 0,facility_id,facility_name,facility_other_names,sub_site_name,sub_site_other_names,facility_type,primary_commodity,commodities_products,facility_equipment,production_start,...,GID_2,GID_3,GID_4,source_id,comment,geometry,index_right,ISO3_CODE,COUNTRY_NAME,AREA
0,COM00001.00,777,,,,Mine,Copper,"Copper, Zinc, Gold, Silver",Underground,,...,CAN.3.13_1,CAN.3.13.2_1,,det_1149,,MULTIPOINT (-101.87946 54.77482),18122,CAN,Canada,6.342012
1,COM00002.00,A Narrain,,,,Mine,Iron,Iron ore,Open pit,,...,IND.16.11_1,IND.16.11.2_1,,det_1307,,MULTIPOINT (76.20841 14.22353),5610,IND,India,2.107415
2,COM00003.00,Absaloka,,,,Mine,Coal,Sub-bituminous coal,,1974.0,...,USA.27.2_1,,,det_1427,,MULTIPOINT (-107.08290 45.80531),17055,USA,United States,0.522208
4,COM00005.00,Achinsk Alumina Refinery,,,,Refinery,Processing,"Alumina, Calcined soda",Sinter plant,1970.0,...,RUS.35.2_1,RUS.35.2.1_1,,det_1289,,MULTIPOINT (90.42397 56.23464),17558,RUS,Russian Federation,22.217876
6,COM00007.00,AGA Mineracao,,,,Mine,Gold,Gold,"Underground, Open pit, Heap leaching plant",,...,BRA.13.662_1 ; BRA.13.672_1,BRA.13.662.2_1 ; BRA.13.662.4_1 ; BRA.13.672.1...,,det_1057,,"MULTIPOINT (-43.73907 -19.86773, -43.76980 -19...",3230,BRA,Brazil,0.246638
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1422,COM01423.00,Zhezkazgan North,,,,Mine,Copper,"Copper, Silver",,,...,KAZ.9.8_1,,,det_1123,,MULTIPOINT (67.45704 47.90149),12413,KAZ,Kazakhstan,16.377312
1424,COM01425.00,Zhezkazgan South and Stepnoy,,,,Mine,Copper,"Copper, Silver",,,...,KAZ.9.8_1,,,det_1123,,MULTIPOINT (67.38472 47.82432),12601,KAZ,Kazakhstan,12.511847
1428,COM01429.00,Zibulo,Zondagsfontein,,,Mine,Coal,Thermal coal,,,...,ZAF.6.3_1,ZAF.6.3.3_1,ZAF.6.3.3.24_1,det_1103,,MULTIPOINT (29.01740 -26.20006),2610,ZAF,South Africa,0.512426
1429,COM01430.00,Zijinshan,Fujian Zijinshan,,,Mine,Gold,"Gold, Copper, Copper cathodes, Silver",Open Pit,1980.0,...,CHN.4.2_1,CHN.4.2.4_1,,det_1315,,MULTIPOINT (116.40544 25.18979),8770,CHN,China,18.908502


Unnamed: 0_level_0,AREA
facility_id,Unnamed: 1_level_1
COM00001.00,6.342012
COM00002.00,2.107415
COM00003.00,0.522208
COM00005.00,22.217876
COM00007.00,1.249552
...,...
COM01425.00,12.511847
COM01428.00,63.669780
COM01429.00,0.512426
COM01430.00,18.908502


In [9]:
# Show which geometry types occur in the joined frame. A bare expression is
# only rendered when it is the last line of a cell, so display it explicitly.
display(df.geom_type.unique())
df

Unnamed: 0,facility_id,facility_name,facility_other_names,sub_site_name,sub_site_other_names,facility_type,primary_commodity,commodities_products,facility_equipment,production_start,...,GID_2,GID_3,GID_4,source_id,comment,geometry,index_right,ISO3_CODE,COUNTRY_NAME,AREA
0,COM00001.00,777,,,,Mine,Copper,"Copper, Zinc, Gold, Silver",Underground,,...,CAN.3.13_1,CAN.3.13.2_1,,det_1149,,MULTIPOINT (-101.87946 54.77482),18122,CAN,Canada,6.342012
1,COM00002.00,A Narrain,,,,Mine,Iron,Iron ore,Open pit,,...,IND.16.11_1,IND.16.11.2_1,,det_1307,,MULTIPOINT (76.20841 14.22353),5610,IND,India,2.107415
2,COM00003.00,Absaloka,,,,Mine,Coal,Sub-bituminous coal,,1974.0,...,USA.27.2_1,,,det_1427,,MULTIPOINT (-107.08290 45.80531),17055,USA,United States,0.522208
4,COM00005.00,Achinsk Alumina Refinery,,,,Refinery,Processing,"Alumina, Calcined soda",Sinter plant,1970.0,...,RUS.35.2_1,RUS.35.2.1_1,,det_1289,,MULTIPOINT (90.42397 56.23464),17558,RUS,Russian Federation,22.217876
6,COM00007.00,AGA Mineracao,,,,Mine,Gold,Gold,"Underground, Open pit, Heap leaching plant",,...,BRA.13.662_1 ; BRA.13.672_1,BRA.13.662.2_1 ; BRA.13.662.4_1 ; BRA.13.672.1...,,det_1057,,"MULTIPOINT (-43.73907 -19.86773, -43.76980 -19...",3230,BRA,Brazil,0.246638
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1422,COM01423.00,Zhezkazgan North,,,,Mine,Copper,"Copper, Silver",,,...,KAZ.9.8_1,,,det_1123,,MULTIPOINT (67.45704 47.90149),12413,KAZ,Kazakhstan,16.377312
1424,COM01425.00,Zhezkazgan South and Stepnoy,,,,Mine,Copper,"Copper, Silver",,,...,KAZ.9.8_1,,,det_1123,,MULTIPOINT (67.38472 47.82432),12601,KAZ,Kazakhstan,12.511847
1428,COM01429.00,Zibulo,Zondagsfontein,,,Mine,Coal,Thermal coal,,,...,ZAF.6.3_1,ZAF.6.3.3_1,ZAF.6.3.3.24_1,det_1103,,MULTIPOINT (29.01740 -26.20006),2610,ZAF,South Africa,0.512426
1429,COM01430.00,Zijinshan,Fujian Zijinshan,,,Mine,Gold,"Gold, Copper, Copper cathodes, Silver",Open Pit,1980.0,...,CHN.4.2_1,CHN.4.2.4_1,,det_1315,,MULTIPOINT (116.40544 25.18979),8770,CHN,China,18.908502


In [None]:
# Directory for intermediate outputs, relative to the working directory.
path = "./intermediate"

# create the intermediate directory if it does not exist
if not os.path.isdir(path):
    # exist_ok avoids a crash if the directory appears between the check and the call
    os.makedirs(path, exist_ok=True)
    print("The new directory is created!")

# write the joined dataframe to intermediate; the driver is stated explicitly
# so the GeoPackage format does not depend on extension-based inference
df.to_file(os.path.join(path, "joined_df.gpkg"), driver="GPKG")