In [1]:
# Dependency note: this notebook needs geopandas; uncomment the line below to install it if it is missing.
#pip install geopandas

The JSON file contains metadata from the website https://facethefuture.com/#projects, and this notebook shows the preprocessing steps and the generation of the output geospatial file (a GeoPackage).

In [2]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import shape, Polygon, MultiPolygon, LineString, MultiLineString, Point, MultiPoint

In [3]:
# Read the scraped JSON into a DataFrame (columns: type, id, geometry, properties)
# and preview the first record.
raw_json_path = "../input/FaceTheFuture/FaceTheFuture.json"
df_raw = pd.read_json(raw_json_path)
df_raw.head(1)

Unnamed: 0,type,id,geometry,properties
0,Feature,1,"{'type': 'Polygon', 'coordinates': [[[777975.6...",{'FID': 1}


In [4]:
# Expand each record's `properties` dict into its own columns and put them
# next to the remaining top-level fields (type, id, geometry).
properties_expanded = df_raw["properties"].apply(pd.Series)
top_level_fields = df_raw.drop(columns=["properties"])
df = pd.concat([top_level_fields, properties_expanded], axis=1)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 396 entries, 0 to 395
Data columns (total 23 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   type             396 non-null    object 
 1   id               396 non-null    int64  
 2   geometry         396 non-null    object 
 3   FID              322 non-null    float64
 4   OBJECTID         74 non-null     float64
 5   GlobalID         74 non-null     object 
 6   Creator          74 non-null     object 
 7   CreationDate     74 non-null     float64
 8   LastEditor       74 non-null     object 
 9   LastEditDate     74 non-null     float64
 10  Compartment_ID   74 non-null     float64
 11  Phase            74 non-null     float64
 12  PlantingYear     74 non-null     object 
 13  PlantingSeason   74 non-null     object 
 14  ContractPartner  74 non-null     object 
 15  ContractArea     74 non-null     float64
 16  PlantingDensity  74 non-null     float64
 17  Shape__Area     

In [5]:
# Show the flattened frame; at this point `geometry` still holds the raw GeoJSON-style dicts.
df

Unnamed: 0,type,id,geometry,FID,OBJECTID,GlobalID,Creator,CreationDate,LastEditor,LastEditDate,...,PlantingSeason,ContractPartner,ContractArea,PlantingDensity,Shape__Area,Shape__Length,TreeNumber,PlantingYear_no,Operator,Comments
0,Feature,1,"{'type': 'Polygon', 'coordinates': [[[777975.6...",1.0,,,,,,,...,,,,,,,,,,
1,Feature,2,"{'type': 'Polygon', 'coordinates': [[[778221.1...",2.0,,,,,,,...,,,,,,,,,,
2,Feature,3,"{'type': 'Polygon', 'coordinates': [[[782087.2...",3.0,,,,,,,...,,,,,,,,,,
3,Feature,4,"{'type': 'Polygon', 'coordinates': [[[779656.8...",4.0,,,,,,,...,,,,,,,,,,
4,Feature,5,"{'type': 'Polygon', 'coordinates': [[[778547.0...",5.0,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
391,Feature,88,"{'type': 'MultiPolygon', 'coordinates': [[[[33...",,88.0,a89bfe12-fa97-4c45-80ae-bd30d83621ce,FaceTheFuture,1.639063e+12,FaceTheFuture,1.639063e+12,...,Sept-Oct 2021,TreesForAll,48.0,2500.0,0.000000,0.0746,120000.0,2021.0,Kamara Patrick,
392,Feature,89,"{'type': 'Polygon', 'coordinates': [[[3372119....",,89.0,00b179df-0cec-48e4-bfb5-d139f58c724e,FaceTheFuture,1.644654e+12,FaceTheFuture,1.688721e+12,...,April may,Greenchoice,4.0,400.0,0.000003,0.0138,1600.0,2014.0,Kamara Patrick,
393,Feature,91,"{'type': 'Polygon', 'coordinates': [[[3375034....",,91.0,d4c06d5c-5dad-4101-95c2-98031bd4c9b5,FaceTheFuture,1.652693e+12,FaceTheFuture,1.688721e+12,...,APRI_MAY,Greenchoice,50.0,1111.0,0.000000,0.0388,55555.0,2022.0,Kamara Patrick,
394,Feature,95,"{'type': 'MultiPolygon', 'coordinates': [[[[33...",,95.0,fc228588-6da3-4d2e-af30-28ad694ca27f,FaceTheFuture,1.667296e+12,FaceTheFuture,1.688721e+12,...,October,Greenchoice,50.0,1111.0,0.000000,0.0585,55555.0,2022.0,Kamara Patrick,


### Fix geometries

In [6]:
SUPPORTED_GEOMETRY_TYPES = ('Polygon', 'MultiPolygon', 'LineString', 'MultiLineString', 'Point', 'MultiPoint')


def _geojson_to_shapely(geom):
    """Convert a GeoJSON-style geometry dict to a shapely geometry.

    Returns None when `geom` is not a dict or its 'type' is not one of
    SUPPORTED_GEOMETRY_TYPES.
    """
    if not isinstance(geom, dict):
        return None
    if geom.get('type') not in SUPPORTED_GEOMETRY_TYPES:
        return None
    return shape(geom)


df['geometry'] = df['geometry'].apply(_geojson_to_shapely)

In [7]:
# Declare the source coordinates as EPSG:3857, then reproject to EPSG:4326.
gdf_source_crs = gpd.GeoDataFrame(df, geometry='geometry', crs='EPSG:3857')
gdf = gdf_source_crs.to_crs('EPSG:4326')

In [8]:
# A polygon shell needs at least 4 coordinates: 3 vertices plus the closing point.
MIN_RING_COORDS = 4


def _line_to_polygon(geom):
    """Turn a ring-like LineString into a Polygon; return anything else unchanged.

    An open LineString is closed by appending its first coordinate; a closed
    one is used as-is. Lines that cannot form a ring of at least
    MIN_RING_COORDS coordinates (e.g. a 2-point segment) are returned
    unchanged instead of being passed to Polygon(), which would raise and
    abort the whole cell.
    """
    if not isinstance(geom, LineString):
        return geom

    coords = list(geom.coords)
    # coords[:1] is empty for an empty line, so no IndexError on empty input.
    ring = coords if geom.is_closed else coords + coords[:1]
    if len(ring) < MIN_RING_COORDS:
        return geom
    return Polygon(ring)


gdf['geometry'] = gdf['geometry'].apply(_line_to_polygon)

In [9]:
# Split multi-part geometries into single parts and repair every part.
# The repair runs unconditionally, so invalid single polygons are fixed even
# when the data contains no MultiPolygon at all.
MULTI_PART_TYPES = ['MultiPolygon', 'MultiLineString', 'MultiPoint', 'GeometryCollection']

gdf = gdf.explode(index_parts=False)
gdf['geometry'] = gdf['geometry'].make_valid()

# make_valid() can return multi-part results (possibly nested, e.g. a
# GeometryCollection holding a MultiPolygon). Keep splitting until only
# single-part geometries remain, so repaired polygons are not discarded by a
# later geometry-type filter. Empty geometries are excluded from the loop
# condition so they can never keep the loop running.
while (gdf.geometry.geom_type.isin(MULTI_PART_TYPES) & ~gdf.geometry.is_empty).any():
    gdf = gdf.explode(index_parts=False)

In [10]:
# Keep only polygon and point features; every other geometry type (and rows
# without a geometry) is dropped.
has_kept_type = gdf.geometry.geom_type.isin(['Polygon', 'Point'])
gdf = gdf.loc[has_kept_type].copy()

### Harmonizing nomenclature

In [11]:
# Attributes carried over into the harmonized output, plus the geometry column.
filtered_columns = [
    "GlobalID",
    "id",
    "TreeNumber",
    "ContractArea",
    "PlantingYear_no",
    "Comments",
    "Creator",
    "geometry",
]
gdf = gdf[filtered_columns].copy()

In [12]:
# NOTE(review): ContractArea is assumed to be in hectares. In the displayed
# records ContractArea x PlantingDensity matches TreeNumber
# (48 x 2500 = 120000, 4 x 400 = 1600), which fits a per-hectare planting
# density. Confirm with the data provider.
HECTARES_PER_SQKM = 100

# Source attribute name -> harmonized column name.
new_column_names = {
    "GlobalID": "project_id_reported",
    "id": "site_id_reported",
    "TreeNumber": "trees_planted_reported",
    "ContractArea": "site_sqkm_reported",
    "PlantingYear_no": "planting_date_reported",
    "Comments": "project_description_reported",
    "geometry": "geometry"
}

# Convert the reported area to square kilometres *before* renaming, so the
# target column name (`site_sqkm_reported`) matches its unit. Converting the
# source column also makes an accidental second run of this cell fail loudly
# (KeyError on ContractArea) instead of silently dividing twice.
gdf = (
    gdf
    .assign(ContractArea=lambda frame: frame["ContractArea"] / HECTARES_PER_SQKM)
    .rename(columns=new_column_names)
)

### Checking geometry validity and adding derived columns

In [13]:
# True when at least one geometry in the frame is still invalid.
invalid_geom = bool((~gdf.geometry.is_valid).any())

In [14]:
# Areas must be measured in an equal-area projection. Web Mercator (EPSG:3857)
# inflates areas increasingly with distance from the equator, so the
# cylindrical equal-area CRS EPSG:6933 is used instead.
EQUAL_AREA_CRS = 'EPSG:6933'
SQM_PER_SQKM = 1e6

gdf = gdf.assign(
    # Computed site area in km^2 (projected units are metres).
    site_sqkm=gdf['geometry'].to_crs(EQUAL_AREA_CRS).area / SQM_PER_SQKM,
    host_name='Face the Future',
    url='https://facethefuture.com/#projects',
    project_geometries_invalid=invalid_geom,
    planting_date_type="Planting year",
    # Attributes not provided by this source; kept as empty columns.
    species_count_reported=None,
    country=None,
    species_planted_reported=None,
    survival_rate_reported=None,
)
gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 568 entries, 0 to 395
Data columns (total 17 columns):
 #   Column                        Non-Null Count  Dtype   
---  ------                        --------------  -----   
 0   project_id_reported           94 non-null     object  
 1   site_id_reported              568 non-null    int64   
 2   trees_planted_reported        94 non-null     float64 
 3   site_sqkm_reported            94 non-null     float64 
 4   planting_date_reported        94 non-null     float64 
 5   project_description_reported  2 non-null      object  
 6   Creator                       94 non-null     object  
 7   geometry                      568 non-null    geometry
 8   site_sqkm                     568 non-null    float64 
 9   host_name                     568 non-null    object  
 10  url                           568 non-null    object  
 11  project_geometries_invalid    568 non-null    bool    
 12  planting_date_type            568 non-null    o

In [15]:
# Write the harmonized sites as a GeoPackage. The driver is passed explicitly
# so the output format does not depend on file-extension inference.
gdf.to_file('../midsave/face_the_future.gpkg', driver='GPKG')