In [1]:
import os
import gc
import json
import rasterio
import numpy as np
import pandas as pd
from tqdm import tqdm
import geopandas as gpd
from pathlib import Path
from osgeo import gdal, osr
import dask_geopandas as dgpd
from shapely.geometry import shape
from rasterstats import zonal_stats

# Project and data locations.
# The defaults are the original author's local paths; set the environment
# variables MAPINEQ_BASE_DIR / MAPINEQ_DATA_DIR to run the notebook on another
# machine without editing this cell.
BASE_DIR = Path(os.environ.get(
    'MAPINEQ_BASE_DIR',
    '/Users/wenlanzhang/PycharmProjects/Mapineq/src/data-wrangling/',
))
DATA_DIR = Path(os.environ.get(
    'MAPINEQ_DATA_DIR',
    '/Users/wenlanzhang/Downloads/PhD_UCL/Data/Oxford',
))

In [2]:
# Load the per-region radiance statistics table (max/mean/min/stdDev plus a
# GeoJSON '.geo' column, judging by the displayed output) into a DataFrame.
viirs_csv_path = '/Users/wenlanzhang/Downloads/VIIRS2021_NUTS_MeanRadiance.csv'
df = pd.read_csv(viirs_csv_path)
df

Unnamed: 0,system:index,CNTR_CODE,LEVL_CODE,NAME_LATN,NUTS_ID,NUTS_NAME,max,mean,min,stdDev,.geo
0,0000000000000000024d,AT,3,Waldviertel,AT124,Waldviertel,36.462811,0.599892,0.0,1.369225,"{""type"":""Polygon"",""coordinates"":[[[15.54235686..."
1,0000000000000000025b,AT,3,Weinviertel,AT125,Weinviertel,49.652203,0.827358,0.0,1.678783,"{""type"":""Polygon"",""coordinates"":[[[15.75387540..."
2,00000000000000000268,AT,3,Mühlviertel,AT313,Mühlviertel,21.618773,0.379993,0.0,0.955282,"{""type"":""Polygon"",""coordinates"":[[[13.83970189..."
3,0000000000000000027f,AT,3,Wiener Umland/Nordteil,AT126,Wiener Umland/Nordteil,48.293377,1.632597,0.0,2.876856,"{""type"":""Polygon"",""coordinates"":[[[15.73136571..."
4,00000000000000000281,AT,3,Innviertel,AT311,Innviertel,62.802673,0.579643,0.0,1.862056,"{""type"":""Polygon"",""coordinates"":[[[13.72757673..."
...,...,...,...,...,...,...,...,...,...,...,...
1926,00010000000000000003,FR,3,Guadeloupe,FR910,Guadeloupe,50.815113,2.354179,0.0,4.641497,"{""type"":""MultiPolygon"",""coordinates"":[[[[-61.5..."
1927,00010000000000000002,FR,2,Guyane,FR93,Guyane,41.510910,0.025170,-1.5,0.542377,"{""type"":""MultiPolygon"",""coordinates"":[[[[-52.2..."
1928,00010000000000000005,FR,3,Guyane,FR930,Guyane,41.510910,0.025170,-1.5,0.542377,"{""type"":""MultiPolygon"",""coordinates"":[[[[-52.2..."
1929,00010000000000000001,FR,2,Martinique,FR92,Martinique,57.269428,3.073083,0.0,5.346626,"{""type"":""MultiPolygon"",""coordinates"":[[[[-60.8..."


In [3]:
# Remove the 'system:index' column when it exists; errors='ignore' turns the
# drop into a no-op if the column is absent.
df = df.drop(columns=['system:index'], errors='ignore')

# Convert .geo (GeoJSON) to WKT
def geojson_to_wkt(geo_str):
    """Convert a GeoJSON geometry string to its WKT representation.

    The string is decoded with ``json.loads`` and passed to ``shape``; the
    resulting geometry's ``.wkt`` text is returned.

    Any exception raised along the way is reported on stdout and ``None`` is
    returned instead, so one bad row does not abort a column-wide ``apply``.
    """
    try:
        wkt_text = shape(json.loads(geo_str)).wkt
    except Exception as e:
        print(f"Error parsing geometry: {e}")
        wkt_text = None
    return wkt_text

# Convert the GeoJSON text in '.geo' into a 'WKT' column, then drop '.geo'.
# Guarded on the column's presence so that re-running this cell after '.geo'
# has already been removed does not raise a KeyError.
if '.geo' in df.columns:
    df['WKT'] = df['.geo'].apply(geojson_to_wkt)

    # Drop the original .geo column
    df = df.drop(columns=['.geo'])
df

Unnamed: 0,CNTR_CODE,LEVL_CODE,NAME_LATN,NUTS_ID,NUTS_NAME,max,mean,min,stdDev,WKT
0,AT,3,Waldviertel,AT124,Waldviertel,36.462811,0.599892,0.0,1.369225,"POLYGON ((15.5423568687789 48.90795874625594, ..."
1,AT,3,Weinviertel,AT125,Weinviertel,49.652203,0.827358,0.0,1.678783,POLYGON ((15.753875401943136 48.85239782145810...
2,AT,3,Mühlviertel,AT313,Mühlviertel,21.618773,0.379993,0.0,0.955282,POLYGON ((13.839701890307119 48.77145300222119...
3,AT,3,Wiener Umland/Nordteil,AT126,Wiener Umland/Nordteil,48.293377,1.632597,0.0,2.876856,POLYGON ((15.731365711283962 48.38419992107961...
4,AT,3,Innviertel,AT311,Innviertel,62.802673,0.579643,0.0,1.862056,POLYGON ((13.727576738328024 48.51302851665202...
...,...,...,...,...,...,...,...,...,...,...
1926,FR,3,Guadeloupe,FR910,Guadeloupe,50.815113,2.354179,0.0,4.641497,MULTIPOLYGON (((-61.59913904577497 15.87588239...
1927,FR,2,Guyane,FR93,Guyane,41.510910,0.025170,-1.5,0.542377,MULTIPOLYGON (((-52.20562475873677 4.928226277...
1928,FR,3,Guyane,FR930,Guyane,41.510910,0.025170,-1.5,0.542377,MULTIPOLYGON (((-52.20562475873677 4.928226277...
1929,FR,2,Martinique,FR92,Martinique,57.269428,3.073083,0.0,5.346626,MULTIPOLYGON (((-60.862354506221294 14.6082130...


In [11]:
# Optional filter (currently disabled): keep only rows whose LEVL_CODE is 3.
# df = df[df['LEVL_CODE'] == 3]

In [4]:
# Write the table (including the WKT geometry column, if present) to CSV,
# storing the DataFrame index as a column named "id".
qgis_csv_path = "/Users/wenlanzhang/Downloads/formatted_for_qgis.csv"
df.to_csv(qgis_csv_path, index_label="id", index=True)

In [5]:
# Drop the country/level/name/geometry columns that are not wanted in the
# final export. errors='ignore' keeps the cell safe to re-run: on a second
# run these columns are already gone and a plain drop would raise KeyError.
df = df.drop(columns=['CNTR_CODE', 'LEVL_CODE', 'NAME_LATN', 'WKT'], errors='ignore')

# Rename 'NUTS_ID' to 'geo_source' (a no-op if it has already been renamed)
df = df.rename(columns={'NUTS_ID': 'geo_source'})

# Move 'geo_source' to the last position, keeping the other columns in order
cols = [col for col in df.columns if col != 'geo_source'] + ['geo_source']
df = df[cols]

df

Unnamed: 0,NUTS_NAME,max,mean,min,stdDev,geo_source
0,Waldviertel,36.462811,0.599892,0.0,1.369225,AT124
1,Weinviertel,49.652203,0.827358,0.0,1.678783,AT125
2,Mühlviertel,21.618773,0.379993,0.0,0.955282,AT313
3,Wiener Umland/Nordteil,48.293377,1.632597,0.0,2.876856,AT126
4,Innviertel,62.802673,0.579643,0.0,1.862056,AT311
...,...,...,...,...,...,...
1926,Guadeloupe,50.815113,2.354179,0.0,4.641497,FR910
1927,Guyane,41.510910,0.025170,-1.5,0.542377,FR93
1928,Guyane,41.510910,0.025170,-1.5,0.542377,FR930
1929,Martinique,57.269428,3.073083,0.0,5.346626,FR92


In [6]:
# Write the reduced table to CSV, storing the DataFrame index as a column
# named "id".
output_csv_path = "/Users/wenlanzhang/Downloads/formatted_output.csv"
df.to_csv(output_csv_path, index_label="id", index=True)