This notebook performs the post-GEE (Google Earth Engine) data wrangling for the annual and monthly night-time light (NTL) data.

**NUTS**: 
- 2003: 1891 rows
- 2006: 1931 rows
- 2010: 1920 rows
- 2013: 1951 rows
- 2016: 2016 rows
- 2021: 2010 rows
- 2024: 1798 rows  
- **Total**: 13517

**ITL**: 
- 2021: 232
- 2025: 240
- **Total**: 472


**EURO**:
- EURO2021: 2243
- EURO2025: 2039
- **Total**: 4282

Data issue:
> Monthly data: the UK has no data until 2012-09 (so 20120401-20120801 has been removed from the ITL series)  
> Monthly Stray Light: doesn't have 20241101

In [1]:
import re
import os
import gc
import json
import rasterio
import numpy as np
import pandas as pd
from tqdm import tqdm
import geopandas as gpd
from pathlib import Path
from osgeo import gdal, osr
import dask_geopandas as dgpd
from shapely.geometry import shape
from rasterstats import zonal_stats
from validator import validate

# Project and data roots. The defaults are the original local paths; set
# MAPINEQ_BASE_DIR / MAPINEQ_DATA_DIR in the environment to run the notebook
# on another machine without editing this cell.
BASE_DIR = Path(os.environ.get('MAPINEQ_BASE_DIR', '/Users/wenlanzhang/PycharmProjects/Mapineq/src/data-wrangling/'))
DATA_DIR = Path(os.environ.get('MAPINEQ_DATA_DIR', '/Users/wenlanzhang/Downloads/PhD_UCL/Data/Oxford'))

# Annual (2013-2024: 12 YEARS)
https://code.earthengine.google.com/9df7d68e50a39bf1e5804733d0b06d0c

## NUTS - done

In [8]:
# Annual NUTS statistics exported from GEE (the "22_24" file).
df2224 = pd.read_csv(DATA_DIR.joinpath('GEE/NTL/VIIRS_NUTS_All_22_24.csv'))

# List the boundary vintages (geo_source values) present in this export.
pd.unique(df2224['geo_source'])

array(['NUTS2003', 'NUTS2006', 'NUTS2010', 'NUTS2013', 'NUTS2016',
       'NUTS2021', 'NUTS2024'], dtype=object)

In [9]:
# Annual NUTS statistics exported from GEE (the "13_21" file). This export
# carries an extra 'min' column that is dropped straight away.
df1321 = pd.read_csv(DATA_DIR.joinpath('GEE/NTL/VIIRS_NUTS_All_13_21.csv')).drop(columns='min')

# List the boundary vintages (geo_source values) present in this export.
pd.unique(df1321['geo_source'])

array(['NUTS2003', 'NUTS2006', 'NUTS2010', 'NUTS2013', 'NUTS2016',
       'NUTS2021', 'NUTS2024'], dtype=object)

In [10]:
# Stack the two annual NUTS exports row-wise (13_21 first, then 22_24).
# The original row labels of each part are kept, so labels repeat.
df_combined = pd.concat((df1321, df2224), axis=0)
df_combined

Unnamed: 0,NUTS_ID,mean,stdDev,max,obsTime,geo_source
0,AT,0.584123,2.676161,363.295990,2013,NUTS2003
1,AT3,0.506815,2.715370,363.295990,2013,NUTS2003
2,AT1,0.992977,3.448908,167.207932,2013,NUTS2003
3,AT2,0.308386,1.465850,152.990356,2013,NUTS2003
4,AT34,0.833707,2.294287,43.166061,2013,NUTS2003
...,...,...,...,...,...,...
40546,SI037,0.545448,1.391197,23.812185,2024,NUTS2024
40547,SI038,0.347721,0.882950,14.631371,2024,NUTS2024
40548,SI043,0.420442,1.242523,25.155533,2024,NUTS2024
40549,SK041,0.353924,1.380300,31.594156,2024,NUTS2024


In [3]:
# Expected rows of the combined annual NUTS table in wide format:
# 12 years (2013-2024) x 13517 NUTS regions (total over all NUTS vintages).
12*13517

162204

In [12]:
# Harmonise the column names with the output schema (geo / std_dev).
# rename() ignores keys that are not present, so this is safe to re-run.
df_combined = df_combined.rename(columns={'NUTS_ID': 'geo', 'stdDev': 'std_dev'})

# Wide -> long: one row per (geo, geo_source, obsTime, metric).
df_long = df_combined.melt(
    id_vars=['geo', 'geo_source', 'obsTime'],
    value_vars=['max', 'mean', 'std_dev'],
    var_name='metric',
    value_name='obsValue',
)

# Put geo_source last, keeping the other columns in their current order.
cols = list(df_long.columns.drop('geo_source')) + ['geo_source']
df_long = df_long.loc[:, cols]
df_long

Unnamed: 0,geo,obsTime,metric,obsValue,geo_source
0,AT,2013,max,363.295990,NUTS2003
1,AT3,2013,max,363.295990,NUTS2003
2,AT1,2013,max,167.207932,NUTS2003
3,AT2,2013,max,152.990356,NUTS2003
4,AT34,2013,max,43.166061,NUTS2003
...,...,...,...,...,...
486607,SI037,2024,std_dev,1.391197,NUTS2024
486608,SI038,2024,std_dev,0.882950,NUTS2024
486609,SI043,2024,std_dev,1.242523,NUTS2024
486610,SK041,2024,std_dev,1.380300,NUTS2024


In [13]:
# Write the long-format annual NUTS table; the row index is exported as an
# explicit "id" column. The file name follows the NTL_Annual_<GEO>_1324.csv
# pattern used for ITL and EURO, and is the file the Test section reads back.
df_long.to_csv(DATA_DIR/"GEE/NTL/Output/NTL_Annual_NUTS_1324.csv", index=True, index_label="id")

### Test  

- geo_source (7): 'NUTS2003', 'NUTS2006', 'NUTS2010', 'NUTS2013', 'NUTS2016', 'NUTS2021', 'NUTS2024'  
- metric (3): 'max', 'mean', 'std_dev'
- year (12): 2013 - 2024

In [9]:
# Read the exported annual NUTS file back from disk so the checks below run
# against what was actually written.
df_long = pd.read_csv(DATA_DIR.joinpath('GEE/NTL/Output/NTL_Annual_NUTS_1324.csv'))
df_long

Unnamed: 0,id,geo,obsTime,metric,obsValue,geo_source
0,0,AT,2013,max,363.295990,NUTS2003
1,1,AT3,2013,max,363.295990,NUTS2003
2,2,AT1,2013,max,167.207932,NUTS2003
3,3,AT2,2013,max,152.990356,NUTS2003
4,4,AT34,2013,max,43.166061,NUTS2003
...,...,...,...,...,...,...
486607,486607,SI037,2024,std_dev,1.391197,NUTS2024
486608,486608,SI038,2024,std_dev,0.882950,NUTS2024
486609,486609,SI043,2024,std_dev,1.242523,NUTS2024
486610,486610,SK041,2024,std_dev,1.380300,NUTS2024


In [10]:
# Run the project's validator on the reloaded table.
# NOTE(review): `validate` comes from the local `validator` module; its checks
# are not visible here. The cell output shows the frame being echoed back.
validate(data = df_long)

Unnamed: 0,id,geo,obsTime,metric,obsValue,geo_source
0,0,AT,2013,max,363.295990,NUTS2003
1,1,AT3,2013,max,363.295990,NUTS2003
2,2,AT1,2013,max,167.207932,NUTS2003
3,3,AT2,2013,max,152.990356,NUTS2003
4,4,AT34,2013,max,43.166061,NUTS2003
...,...,...,...,...,...,...
486607,486607,SI037,2024,std_dev,1.391197,NUTS2024
486608,486608,SI038,2024,std_dev,0.882950,NUTS2024
486609,486609,SI043,2024,std_dev,1.242523,NUTS2024
486610,486610,SK041,2024,std_dev,1.380300,NUTS2024


In [6]:
# Number of distinct observation years (missing values are not counted);
# the annual series is expected to have 12.
df_long['obsTime'].nunique(dropna=True)

12

In [28]:
# Expected rows of the long-format annual NUTS table:
# 13517 regions x 12 years x 3 metrics (max, mean, std_dev).
13517 * 12 * 3

486612

## ITL - done

In [10]:
# Annual ITL exports from GEE: the "22_24" file and the "13_21" file.
df_ITL_2224 = pd.read_csv(DATA_DIR.joinpath('GEE/NTL/ITLEURO/VIIRS_ITL_All_22_24.csv'))
df_ITL_1321 = pd.read_csv(DATA_DIR.joinpath('GEE/NTL/ITLEURO/VIIRS_ITL_All_13_21.csv'))

# Row counts, one per line (22_24 first, then 13_21).
print(len(df_ITL_2224), len(df_ITL_1321), sep='\n')

1416
4248


In [15]:
# Stack the two annual ITL exports row-wise (22_24 first, then 13_21);
# each part keeps its own row labels.
itl_combined = pd.concat((df_ITL_2224, df_ITL_1321), axis=0)
itl_combined

Unnamed: 0,geo,mean,stdDev,max,obsTime,geo_source
0,TLC,2.055000,6.587237,282.932861,2022,ITL2021
1,TLD,3.082955,7.768634,247.588699,2022,ITL2021
2,TLE,2.489728,6.813055,284.615753,2022,ITL2021
3,TLF,2.046700,5.018148,158.123749,2022,ITL2021
4,TLG,2.818651,7.014117,142.357117,2022,ITL2021
...,...,...,...,...,...,...
4243,TLN0C,0.671375,1.896544,30.395159,2021,ITL2025
4244,TLN0D,2.141897,4.825698,47.231506,2021,ITL2025
4245,TLN0E,2.426179,4.142831,36.411503,2021,ITL2025
4246,TLN0F,0.983335,2.766371,35.193775,2021,ITL2025


In [16]:
# Use the output schema's name for the standard-deviation column.
itl_combined = itl_combined.rename(columns={'stdDev': 'std_dev'})

# Wide -> long: one row per (geo, geo_source, obsTime, metric).
itl_long = itl_combined.melt(
    id_vars=['geo', 'geo_source', 'obsTime'],
    value_vars=['max', 'mean', 'std_dev'],
    var_name='metric',
    value_name='obsValue',
)

# Put geo_source last, keeping the other columns in their current order.
cols = list(itl_long.columns.drop('geo_source')) + ['geo_source']
itl_long = itl_long.loc[:, cols]
itl_long

Unnamed: 0,geo,obsTime,metric,obsValue,geo_source
0,TLC,2022,max,282.932861,ITL2021
1,TLD,2022,max,247.588699,ITL2021
2,TLE,2022,max,284.615753,ITL2021
3,TLF,2022,max,158.123749,ITL2021
4,TLG,2022,max,142.357117,ITL2021
...,...,...,...,...,...
16987,TLN0C,2021,std_dev,1.896544,ITL2025
16988,TLN0D,2021,std_dev,4.825698,ITL2025
16989,TLN0E,2021,std_dev,4.142831,ITL2025
16990,TLN0F,2021,std_dev,2.766371,ITL2025


In [11]:
# Export step, currently disabled (uncomment to regenerate the file):
# itl_long.to_csv(DATA_DIR/"GEE/NTL/Output/NTL_Annual_ITL_1324.csv", index=True, index_label="id")

# Reload the exported annual ITL file and pass it to the project validator.
itl_long = pd.read_csv(DATA_DIR.joinpath('GEE/NTL/Output/NTL_Annual_ITL_1324.csv'))
validate(data=itl_long)

Unnamed: 0,id,geo,obsTime,metric,obsValue,geo_source
0,0,TLC,2022,max,282.932861,ITL2021
1,1,TLD,2022,max,247.588699,ITL2021
2,2,TLE,2022,max,284.615753,ITL2021
3,3,TLF,2022,max,158.123749,ITL2021
4,4,TLG,2022,max,142.357117,ITL2021
...,...,...,...,...,...,...
16987,16987,TLN0C,2021,std_dev,1.896544,ITL2025
16988,16988,TLN0D,2021,std_dev,4.825698,ITL2025
16989,16989,TLN0E,2021,std_dev,4.142831,ITL2025
16990,16990,TLN0F,2021,std_dev,2.766371,ITL2025


In [29]:
# Expected rows of the long-format annual ITL table:
# 472 regions (ITL2021 + ITL2025) x 12 years x 3 metrics.
472 * 12*3

16992

## EURO - done

In [33]:
# Annual EURO exports from GEE: the "13_21" file and the "22_24" file.
df_EURO_1321 = pd.read_csv(DATA_DIR.joinpath('GEE/NTL/ITLEURO/VIIRS_EURO_13_21.csv'))
df_EURO_2224 = pd.read_csv(DATA_DIR.joinpath('GEE/NTL/ITLEURO/VIIRS_EURO_22_24.csv'))

# Row counts, one per line (13_21 first, then 22_24), then preview the first file.
print(len(df_EURO_1321), len(df_EURO_2224), sep='\n')
df_EURO_1321

38538
12846


Unnamed: 0,geo,mean,stdDev,max,obsTime,geo_source
0,AL,0.531731,3.125653,157.061478,2013,EURO2021
1,AL0,0.531731,3.125653,157.061478,2013,EURO2021
2,AL01,0.490856,2.917034,131.469543,2013,EURO2021
3,AL02,1.085123,5.181976,157.061478,2013,EURO2021
4,AL03,0.353151,1.997965,83.589897,2013,EURO2021
...,...,...,...,...,...,...
38533,XK006,1.022763,2.457742,46.511379,2021,EURO2025
38534,XK007,1.302272,2.377348,37.723907,2021,EURO2025
38535,XK00,1.409658,3.501441,86.880325,2021,EURO2025
38536,XK0,1.409658,3.501441,86.880325,2021,EURO2025


In [34]:
# Stack the two annual EURO exports row-wise (13_21 first, then 22_24);
# each part keeps its own row labels.
EURO_combined = pd.concat((df_EURO_1321, df_EURO_2224), axis=0)
EURO_combined

Unnamed: 0,geo,mean,stdDev,max,obsTime,geo_source
0,AL,0.531731,3.125653,157.061478,2013,EURO2021
1,AL0,0.531731,3.125653,157.061478,2013,EURO2021
2,AL01,0.490856,2.917034,131.469543,2013,EURO2021
3,AL02,1.085123,5.181976,157.061478,2013,EURO2021
4,AL03,0.353151,1.997965,83.589897,2013,EURO2021
...,...,...,...,...,...,...
12841,XK006,1.353443,3.124474,52.121231,2024,EURO2025
12842,XK007,1.718228,3.071854,46.252934,2024,EURO2025
12843,XK00,1.809400,4.399110,99.012657,2024,EURO2025
12844,XK0,1.809400,4.399110,99.012657,2024,EURO2025


In [36]:
# Use the output schema's name for the standard-deviation column.
EURO_combined = EURO_combined.rename(columns={'stdDev': 'std_dev'})

# Wide -> long: one row per (geo, geo_source, obsTime, metric).
EURO_long = EURO_combined.melt(
    id_vars=['geo', 'geo_source', 'obsTime'],
    value_vars=['max', 'mean', 'std_dev'],
    var_name='metric',
    value_name='obsValue',
)

# Put geo_source last, keeping the other columns in their current order.
cols = list(EURO_long.columns.drop('geo_source')) + ['geo_source']
EURO_long = EURO_long.loc[:, cols]
EURO_long

Unnamed: 0,geo,obsTime,metric,obsValue,geo_source
0,AL,2013,max,157.061478,EURO2021
1,AL0,2013,max,157.061478,EURO2021
2,AL01,2013,max,131.469543,EURO2021
3,AL02,2013,max,157.061478,EURO2021
4,AL03,2013,max,83.589897,EURO2021
...,...,...,...,...,...
154147,XK006,2024,std_dev,3.124474,EURO2025
154148,XK007,2024,std_dev,3.071854,EURO2025
154149,XK00,2024,std_dev,4.399110,EURO2025
154150,XK0,2024,std_dev,4.399110,EURO2025


In [13]:
# Export step, currently disabled (uncomment to regenerate the file):
# EURO_long.to_csv(DATA_DIR/"GEE/NTL/Output/NTL_Annual_EURO_1324.csv", index=True, index_label="id")

# Reload the exported annual EURO file and pass it to the project validator.
EURO_long = pd.read_csv(DATA_DIR.joinpath('GEE/NTL/Output/NTL_Annual_EURO_1324.csv'))
validate(data=EURO_long)

Unnamed: 0,id,geo,obsTime,metric,obsValue,geo_source
0,0,AL,2013,max,157.061478,EURO2021
1,1,AL0,2013,max,157.061478,EURO2021
2,2,AL01,2013,max,131.469543,EURO2021
3,3,AL02,2013,max,157.061478,EURO2021
4,4,AL03,2013,max,83.589897,EURO2021
...,...,...,...,...,...,...
154147,154147,XK006,2024,std_dev,3.124474,EURO2025
154148,154148,XK007,2024,std_dev,3.071854,EURO2025
154149,154149,XK00,2024,std_dev,4.399110,EURO2025
154150,154150,XK0,2024,std_dev,4.399110,EURO2025


In [30]:
# Expected rows of the long-format annual EURO table:
# 4282 regions (EURO2021 + EURO2025) x 12 years x 3 metrics.
4282*12*3

154152

# Month (20120401 - 20250301) = 156 months

## NUTS - TODO (not done: only the 2013-2024 files are loaded below, i.e. 144 of the 156 months)

In [2]:
# Monthly NUTS statistics exported from GEE (the "2013_2021" file).
df1321_M = pd.read_csv(DATA_DIR.joinpath('GEE/NTL/VIIRS_NUTS_Monthly_2013_2021.csv'))
df1321_M

Unnamed: 0,geo,mean,stdDev,max,obsTime,obsMonth,geo_source
0,AT,0.986477,2.548236,182.200836,2013,1,NUTS2003
1,AT3,0.995040,3.078193,182.200836,2013,1,NUTS2003
2,AT1,1.375383,2.639027,106.621887,2013,1,NUTS2003
3,AT2,0.614355,1.306002,71.294838,2013,1,NUTS2003
4,AT34,1.249731,2.065825,37.559410,2013,1,NUTS2003
...,...,...,...,...,...,...,...
1459831,SI037,0.989436,1.863954,40.930000,2021,12,NUTS2024
1459832,SI038,0.753542,1.090943,16.030001,2021,12,NUTS2024
1459833,SI043,0.796052,1.411274,35.980000,2021,12,NUTS2024
1459834,SK041,0.947011,2.419595,83.790001,2021,12,NUTS2024


In [3]:
# Monthly NUTS statistics exported from GEE (the "2022_2024" file), with the
# id/year columns renamed to the names used in the rest of the notebook.
df2224_M = (
    pd.read_csv(DATA_DIR.joinpath('GEE/NTL/VIIRS_NUTS_Monthly_2022_2024.csv'))
    .rename(columns={'NUTS_ID': 'geo', 'obsYear': 'obsTime'})
)
df2224_M

Unnamed: 0,geo,mean,stdDev,max,obsTime,obsMonth,geo_source
0,AT,1.250350,3.499265,434.630005,2022,1,NUTS2003
1,AT3,1.135695,2.849661,334.390015,2022,1,NUTS2003
2,AT1,1.751773,4.559688,141.559998,2022,1,NUTS2003
3,AT2,0.939442,3.054300,434.630005,2022,1,NUTS2003
4,AT34,1.379717,2.241623,70.169998,2022,1,NUTS2003
...,...,...,...,...,...,...,...
486607,SI037,1.023837,1.785201,45.340000,2024,12,NUTS2024
486608,SI038,0.856611,1.177727,19.650000,2024,12,NUTS2024
486609,SI043,0.880418,1.505450,36.509998,2024,12,NUTS2024
486610,SK041,0.756769,1.343125,34.970001,2024,12,NUTS2024


In [4]:
# Stack the two monthly NUTS exports row-wise (2013_2021 first, then
# 2022_2024); each part keeps its own row labels.
df_combined_M = pd.concat((df1321_M, df2224_M), axis=0)
df_combined_M

Unnamed: 0,geo,mean,stdDev,max,obsTime,obsMonth,geo_source
0,AT,0.986477,2.548236,182.200836,2013,1,NUTS2003
1,AT3,0.995040,3.078193,182.200836,2013,1,NUTS2003
2,AT1,1.375383,2.639027,106.621887,2013,1,NUTS2003
3,AT2,0.614355,1.306002,71.294838,2013,1,NUTS2003
4,AT34,1.249731,2.065825,37.559410,2013,1,NUTS2003
...,...,...,...,...,...,...,...
486607,SI037,1.023837,1.785201,45.340000,2024,12,NUTS2024
486608,SI038,0.856611,1.177727,19.650000,2024,12,NUTS2024
486609,SI043,0.880418,1.505450,36.509998,2024,12,NUTS2024
486610,SK041,0.756769,1.343125,34.970001,2024,12,NUTS2024


In [5]:
# Use the output schema's name for the standard-deviation column.
df_combined_M = df_combined_M.rename(columns={'stdDev': 'std_dev'})

# Wide -> long: one row per (geo, geo_source, obsTime, obsMonth, metric).
df_long_M = df_combined_M.melt(
    id_vars=['geo', 'geo_source', 'obsTime', 'obsMonth'],
    value_vars=['max', 'mean', 'std_dev'],
    var_name='metric',
    value_name='obsValue',
)

# Put geo_source last, keeping the other columns in their current order.
cols = list(df_long_M.columns.drop('geo_source')) + ['geo_source']
df_long_M = df_long_M.loc[:, cols]
df_long_M

Unnamed: 0,geo,obsTime,obsMonth,metric,obsValue,geo_source
0,AT,2013,1,max,182.200836,NUTS2003
1,AT3,2013,1,max,182.200836,NUTS2003
2,AT1,2013,1,max,106.621887,NUTS2003
3,AT2,2013,1,max,71.294838,NUTS2003
4,AT34,2013,1,max,37.559410,NUTS2003
...,...,...,...,...,...,...
5839339,SI037,2024,12,std_dev,1.785201,NUTS2024
5839340,SI038,2024,12,std_dev,1.177727,NUTS2024
5839341,SI043,2024,12,std_dev,1.505450,NUTS2024
5839342,SK041,2024,12,std_dev,1.343125,NUTS2024


In [15]:
# Export step, currently disabled (uncomment to regenerate the file):
# df_long_M.to_csv(DATA_DIR/"GEE/NTL/Output/NTL_Monthly_NUTS_1324.csv", index=True, index_label="id")

# Reload the exported monthly NUTS file and pass it to the project validator.
df_long_M = pd.read_csv(DATA_DIR.joinpath('GEE/NTL/Output/NTL_Monthly_NUTS_1324.csv'))
validate(data=df_long_M)

Unnamed: 0,id,geo,obsTime,obsMonth,metric,obsValue,geo_source
0,0,AT,2013,1,max,182.200836,NUTS2003
1,1,AT3,2013,1,max,182.200836,NUTS2003
2,2,AT1,2013,1,max,106.621887,NUTS2003
3,3,AT2,2013,1,max,71.294838,NUTS2003
4,4,AT34,2013,1,max,37.559410,NUTS2003
...,...,...,...,...,...,...,...
5839339,5839339,SI037,2024,12,std_dev,1.785201,NUTS2024
5839340,5839340,SI038,2024,12,std_dev,1.177727,NUTS2024
5839341,5839341,SI043,2024,12,std_dev,1.505450,NUTS2024
5839342,5839342,SK041,2024,12,std_dev,1.343125,NUTS2024


In [31]:
# Target rows of the long-format monthly NUTS table for the full span:
# 13517 regions x 156 months (2012-04 .. 2025-03) x 3 metrics.
# NOTE(review): the file validated above ends near id 5839343, which matches
# 13517 * 144 * 3 = 5839344 (2013-2024 only), not this figure - the 2012 and
# 2025 months appear to be missing from the NUTS monthly output; confirm.
13517 * 156 * 3

6325956

### Export with geometry to Check in QGIS

In [3]:
# # Drop the 'system:index' column
# if 'system:index' in df.columns:
#     df = df.drop(columns=['system:index'])

# # Convert .geo (GeoJSON) to WKT
# def geojson_to_wkt(geo_str):
#     try:
#         geom = shape(json.loads(geo_str))
#         return geom.wkt
#     except Exception as e:
#         print(f"Error parsing geometry: {e}")
#         return None

# df['WKT'] = df['.geo'].apply(geojson_to_wkt)

# # Drop the original .geo column
# df = df.drop(columns=['.geo'])
# df

Unnamed: 0,NUTS_ID,geo_source,max,mean,stdDev,year,WKT
0,AT124,NUTS2006,39.095600,0.485493,1.325237,2020,"POLYGON ((15.5423568687789 48.90795874625594, ..."
1,AT125,NUTS2006,52.678535,0.746196,1.576954,2020,POLYGON ((15.753875401943136 48.85239782145810...
2,AT313,NUTS2006,25.432070,0.314568,0.894521,2020,POLYGON ((13.839701890307119 48.77145300222119...
3,AT126,NUTS2006,68.902916,1.495743,2.753673,2020,POLYGON ((15.731365711283962 48.38419992107961...
4,AT311,NUTS2006,62.645573,0.509443,1.775113,2020,POLYGON ((13.727576738328024 48.51302851665202...
...,...,...,...,...,...,...,...
7639,UKC12,NUTS2003,111.081902,9.822134,15.013664,2021,POLYGON ((-0.7936467003412386 54.5584462763246...
7640,UKC11,NUTS2003,92.253082,10.010130,13.173835,2021,MULTIPOLYGON (((-1.2347878213391148 54.5103682...
7641,UKN03,NUTS2003,46.515072,1.200607,2.830120,2021,MULTIPOLYGON (((-5.641572728675446 54.41375189...
7642,UKE12,NUTS2003,280.591919,1.421410,5.261583,2021,POLYGON ((-1.048596355843249 53.65608239478745...


In [4]:
# df_test = df[df['LEVL_CODE'] == 2]
# df_test.to_csv("/Users/wenlanzhang/Downloads/formatted_for_qgis.csv", index=True, index_label="id")

In [None]:
# # Drop the specified columns
# df = df.drop(columns=['WKT'])

In [5]:
# NOTE(review): `df` is not assigned in any visible cell of this notebook (the
# cells above that reference it are commented out), so on a fresh kernel this
# cell would presumably raise NameError. The output below comes from an
# earlier session.
df

Unnamed: 0,geo_source,max,mean,obsTime,stdDev
0,NUTS2003,371.416779,0.700943,2019,3.080153
1,NUTS2003,371.416779,0.562128,2019,2.892622
2,NUTS2003,174.950760,1.227383,2019,4.131787
3,NUTS2003,177.478455,0.398776,2019,1.858099
4,NUTS2003,46.541229,0.807894,2019,2.226023
...,...,...,...,...,...
40546,NUTS2024,25.373180,0.554214,2021,1.470890
40547,NUTS2024,15.038698,0.351601,2021,0.902235
40548,NUTS2024,27.257809,0.400070,2021,1.318365
40549,NUTS2024,46.032799,0.432952,2021,1.798095


## ITL - done

In [46]:
# Monthly ITL exports from GEE: the main "1324" file plus the 2012 and 2025 files.
df_month_ITL_1324 = pd.read_csv(DATA_DIR.joinpath('GEE/NTL/ITLEURO/VIIRS_ITL_Monthly_1324.csv'))

# From the 2012 file keep only September onwards (obsMonth >= 9).
df_month_ITL_12 = (
    pd.read_csv(DATA_DIR.joinpath('GEE/NTL/ITLEURO/VIIRS_ITL_Monthly_12_412.csv'))
    .loc[lambda frame: frame['obsMonth'] >= 9]
)

df_month_ITL_25 = pd.read_csv(DATA_DIR.joinpath('GEE/NTL/ITLEURO/VIIRS_ITL_Monthly_25_13.csv'))

df_month_ITL_1324

Unnamed: 0,geo,mean,std_dev,max,obsTime,obsMonth,geo_source
0,TLC,5.548803,15.424949,664.742737,2013,1,ITL2021
1,TLD,3.878920,8.003008,222.212662,2013,1,ITL2021
2,TLE,3.992559,9.678961,394.192047,2013,1,ITL2021
3,TLF,3.259599,6.794366,202.956100,2013,1,ITL2021
4,TLG,3.320707,6.751225,104.250961,2013,1,ITL2021
...,...,...,...,...,...,...,...
67963,TLN0C,1.000412,1.749246,45.020000,2024,12,ITL2025
67964,TLN0D,2.430332,5.280000,69.320000,2024,12,ITL2025
67965,TLN0E,2.555985,4.103366,39.830002,2024,12,ITL2025
67966,TLN0F,1.267272,2.721968,50.540001,2024,12,ITL2025


In [48]:
# Stack the monthly ITL parts row-wise: 2012, then the 1324 file, then 2025.
# Each part keeps its own row labels.
df_combined_M_ITL = pd.concat(
    (df_month_ITL_12, df_month_ITL_1324, df_month_ITL_25),
    axis=0,
)
df_combined_M_ITL

Unnamed: 0,geo,mean,std_dev,max,obsTime,obsMonth,geo_source
2360,TLC,4.987087,14.769474,222.259628,2012,9,ITL2021
2361,TLD,3.718393,9.775169,347.236572,2012,9,ITL2021
2362,TLE,3.467135,9.479391,258.649567,2012,9,ITL2021
2363,TLF,2.717528,6.850071,148.273834,2012,9,ITL2021
2364,TLG,3.466801,8.695662,186.044067,2012,9,ITL2021
...,...,...,...,...,...,...,...
1411,TLN0C,0.965638,1.774518,34.810001,2025,3,ITL2025
1412,TLN0D,2.670141,5.939079,76.400002,2025,3,ITL2025
1413,TLN0E,2.833741,4.515408,53.799999,2025,3,ITL2025
1414,TLN0F,1.344874,2.656239,41.660000,2025,3,ITL2025


In [49]:
# Distinct month numbers in order of first appearance (the frame starts at
# 2012-09, so 9..12 come first).
pd.unique(df_combined_M_ITL['obsMonth'])

array([ 9, 10, 11, 12,  1,  2,  3,  4,  5,  6,  7,  8])

In [53]:
# Expected rows of the combined monthly ITL table in wide format:
# 472 ITL regions x 151 months.
472  *  151 
# full year: 12 (13-24) * 12 + 3 (2025) + 4 (2012) = 151 (months)

71272

In [55]:
# Use the output schema's name for the standard-deviation column. The frame
# displayed above already has 'std_dev', so this rename changes nothing here.
df_combined_M_ITL = df_combined_M_ITL.rename(columns={'stdDev': 'std_dev'})

# Wide -> long: one row per (geo, geo_source, obsTime, obsMonth, metric).
df_long_M_ITL = df_combined_M_ITL.melt(
    id_vars=['geo', 'geo_source', 'obsTime', 'obsMonth'],
    value_vars=['max', 'mean', 'std_dev'],
    var_name='metric',
    value_name='obsValue',
)

# Put geo_source last, keeping the other columns in their current order.
cols = list(df_long_M_ITL.columns.drop('geo_source')) + ['geo_source']
df_long_M_ITL = df_long_M_ITL.loc[:, cols]
df_long_M_ITL

Unnamed: 0,geo,obsTime,obsMonth,metric,obsValue,geo_source
0,TLC,2012,9,max,222.259628,ITL2021
1,TLD,2012,9,max,347.236572,ITL2021
2,TLE,2012,9,max,258.649567,ITL2021
3,TLF,2012,9,max,148.273834,ITL2021
4,TLG,2012,9,max,186.044067,ITL2021
...,...,...,...,...,...,...
213811,TLN0C,2025,3,std_dev,1.774518,ITL2025
213812,TLN0D,2025,3,std_dev,5.939079,ITL2025
213813,TLN0E,2025,3,std_dev,4.515408,ITL2025
213814,TLN0F,2025,3,std_dev,2.656239,ITL2025


In [16]:
# Export step, currently disabled (uncomment to regenerate the file):
# df_long_M_ITL.to_csv(DATA_DIR/"GEE/NTL/Output/NTL_Monthly_ITL_1225.csv", index=True, index_label="id")

# Reload the exported monthly ITL file and pass it to the project validator.
df_long_M_ITL = pd.read_csv(DATA_DIR.joinpath('GEE/NTL/Output/NTL_Monthly_ITL_1225.csv'))
validate(data=df_long_M_ITL)

Unnamed: 0,id,geo,obsTime,obsMonth,metric,obsValue,geo_source
0,0,TLC,2012,9,max,222.259628,ITL2021
1,1,TLD,2012,9,max,347.236572,ITL2021
2,2,TLE,2012,9,max,258.649567,ITL2021
3,3,TLF,2012,9,max,148.273834,ITL2021
4,4,TLG,2012,9,max,186.044067,ITL2021
...,...,...,...,...,...,...,...
213811,213811,TLN0C,2025,3,std_dev,1.774518,ITL2025
213812,213812,TLN0D,2025,3,std_dev,5.939079,ITL2025
213813,213813,TLN0E,2025,3,std_dev,4.515408,ITL2025
213814,213814,TLN0F,2025,3,std_dev,2.656239,ITL2025


In [32]:
# Expected rows of the long-format monthly ITL table:
# 472 regions x (156 months - 5 dropped months) x 3 metrics.
# The 5 dropped months are 2012-04 .. 2012-08, removed by the obsMonth >= 9
# filter applied to the 2012 file when it is loaded.
472 * (156 - 5) *3

213816

## EURO - done

In [74]:
# Monthly EURO exports from GEE: the main "1324" file plus the 2012 and 2025 files.
df_month_EURO_1324 = pd.read_csv(DATA_DIR.joinpath('GEE/NTL/ITLEURO/VIIRS_EURO_Monthly_1324.csv'))
df_month_EURO_12 = pd.read_csv(DATA_DIR.joinpath('GEE/NTL/ITLEURO/VIIRS_EURO_Monthly_12.csv'))
df_month_EURO_25 = pd.read_csv(DATA_DIR.joinpath('GEE/NTL/ITLEURO/VIIRS_EURO_Monthly_25.csv'))

# Preview (disabled):
# df_month_EURO_12

Unnamed: 0,geo,mean,std_dev,max,obsTime,obsMonth,geo_source
0,AL,0.611301,3.278781,201.148239,2012,4,EURO2021
1,AL0,0.611301,3.278781,201.148239,2012,4,EURO2021
2,AL01,0.636890,3.024700,145.610947,2012,4,EURO2021
3,AL02,1.072213,5.129441,201.148239,2012,4,EURO2021
4,AL03,0.411821,2.423458,199.155518,2012,4,EURO2021
...,...,...,...,...,...,...,...
38533,XK006,0.818545,2.615603,65.392540,2012,12,EURO2025
38534,XK007,1.104862,2.624823,45.289597,2012,12,EURO2025
38535,XK00,1.361831,4.537099,142.969604,2012,12,EURO2025
38536,XK0,1.361831,4.537099,142.969604,2012,12,EURO2025


In [75]:
# Stack the monthly EURO parts row-wise: 2012, then the 1324 file, then 2025.
# Each part keeps its own row labels.
df_combined_M_EURO = pd.concat(
    (df_month_EURO_12, df_month_EURO_1324, df_month_EURO_25),
    axis=0,
)
df_combined_M_EURO

Unnamed: 0,geo,mean,std_dev,max,obsTime,obsMonth,geo_source
0,AL,0.611301,3.278781,201.148239,2012,4,EURO2021
1,AL0,0.611301,3.278781,201.148239,2012,4,EURO2021
2,AL01,0.636890,3.024700,145.610947,2012,4,EURO2021
3,AL02,1.072213,5.129441,201.148239,2012,4,EURO2021
4,AL03,0.411821,2.423458,199.155518,2012,4,EURO2021
...,...,...,...,...,...,...,...
12841,XK006,1.423532,3.137700,50.389999,2025,3,EURO2025
12842,XK007,1.767674,3.227911,61.119999,2025,3,EURO2025
12843,XK00,1.901704,4.685084,148.800003,2025,3,EURO2025
12844,XK0,1.901704,4.685084,148.800003,2025,3,EURO2025


In [80]:
# Expected rows of the long-format monthly EURO table:
# 4282 regions x 156 months x 3 metrics.
4282  *  156 * 3
# full year: 12 (13-24) * 12 + 3 (2025) + 9 (2012) = 156 (months)

2003976

In [79]:
# Use the output schema's name for the standard-deviation column. The frame
# displayed above already has 'std_dev', so this rename changes nothing here.
df_combined_M_EURO = df_combined_M_EURO.rename(columns={'stdDev': 'std_dev'})

# Wide -> long: one row per (geo, geo_source, obsTime, obsMonth, metric).
# NOTE: the long table replaces the wide one under the same name, so this
# cell cannot be re-run without first re-running the concat cell.
df_combined_M_EURO = df_combined_M_EURO.melt(
    id_vars=['geo', 'geo_source', 'obsTime', 'obsMonth'],
    value_vars=['max', 'mean', 'std_dev'],
    var_name='metric',
    value_name='obsValue',
)

# Put geo_source last, keeping the other columns in their current order.
cols = list(df_combined_M_EURO.columns.drop('geo_source')) + ['geo_source']
df_combined_M_EURO = df_combined_M_EURO.loc[:, cols]
df_combined_M_EURO

Unnamed: 0,geo,obsTime,obsMonth,metric,obsValue,geo_source
0,AL,2012,4,max,201.148239,EURO2021
1,AL0,2012,4,max,201.148239,EURO2021
2,AL01,2012,4,max,145.610947,EURO2021
3,AL02,2012,4,max,201.148239,EURO2021
4,AL03,2012,4,max,199.155518,EURO2021
...,...,...,...,...,...,...
2003971,XK006,2025,3,std_dev,3.137700,EURO2025
2003972,XK007,2025,3,std_dev,3.227911,EURO2025
2003973,XK00,2025,3,std_dev,4.685084,EURO2025
2003974,XK0,2025,3,std_dev,4.685084,EURO2025


In [18]:
# Export step, currently disabled (uncomment to regenerate the file):
# df_combined_M_EURO.to_csv(DATA_DIR/"GEE/NTL/Output/NTL_Monthly_EURO_1225.csv", index=True, index_label="id")

# Reload the exported monthly EURO file and pass it to the project validator.
df_combined_M_EURO = pd.read_csv(DATA_DIR.joinpath('GEE/NTL/Output/NTL_Monthly_EURO_1225.csv'))
validate(data=df_combined_M_EURO)

Unnamed: 0,id,geo,obsTime,obsMonth,metric,obsValue,geo_source
0,0,AL,2012,4,max,201.148239,EURO2021
1,1,AL0,2012,4,max,201.148239,EURO2021
2,2,AL01,2012,4,max,145.610947,EURO2021
3,3,AL02,2012,4,max,201.148239,EURO2021
4,4,AL03,2012,4,max,199.155518,EURO2021
...,...,...,...,...,...,...,...
2003971,2003971,XK006,2025,3,std_dev,3.137700,EURO2025
2003972,2003972,XK007,2025,3,std_dev,3.227911,EURO2025
2003973,2003973,XK00,2025,3,std_dev,4.685084,EURO2025
2003974,2003974,XK0,2025,3,std_dev,4.685084,EURO2025


In [33]:
# Expected rows of the exported monthly EURO file:
# 4282 regions x 156 months x 3 metrics.
4282 * 156 *3

2003976

# Month Stray Light (20140101 - 20250301 (-202411) = 134 months)

## NUTS - done

In [23]:
# Monthly stray-light NUTS exports from GEE: 2014_2023, two 2024 parts, and 2025.
S_month_NUTS_1423 = pd.read_csv(DATA_DIR.joinpath('GEE/NTL/VIIRS_NUTS_Monthly_straylight_2014_2023.csv'))
S_month_NUTS_24_1 = pd.read_csv(DATA_DIR.joinpath('GEE/NTL/VIIRS_NUTS_Monthly_straylight_2024_1.csv'))
S_month_NUTS_24_2 = pd.read_csv(DATA_DIR.joinpath('GEE/NTL/VIIRS_NUTS_Monthly_straylight_2024_2.csv'))
S_month_NUTS_25 = pd.read_csv(DATA_DIR.joinpath('GEE/NTL/VIIRS_NUTS_Monthly_straylight_2025.csv'))

S_month_NUTS_1423

Unnamed: 0,geo,mean,std_dev,max,obsTime,obsMonth,geo_source
0,AT,0.729748,2.043080,156.161575,2014,1,NUTS2003
1,AT3,0.743198,2.294332,129.833893,2014,1,NUTS2003
2,AT1,0.947152,2.294405,156.161575,2014,1,NUTS2003
3,AT2,0.510435,1.247023,137.898041,2014,1,NUTS2003
4,AT34,0.949121,1.841226,30.936752,2014,1,NUTS2003
...,...,...,...,...,...,...,...
1622035,SI037,0.761528,1.279041,24.799999,2023,12,NUTS2024
1622036,SI038,0.572651,0.901104,19.190001,2023,12,NUTS2024
1622037,SI043,0.678995,1.382710,38.580002,2023,12,NUTS2024
1622038,SK041,1.120274,3.311446,131.570007,2023,12,NUTS2024


In [24]:
# Stack the stray-light NUTS parts row-wise in file order; each part keeps
# its own row labels.
S_combined_M_NUTS = pd.concat(
    (S_month_NUTS_1423, S_month_NUTS_24_1, S_month_NUTS_24_2, S_month_NUTS_25),
    axis=0,
)
S_combined_M_NUTS

Unnamed: 0,geo,mean,std_dev,max,obsTime,obsMonth,geo_source
0,AT,0.729748,2.043080,156.161575,2014,1,NUTS2003
1,AT3,0.743198,2.294332,129.833893,2014,1,NUTS2003
2,AT1,0.947152,2.294405,156.161575,2014,1,NUTS2003
3,AT2,0.510435,1.247023,137.898041,2014,1,NUTS2003
4,AT34,0.949121,1.841226,30.936752,2014,1,NUTS2003
...,...,...,...,...,...,...,...
40546,SI037,0.714051,1.373483,25.940001,2025,3,NUTS2024
40547,SI038,0.503888,0.718520,14.170000,2025,3,NUTS2024
40548,SI043,0.616872,1.254802,28.920000,2025,3,NUTS2024
40549,SK041,0.597635,1.403199,45.360001,2025,3,NUTS2024


In [27]:
# Use the output schema's name for the standard-deviation column. The frame
# displayed above already has 'std_dev', so this rename changes nothing here.
S_combined_M_NUTS = S_combined_M_NUTS.rename(columns={'stdDev': 'std_dev'})

# Wide -> long: one row per (geo, geo_source, obsTime, obsMonth, metric).
# NOTE: the long table replaces the wide one under the same name, so this
# cell cannot be re-run without first re-running the concat cell.
S_combined_M_NUTS = S_combined_M_NUTS.melt(
    id_vars=['geo', 'geo_source', 'obsTime', 'obsMonth'],
    value_vars=['max', 'mean', 'std_dev'],
    var_name='metric',
    value_name='obsValue',
)

# Put geo_source last, keeping the other columns in their current order.
cols = list(S_combined_M_NUTS.columns.drop('geo_source')) + ['geo_source']
S_combined_M_NUTS = S_combined_M_NUTS.loc[:, cols]
S_combined_M_NUTS

Unnamed: 0,geo,obsTime,obsMonth,metric,obsValue,geo_source
0,AT,2014,1,max,156.161575,NUTS2003
1,AT3,2014,1,max,129.833893,NUTS2003
2,AT1,2014,1,max,156.161575,NUTS2003
3,AT2,2014,1,max,137.898041,NUTS2003
4,AT34,2014,1,max,30.936752,NUTS2003
...,...,...,...,...,...,...
5433829,SI037,2025,3,std_dev,1.373483,NUTS2024
5433830,SI038,2025,3,std_dev,0.718520,NUTS2024
5433831,SI043,2025,3,std_dev,1.254802,NUTS2024
5433832,SK041,2025,3,std_dev,1.403199,NUTS2024


In [20]:
# Export step, currently disabled (uncomment to regenerate the file):
# S_combined_M_NUTS.to_csv(DATA_DIR/"GEE/NTL/Output/NTL_Monthly_Stray_NUTS_1425.csv", index=True, index_label="id")

# Reload the exported stray-light NUTS file and pass it to the project validator.
S_combined_M_NUTS = pd.read_csv(DATA_DIR.joinpath('GEE/NTL/Output/NTL_Monthly_Stray_NUTS_1425.csv'))
validate(data=S_combined_M_NUTS)

Unnamed: 0,id,geo,obsTime,obsMonth,metric,obsValue,geo_source
0,0,AT,2014,1,max,156.161575,NUTS2003
1,1,AT3,2014,1,max,129.833893,NUTS2003
2,2,AT1,2014,1,max,156.161575,NUTS2003
3,3,AT2,2014,1,max,137.898041,NUTS2003
4,4,AT34,2014,1,max,30.936752,NUTS2003
...,...,...,...,...,...,...,...
5433829,5433829,SI037,2025,3,std_dev,1.373483,NUTS2024
5433830,5433830,SI038,2025,3,std_dev,0.718520,NUTS2024
5433831,5433831,SI043,2025,3,std_dev,1.254802,NUTS2024
5433832,5433832,SK041,2025,3,std_dev,1.403199,NUTS2024


In [34]:
# Expected rows of the long-format stray-light NUTS table:
# 13517 regions x 134 months (2014-01 .. 2025-03 without 2024-11) x 3 metrics.
13517 * 134 * 3

5433834

## ITL - done

In [7]:
# Monthly stray-light ITL exports from GEE: 1423, two 2024 parts, and 2025.
S_month_ITL_1423 = pd.read_csv(DATA_DIR.joinpath('GEE/NTL/ITLEURO/VIIRS_ITL_Monthly_straylight_1423.csv'))
S_month_ITL_24_1 = pd.read_csv(DATA_DIR.joinpath('GEE/NTL/ITLEURO/VIIRS_ITL_Monthly_straylight_24_1.csv'))
S_month_ITL_24_2 = pd.read_csv(DATA_DIR.joinpath('GEE/NTL/ITLEURO/VIIRS_ITL_Monthly_straylight_24_2.csv'))
S_month_ITL_25 = pd.read_csv(DATA_DIR.joinpath('GEE/NTL/ITLEURO/VIIRS_ITL_Monthly_straylight_25.csv'))

# Stack the parts row-wise in file order; each part keeps its own row labels.
S_combined_M_ITL = pd.concat(
    (S_month_ITL_1423, S_month_ITL_24_1, S_month_ITL_24_2, S_month_ITL_25),
    axis=0,
)
S_combined_M_ITL

Unnamed: 0,geo,mean,std_dev,max,obsTime,obsMonth,geo_source
0,TLC,3.330656,11.714717,1011.931274,2014,1,ITL2021
1,TLD,3.648317,7.565249,148.001160,2014,1,ITL2021
2,TLE,3.214823,8.350519,431.243561,2014,1,ITL2021
3,TLF,2.481328,5.879153,325.482361,2014,1,ITL2021
4,TLG,3.143970,6.947987,132.107422,2014,1,ITL2021
...,...,...,...,...,...,...,...
1411,TLN0C,0.969557,1.859428,37.840000,2025,3,ITL2025
1412,TLN0D,2.651627,5.928258,80.339996,2025,3,ITL2025
1413,TLN0E,2.743283,4.401037,45.959999,2025,3,ITL2025
1414,TLN0F,1.269722,2.627162,41.660000,2025,3,ITL2025


In [35]:
# Expected rows of the long-format stray-light ITL table:
# 472 regions x 134 months x 3 metrics.
472 * 134 *3

189744

In [8]:
# Use the output schema's name for the standard-deviation column. The frame
# displayed above already has 'std_dev', so this rename changes nothing here.
S_combined_M_ITL = S_combined_M_ITL.rename(columns={'stdDev': 'std_dev'})

# Wide -> long: one row per (geo, geo_source, obsTime, obsMonth, metric).
# NOTE: the long table replaces the wide one under the same name, so this
# cell cannot be re-run without first re-running the load/concat cell.
S_combined_M_ITL = S_combined_M_ITL.melt(
    id_vars=['geo', 'geo_source', 'obsTime', 'obsMonth'],
    value_vars=['max', 'mean', 'std_dev'],
    var_name='metric',
    value_name='obsValue',
)

# Put geo_source last, keeping the other columns in their current order.
cols = list(S_combined_M_ITL.columns.drop('geo_source')) + ['geo_source']
S_combined_M_ITL = S_combined_M_ITL.loc[:, cols]
S_combined_M_ITL

Unnamed: 0,geo,obsTime,obsMonth,metric,obsValue,geo_source
0,TLC,2014,1,max,1011.931274,ITL2021
1,TLD,2014,1,max,148.001160,ITL2021
2,TLE,2014,1,max,431.243561,ITL2021
3,TLF,2014,1,max,325.482361,ITL2021
4,TLG,2014,1,max,132.107422,ITL2021
...,...,...,...,...,...,...
189739,TLN0C,2025,3,std_dev,1.859428,ITL2025
189740,TLN0D,2025,3,std_dev,5.928258,ITL2025
189741,TLN0E,2025,3,std_dev,4.401037,ITL2025
189742,TLN0F,2025,3,std_dev,2.627162,ITL2025


In [21]:
# Export step, currently disabled (uncomment to regenerate the file):
# S_combined_M_ITL.to_csv(DATA_DIR/"GEE/NTL/Output/NTL_Monthly_Stray_ITL_1425.csv", index=True, index_label="id")

# Reload the exported stray-light ITL file and pass it to the project validator.
S_combined_M_ITL = pd.read_csv(DATA_DIR.joinpath('GEE/NTL/Output/NTL_Monthly_Stray_ITL_1425.csv'))
validate(data=S_combined_M_ITL)

Unnamed: 0,id,geo,obsTime,obsMonth,metric,obsValue,geo_source
0,0,TLC,2014,1,max,1011.931274,ITL2021
1,1,TLD,2014,1,max,148.001160,ITL2021
2,2,TLE,2014,1,max,431.243561,ITL2021
3,3,TLF,2014,1,max,325.482361,ITL2021
4,4,TLG,2014,1,max,132.107422,ITL2021
...,...,...,...,...,...,...,...
189739,189739,TLN0C,2025,3,std_dev,1.859428,ITL2025
189740,189740,TLN0D,2025,3,std_dev,5.928258,ITL2025
189741,189741,TLN0E,2025,3,std_dev,4.401037,ITL2025
189742,189742,TLN0F,2025,3,std_dev,2.627162,ITL2025


## EURO - done

In [10]:
# Monthly stray-light EURO exports from GEE: 1423, two 2024 parts, and 2025.
S_month_EURO_1423 = pd.read_csv(DATA_DIR.joinpath('GEE/NTL/ITLEURO/VIIRS_EURO_Monthly_straylight_1423.csv'))
S_month_EURO_24_1 = pd.read_csv(DATA_DIR.joinpath('GEE/NTL/ITLEURO/VIIRS_EURO_Monthly_straylight_24_1.csv'))
S_month_EURO_24_2 = pd.read_csv(DATA_DIR.joinpath('GEE/NTL/ITLEURO/VIIRS_EURO_Monthly_straylight_24_2.csv'))
S_month_EURO_25 = pd.read_csv(DATA_DIR.joinpath('GEE/NTL/ITLEURO/VIIRS_EURO_Monthly_straylight_25.csv'))

# Stack the parts row-wise in file order; each part keeps its own row labels.
S_combined_M_EURO = pd.concat(
    (S_month_EURO_1423, S_month_EURO_24_1, S_month_EURO_24_2, S_month_EURO_25),
    axis=0,
)
S_combined_M_EURO

Unnamed: 0,geo,mean,std_dev,max,obsTime,obsMonth,geo_source
0,AL,0.666451,3.006878,140.805969,2014,1,EURO2021
1,AL0,0.666451,3.006878,140.805969,2014,1,EURO2021
2,AL01,0.621329,2.868292,136.700211,2014,1,EURO2021
3,AL02,1.199861,4.840323,140.805969,2014,1,EURO2021
4,AL03,0.499187,1.986198,103.244179,2014,1,EURO2021
...,...,...,...,...,...,...,...
12841,XK006,1.423532,3.137700,50.389999,2025,3,EURO2025
12842,XK007,1.767674,3.227911,61.119999,2025,3,EURO2025
12843,XK00,1.901704,4.685084,148.800003,2025,3,EURO2025
12844,XK0,1.901704,4.685084,148.800003,2025,3,EURO2025


In [13]:
# Expected rows of the long-format stray-light EURO table:
# 4282 regions x 134 months x 3 metrics.
4282 * 134 *3

1721364

In [12]:
# Use the output schema's name for the standard-deviation column. The frame
# displayed above already has 'std_dev', so this rename changes nothing here.
S_combined_M_EURO = S_combined_M_EURO.rename(columns={'stdDev': 'std_dev'})

# Wide -> long: one row per (geo, geo_source, obsTime, obsMonth, metric).
# NOTE: the long table replaces the wide one under the same name, so this
# cell cannot be re-run without first re-running the load/concat cell.
S_combined_M_EURO = S_combined_M_EURO.melt(
    id_vars=['geo', 'geo_source', 'obsTime', 'obsMonth'],
    value_vars=['max', 'mean', 'std_dev'],
    var_name='metric',
    value_name='obsValue',
)

# Put geo_source last, keeping the other columns in their current order.
cols = list(S_combined_M_EURO.columns.drop('geo_source')) + ['geo_source']
S_combined_M_EURO = S_combined_M_EURO.loc[:, cols]
S_combined_M_EURO

Unnamed: 0,geo,obsTime,obsMonth,metric,obsValue,geo_source
0,AL,2014,1,max,140.805969,EURO2021
1,AL0,2014,1,max,140.805969,EURO2021
2,AL01,2014,1,max,136.700211,EURO2021
3,AL02,2014,1,max,140.805969,EURO2021
4,AL03,2014,1,max,103.244179,EURO2021
...,...,...,...,...,...,...
1721359,XK006,2025,3,std_dev,3.137700,EURO2025
1721360,XK007,2025,3,std_dev,3.227911,EURO2025
1721361,XK00,2025,3,std_dev,4.685084,EURO2025
1721362,XK0,2025,3,std_dev,4.685084,EURO2025


In [22]:
# Export step, currently disabled (uncomment to regenerate the file):
# S_combined_M_EURO.to_csv(DATA_DIR/"GEE/NTL/Output/NTL_Monthly_Stray_EURO_1425.csv", index=True, index_label="id")

# Reload the exported stray-light EURO file and pass it to the project validator.
S_combined_M_EURO = pd.read_csv(DATA_DIR.joinpath('GEE/NTL/Output/NTL_Monthly_Stray_EURO_1425.csv'))
validate(data=S_combined_M_EURO)

Unnamed: 0,id,geo,obsTime,obsMonth,metric,obsValue,geo_source
0,0,AL,2014,1,max,140.805969,EURO2021
1,1,AL0,2014,1,max,140.805969,EURO2021
2,2,AL01,2014,1,max,136.700211,EURO2021
3,3,AL02,2014,1,max,140.805969,EURO2021
4,4,AL03,2014,1,max,103.244179,EURO2021
...,...,...,...,...,...,...,...
1721359,1721359,XK006,2025,3,std_dev,3.137700,EURO2025
1721360,1721360,XK007,2025,3,std_dev,3.227911,EURO2025
1721361,1721361,XK00,2025,3,std_dev,4.685084,EURO2025
1721362,1721362,XK0,2025,3,std_dev,4.685084,EURO2025


# Total number

In [46]:
# NUTS boundary vintages whose region counts feed the totals at the top of
# the notebook.
years = [2003, 2006, 2010, 2013, 2016, 2021, 2024]

# Report the feature count of each vintage's boundary file; a file that
# cannot be read is reported and skipped rather than aborting the loop.
for year in years:
    file_path = DATA_DIR / f"NUTS/NUTS_RG_01M_{year}_3035.geojson"
    try:
        gdf = gpd.read_file(file_path)
    except Exception as e:
        print(f"Failed to read {file_path.name}: {e}")
    else:
        print(f"{year}: {len(gdf)} rows")

2003: 1891 rows
2006: 1931 rows
2010: 1920 rows
2013: 1951 rows
2016: 2016 rows
2021: 2010 rows
2024: 1798 rows


In [23]:
# EURO boundary vintages whose region counts feed the totals at the top of
# the notebook.
years = [2021, 2025]

# Report the feature count of each vintage's boundary file; a file that
# cannot be read is reported and skipped rather than aborting the loop.
for year in years:
    file_path = DATA_DIR / f"new/EURO_{year}_BGC.geojson"
    try:
        gdf = gpd.read_file(file_path)
    except Exception as e:
        print(f"Failed to read {file_path.name}: {e}")
    else:
        print(f"{year}: {len(gdf)} rows")

2021: 2243 rows
2025: 2039 rows


In [25]:
# ITL boundary vintages whose region counts feed the totals at the top of
# the notebook.
years = [2021, 2025]

# Report the feature count of each vintage's boundary file; a file that
# cannot be read is reported and skipped rather than aborting the loop.
for year in years:
    file_path = DATA_DIR / f"ITL/itl_{year}_BGC.shp"
    try:
        gdf = gpd.read_file(file_path)
    except Exception as e:
        print(f"Failed to read {file_path.name}: {e}")
    else:
        print(f"{year}: {len(gdf)} rows")

2021: 232 rows
2025: 240 rows
