# Data extraction methodology

## Indices
* preprocess images
* calculate indices

We extract the following spectral indices for impervious surfaces, water bodies and vegetation:   
* **NDBI**
* **MNDWI**, **NDWI**,
* **NDVI**, **SAVI**


## Population density
* Use closest GHSL population dataset

## LST(Land surface temperature)
* Use Landsat Collection 2 Surface temperature bands
  * for Landsat 8 - band 10
  * for Landsat 4/5/7 - band 6

In [1]:
!pip install eemont
!pip install ee
!pip install geopandas
!pip install rasterio
!pip install earthengine-api
!pip install earthengine-api --upgrade

Collecting rasterio
  Downloading rasterio-1.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.1 kB)
Collecting affine (from rasterio)
  Downloading affine-2.4.0-py3-none-any.whl.metadata (4.0 kB)
Collecting cligj>=0.5 (from rasterio)
  Downloading cligj-0.7.2-py3-none-any.whl.metadata (5.0 kB)
Collecting click-plugins (from rasterio)
  Downloading click_plugins-1.1.1-py2.py3-none-any.whl.metadata (6.4 kB)
Downloading rasterio-1.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (22.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m22.2/22.2 MB[0m [31m76.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading cligj-0.7.2-py3-none-any.whl (7.1 kB)
Downloading affine-2.4.0-py3-none-any.whl (15 kB)
Downloading click_plugins-1.1.1-py2.py3-none-any.whl (7.5 kB)
Installing collected packages: cligj, click-plugins, affine, rasterio
Successfully installed affine-2.4.0 click-plugins-1.1.1 cligj-0.7.2 rasterio-1.4.1
Collecting earthengine-api
  

In [45]:
import ee
import eemont
import geopandas as gpd
import rasterio
import json
import geemap

In [4]:
ee.Authenticate()
ee.Initialize(project="ee-dianamarkovakn")

In [5]:
aoi = ee.Geometry.Polygon([
  [[23.032164119466383, 42.91889685342199],
   [23.032164119466383, 42.39372184157957],

   [23.710569881185133, 42.39372184157957],
   [23.710569881185133, 42.91889685342199]]
]);

In [6]:
spectral_incides = ['NDBI',	'MNDWI',	'NDWI',	'NDVI',	'SAVI']

In [7]:
ST_bands = {'LE07': 'ST_B6',
            'LT05': 'ST_B6',
            'LC08': 'ST_B10', }

Red = {'LE07': 'SR_B3',
       'LT05': 'SR_B3',
       'LC08': 'SR_B4'}

N = {'LE07': 'SR_B4',
       'LT05': 'SR_B4',
       'LC08': 'SR_B5'}

Green = {'LE07': 'SR_B2',
         'LT05': 'SR_B2',
         'LC08': 'SR_B3'}

Blue = {'LE07': 'SR_B1',
        'LT05': 'SR_B1',
        'LC08': 'SR_B2'}

S1 = {'LE07': 'SR_B5',
         'LT05': 'SR_B5',
         'LC08': 'SR_B6'}

S2 = {'LE07': 'SR_B7',
         'LT05': 'SR_B7',
         'LC08': 'SR_B7'}


In [None]:
id_list = [{'id': 'LC08_184030_20191015',
  'collection': 'LC08',
  'season': 'autumn',
  'tier': 'T1',
  'ghs': 'JRC/GHSL/P2023A/GHS_POP/2020'},]

In [19]:
from collections import defaultdict
from datetime import datetime
import pandas as pd

id_list = pd.read_json('id_list.json')
id_list = id_list.to_dict(orient='records')

In [34]:
len(id_list)

37

In [None]:
indices = eemont.indices()
indices.SAVI.formula


'(1.0 + L) * (N - R) / (N + R + L)'

In [25]:
def collect_indices():
    for item in id_list:
        date_str = item['id'].split('_')[-1]
        date = datetime.strptime(date_str, "%Y%m%d")
        image_loc = f"LANDSAT/{item['collection']}/C02/{item['tier']}_L2/{item['id']}"
        print(f"image loc: {image_loc}, {date.strftime('%Y-%m-%d')}")



        scaled = ee.Image(image_loc).preprocess().reproject(crs='EPSG:32634', scale=120).resample('bilinear').clip(aoi)
        sat = ee.Image(image_loc).maskClouds().reproject(crs='EPSG:32634', scale=120).resample('bilinear').clip(aoi)
        pop = ee.Image(item['ghs']).reproject(crs='EPSG:32634', scale=120).resample('bilinear').clip(aoi).toFloat()
        image = sat

        G = sat.select(Green[item['collection']])
        B = sat.select(Blue[item['collection']])
        R = sat.select(Red[item['collection']])
        NIR = sat.select(N[item['collection']])
        SWIR1 = sat.select(S1[item['collection']])
        SWIR2 = sat.select(S2[item['collection']])
        T = sat.select(ST_bands[item['collection']])

        # Compute indices
        NDBI = (SWIR1.subtract(NIR).divide(NIR.add(SWIR1))).rename('NDBI')
        #NDBaI = (SWIR1.subtract(T).divide(SWIR1.add(T))).rename('NDBaI')
        MNDWI = (G.subtract(SWIR1).divide(G.add(SWIR1))).rename('MNDWI')
        NDWI = (G.subtract(NIR).divide(G.add(NIR))).rename('NDWI')
        NDVI = (NIR.subtract(R).divide(NIR.add(R))).rename('NDVI')
        SAVI = (NIR.subtract(R).divide(NIR.add(R).add(0.5)).multiply(1.5)).rename('SAVI')

        # Add indices to image
        image = image.addBands([NDBI, MNDWI, NDWI, NDVI, SAVI])

        # Optional: min/max check for indices
        for index in ['NDBI', 'MNDWI', 'NDWI', 'NDVI', 'SAVI']:
            min_max_dict = image.select(index).reduceRegion(
                reducer=ee.Reducer.minMax(),
                scale=120,
                geometry=aoi,
            ).getInfo()
            print(f"{index} min/max: {min_max_dict}")

        # Add population and LST to image
        image = image.addBands(pop.rename('population_count'))
        LST = scaled.select(ST_bands[item['collection']]).subtract(273.15).rename('LST').toFloat()
        image = image.addBands(LST)

        # Prepare the image for export
        name = item['id']
        image = image.select(['NDBI', 'MNDWI', 'NDWI', 'NDVI', 'SAVI', 'population_count', 'LST'])

        # Cast all bands to float
        image = image.cast({
            'NDBI': 'float', 'MNDWI': 'float',
            'NDWI': 'float', 'NDVI': 'float', 'SAVI': 'float',
            'population_count': 'float', 'LST': 'float'
        })

        # Export the image to Google Drive
        task = ee.batch.Export.image.toDrive(**{
            'image': image,
            'scale': 120,
            'description': name,
            'folder': 'data-indices-crs-fix-more',
            'region': aoi.bounds(),
        })
        task.start()


In [26]:
collect_indices()

image loc: LANDSAT/LT05/C02/T1_L2/LT05_184030_19990415, 1999-04-15
NDBI min/max: {'NDBI_max': 0.18288595859304038, 'NDBI_min': -0.751078132059198}
MNDWI min/max: {'MNDWI_max': 0.7599666993407006, 'MNDWI_min': -0.3182318260035544}
NDWI min/max: {'NDWI_max': 0.2535506269187731, 'NDWI_min': -0.4548091943762553}
NDVI min/max: {'NDVI_max': 0.4872950235277342, 'NDVI_min': -0.3253584646186826}
SAVI min/max: {'SAVI_max': 0.7309321127604055, 'SAVI_min': -0.4880352294616991}
image loc: LANDSAT/LE07/C02/T1_L2/LE07_184030_19990930, 1999-09-30
NDBI min/max: {'NDBI_max': 0.1795346990104908, 'NDBI_min': -0.267261115797084}
MNDWI min/max: {'MNDWI_max': 0.12682137075013492, 'MNDWI_min': -0.3393019420416681}
NDWI min/max: {'NDWI_max': 0.08146522443434186, 'NDWI_min': -0.435170153459736}
NDVI min/max: {'NDVI_max': 0.4552453027139875, 'NDVI_min': -0.06213710909646532}
SAVI min/max: {'SAVI_max': 0.6828568166620456, 'SAVI_min': -0.09320329012707224}
image loc: LANDSAT/LE07/C02/T1_L2/LE07_184030_20000527, 20

In [None]:
def print_crs_and_transform():
    for item in id_list:
        date_str = item['id'].split('_')[-1]
        date = datetime.strptime(date_str, "%Y%m%d")
        image_loc = f"LANDSAT/{item['collection']}/C02/{item['tier']}_L2/{item['id']}"
        print(f"Processing image: {image_loc}, Date: {date.strftime('%Y-%m-%d')}")

        # Load the image from Earth Engine
        landsat_image = ee.Image(image_loc)

        # Get projection info
        landsat_projection = landsat_image.projection()

        # Get CRS and Transform
        landsat_crs = landsat_projection.crs().getInfo()  # CRS as EPSG code
        landsat_transform = landsat_projection.transform().getInfo()  # Affine transform

        # Print CRS and Transform
        print(f"Image ID: {item['id']}")
        print(f"  - CRS: {landsat_crs != 'EPSG:32634'}")
        print(f"  - Transform: {landsat_transform}")
        print("\n")  # Add a newline for readability between images

In [None]:
import ee

# Initialize Earth Engine
ee.Initialize()

# Function to print CRS and transform
def print_pop_crs_and_transform(ghs_dataset):
    pop_image = ee.Image(ghs_dataset)
    pop_projection = pop_image.projection()

    # Get CRS and Transform
    pop_crs = pop_projection.crs().getInfo()  # CRS as EPSG code
    pop_transform = pop_projection.transform().getInfo()  # Affine transform

    # Print CRS and Transform
    print(f"GHS Dataset: {ghs_dataset}")
    print(f"  - CRS: {pop_crs}")
    print(f"  - Transform: {pop_transform}")
    print("\n")  # Add a newline for readability

# # Loop through id_list and print CRS and transform for each population dataset
# for item in id_list:
#     ghs_dataset = item['ghs']  # Assuming 'ghs' key holds the dataset path
#     print_pop_crs_and_transform(ghs_dataset)

In [None]:
# print_crs_and_transform()

In [None]:
# collect_indices()

In [30]:
tasks = ee.batch.Task.list()
for task in tasks[:32]:

    print(task.status())

{'state': 'COMPLETED', 'description': 'LC08_184030_20230722', 'priority': 100, 'creation_timestamp_ms': 1728935576819, 'update_timestamp_ms': 1728935900557, 'start_timestamp_ms': 1728935885564, 'task_type': 'EXPORT_IMAGE', 'destination_uris': ['https://drive.google.com/#folders/1Bbk0_rWMOgmCmKkxryDgFPJsHjC9RCQc'], 'attempt': 1, 'batch_eecu_usage_seconds': 2.7924509048461914, 'id': 'YL6XGD6RMQ2YVII2ZS6LD6Z3', 'name': 'projects/ee-dianamarkovakn/operations/YL6XGD6RMQ2YVII2ZS6LD6Z3'}
{'state': 'COMPLETED', 'description': 'LC08_184030_20221226', 'priority': 100, 'creation_timestamp_ms': 1728935575681, 'update_timestamp_ms': 1728935880079, 'start_timestamp_ms': 1728935868051, 'task_type': 'EXPORT_IMAGE', 'destination_uris': ['https://drive.google.com/#folders/1Bbk0_rWMOgmCmKkxryDgFPJsHjC9RCQc'], 'attempt': 1, 'batch_eecu_usage_seconds': 1.861225962638855, 'id': 'MPCLFX6RQG4HLPB7T47KMGPZ', 'name': 'projects/ee-dianamarkovakn/operations/MPCLFX6RQG4HLPB7T47KMGPZ'}
{'state': 'COMPLETED', 'descr

In [24]:
# prompt: stop all tasks google earth engine

tasks = ee.batch.Task.list()
for task in tasks:
  if task.status()['state'] == 'READY' or task.status()['state'] == 'RUNNING':
    task.cancel()
    print(f"Task {task.id} has been cancelled.")
  else:
    print(f"Task {task.id} status: {task.status()['state']}")

Task SRMMVWIY27S32ZDF6PREQLBI has been cancelled.
Task FLZOEHTDCJB4FL5QXR7NCR3H has been cancelled.
Task EBMUB77H5VQHNHU2X7CB7TBV has been cancelled.
Task QLW6J6TA4ON4VAOOCDJ4EAXU has been cancelled.
Task EVIVBCDLQRYNLU5ZDFO23UEJ has been cancelled.
Task BR54R6NYHI35UVKQLIZLYHLI has been cancelled.
Task 6KL5KHO6H2CIHY7IPVXAL3TM has been cancelled.
Task X4SZ7BZKD2TJ5OF6QGWOLEHB has been cancelled.
Task 5ELB3FICOJRUEPFB25H5XZRT has been cancelled.
Task YLYBJIY26URKD64WJYSPLITW has been cancelled.
Task 375WDFEGFNPY2BEDE7FDRL3E has been cancelled.
Task QULPUDE2VICVB4D6W66NX3DJ has been cancelled.
Task YOLR7KNZZWCE3LKLQXS3D2A7 has been cancelled.
Task HSDTET6ZTTBDGBLPLB4XMBU3 has been cancelled.
Task WZJ2QKOKSZIGQZP6PYLRFAGH has been cancelled.
Task ZWN6DF6M6CF6HTQTPJJJLTGP has been cancelled.
Task EQGD65EAJBEUBY2GFFSYQFCX has been cancelled.
Task CFKUV5EGZEIDBS26VY32YAJF has been cancelled.
Task 5PQVFJBIRMJRS6L7YJ5LZ37K has been cancelled.
Task QZVLKY2OGOUQ63SIQJ5PGVII has been cancelled.


In [None]:
def collect_pop():
    for year in range(1975, 2025, 5):
        pop = ee.Image(f"JRC/GHSL/P2023A/GHS_POP/{year}").clip(aoi)

        name = pop.id().getInfo()

        print(f"name: {name}")

        # Now we export and resample
        task = ee.batch.Export.image.toDrive(**{
            'image': pop,
            'description': name,
            'folder': 'population-ghs-aoi-clipped',
            'scale': 120,
            'region': aoi.bounds()
        })
        task.start()


In [None]:
collect_pop()

name: 2020


In [None]:
# tasks = ee.batch.Task.list()
# for task in tasks[:30]:
#     print(task.status())

In [None]:
L5 = (ee.Image('LANDSAT/LT05/C02/T1/LT05_184030_19990415'))


In [None]:
import ee
import geemap

# Initialize Earth Engine
ee.Initialize()


aoi = ee.Geometry.Polygon([
[ [23.147785867237445, 42.591241793466885],
  [23.546040261768695, 42.591241793466885],
  [23.546040261768695, 42.83542772358794],
  [23.147785867237445, 42.83542772358794],
]]);

# Define urban and rural polygons
Urban = ee.Geometry.MultiPolygon(
        [[[[23.246904788408127, 42.688226341159925],
           [23.323809085283127, 42.65339079664303],
           [23.359514651689377, 42.69882458798372],
           [23.310076175126877, 42.71951119054903],
           [23.271624026689377, 42.71143919029933]]],
         [[[23.255144534501877, 42.74170377728034],
           [23.243471560869065, 42.710934655422136],
           [23.281923709306565, 42.71698880325915],
           [23.282610354814377, 42.72859093579189]]],
         [[[23.380114016923752, 42.70437532879963],
           [23.383547244462815, 42.674597366330666],
           [23.426805911455002, 42.6917592910562],
           [23.41787951985344, 42.7033661401078]]]])

Rural = ee.Geometry.MultiPolygon(
        [[[[23.46457141438469, 42.666771974480405],
           [23.469377932939377, 42.64657286301201],
           [23.49306720295891, 42.65869311722898],
           [23.481737552080002, 42.671568299263335]]],
         [[[23.254394538400174, 42.64365931309603],
           [23.258857734200955, 42.627241935261246],
           [23.282032020089627, 42.635830026978525],
           [23.267784125802518, 42.64530081266675]]]]);

landsat_image = ee.Image('LANDSAT/LC08/C02/T1_L2/LC08_184030_20191015').preprocess().clip(aoi)
st_band = landsat_image.select('ST_B10').subtract(273.15)


mean_urban = st_band.reduceRegion(
    reducer=ee.Reducer.mean(),
    geometry=Urban,
    scale=30,
    bestEffort=True
).get('ST_B10').getInfo()

mean_rural = st_band.reduceRegion(
    reducer=ee.Reducer.mean(),
    geometry=Rural,
    scale=30,
    bestEffort=True
).get('ST_B10').getInfo()


vis_params = {
    'min': mean_rural,
    'max': mean_urban,
    'palette': ['blue', 'cyan', 'yellow', 'orange', 'red'],

}


Map = geemap.Map(center=[42.6977, 23.3219], zoom=10)

Map.addLayer(st_band, vis_params, "Landsat 8 ST Band (Celsius)")
Map.addLayer(aoi, {'color': 'black'}, "AOI")
Map.addLayer(Urban, {'color': 'green'}, "Urban Areas")
Map.addLayer(Rural, {'color': 'yellow'}, "Rural Areas")

print("Mean Surface Temperature in Urban Areas:", mean_urban)
print("Mean Surface Temperature in Rural Areas:", mean_rural)

Map



Mean Surface Temperature in Urban Areas: 23.984798299332656
Mean Surface Temperature in Rural Areas: 21.634435411484127


Map(center=[42.6977, 23.3219], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDa…

In [59]:


with open('sofia-boundaries.json') as f:
    geojson_data = json.load(f)

aoi = ee.FeatureCollection(geojson_data).geometry()


seasons = {
    'spring': [3, 4, 5],
    'summer': [6, 7, 8],
    'autumn': [9, 10, 11],
    # Winter is split into two
    'winter1': [12],  # December of the current year
    'winter2': [1, 2]  # January and February of the next year
}

# Create an empty dictionary to store the mean LST for each season and year
lst_data = defaultdict(dict)
with open('sofia-boundaries.json') as f:
    geojson_data = json.load(f)

aoi = ee.FeatureCollection(geojson_data).geometry()

# Create an empty dictionary to store the mean LST for each season and year
lst_data = defaultdict(dict)

for year in range(2014, 2024):
  for season, months in seasons.items():
    start_date = ee.Date.fromYMD(year, months[0], 1)
    end_date = ee.Date.fromYMD(year, months[-1], 1).advance(1, 'month').advance(-1, 'day')

    # Create an ImageCollection of Landsat 8 surface temperature data
    new_var = ee.ImageCollection('LANDSAT/LC08/C02/T1_L2') \
        .filterBounds(aoi) \
        .filter(ee.Filter.contains(
                leftField = '.geo',
                rightValue = aoi))\
        .filterDate(start_date, end_date)
    landsat_collection = new_var
    band = "ST_B10"

    if landsat_collection.size().getInfo() == 0:
        print(f"No Landsat 8 data for {year} {season}. Searching for Landsat 7...")
        # Search for Landsat 7 data
        landsat_collection = ee.ImageCollection('LANDSAT/LE07/C02/T1_L2') \
                               .filterBounds(aoi) \
                               .filter(ee.Filter.contains(
                                      leftField = '.geo',
                                      rightValue = aoi))\
                               .filterDate(start_date, end_date) \
                               .map(lambda image: image.clip(aoi))
        print(f"Found {landsat_collection.size().getInfo()} Landsat 7 images.")
        band = "ST_B6"
    print(f"Year: {year}, Season: {season}, Collection Size: {landsat_collection.size().getInfo()}")

    # Define a function to calculate the mean LST for each image
    def calculate_mean_lst(image):
        lst_band = image.select(band).subtract(273.15)  # Preprocess and subtract 273.15 directly
        mean_lst = lst_band.reduceRegion(
            reducer=ee.Reducer.mean(),
            geometry=aoi,
            scale=30,
            bestEffort=True
        ).get(band)
        return image.set({'mean_lst': mean_lst})




  # Map the function to the ImageCollection and get the mean LST for the season
    lst_stats = landsat_collection.preprocess().map(lambda image: image.clip(aoi)).map(calculate_mean_lst) \
      .aggregate_array('mean_lst') \
      .getInfo()


    if lst_stats:
        mean_season_lst = sum(lst_stats) / len(lst_stats) if lst_stats else None
        lst_data[year][season] = mean_season_lst
    else:
        print(f"No data for {year} {season}")

df = pd.DataFrame.from_dict(lst_data, orient='index')

print(df)
df.to_csv('lst_data_2014_2018.csv')

Year: 2014, Season: spring, Collection Size: 5
Year: 2014, Season: summer, Collection Size: 6
Year: 2014, Season: autumn, Collection Size: 5
Year: 2014, Season: winter1, Collection Size: 1
Year: 2014, Season: winter2, Collection Size: 4
Year: 2015, Season: spring, Collection Size: 5
Year: 2015, Season: summer, Collection Size: 5
Year: 2015, Season: autumn, Collection Size: 5
Year: 2015, Season: winter1, Collection Size: 2
Year: 2015, Season: winter2, Collection Size: 3
Year: 2016, Season: spring, Collection Size: 4
Year: 2016, Season: summer, Collection Size: 5
Year: 2016, Season: autumn, Collection Size: 4
Year: 2016, Season: winter1, Collection Size: 2
Year: 2016, Season: winter2, Collection Size: 4
Year: 2017, Season: spring, Collection Size: 5
Year: 2017, Season: summer, Collection Size: 6
Year: 2017, Season: autumn, Collection Size: 5
Year: 2017, Season: winter1, Collection Size: 2
Year: 2017, Season: winter2, Collection Size: 2
Year: 2018, Season: spring, Collection Size: 4
Year:

In [60]:


with open('sofia-boundaries.json') as f:
    geojson_data = json.load(f)

aoi = ee.FeatureCollection(geojson_data).geometry()


seasons = {
    'spring': [3, 4, 5],
    'summer': [6, 7, 8],
    'autumn': [9, 10, 11],
    # Winter is split into two
    'winter1': [12],  # December of the current year
    'winter2': [1, 2]  # January and February of the next year
}

# Create an empty dictionary to store the mean LST for each season and year
lst_data = defaultdict(dict)
with open('sofia-boundaries.json') as f:
    geojson_data = json.load(f)

aoi = ee.FeatureCollection(geojson_data).geometry()

# Create an empty dictionary to store the mean LST for each season and year
lst_data = defaultdict(dict)

for year in range(1999, 2014):
  for season, months in seasons.items():
    start_date = ee.Date.fromYMD(year, months[0], 1)
    end_date = ee.Date.fromYMD(year, months[-1], 1).advance(1, 'month').advance(-1, 'day')

    # Create an ImageCollection of Landsat 8 surface temperature data
    new_var = ee.ImageCollection('LANDSAT/LT05/C02/T1_L2') \
        .filterBounds(aoi) \
        .filter(ee.Filter.contains(
                leftField = '.geo',
                rightValue = aoi))\
        .filterDate(start_date, end_date)
    landsat_collection = new_var

    # Check if the collection is empty
    if landsat_collection.size().getInfo() == 0:
        print(f"No Landsat 5 data for {year} {season}. Searching for Landsat 7...")
        # Search for Landsat 7 data
        landsat_collection = ee.ImageCollection('LANDSAT/LE07/C02/T1_L2') \
                               .filterBounds(aoi) \
                               .filter(ee.Filter.contains(
                                      leftField = '.geo',
                                      rightValue = aoi))\
                               .filterDate(start_date, end_date) \
                               .map(lambda image: image.clip(aoi))
        print(f"Found {landsat_collection.size().getInfo()} Landsat 7 images.")


    print(f"Year: {year}, Season: {season}, Collection Size: {landsat_collection.size().getInfo()}")

    # Define a function to calculate the mean LST for each image
    def calculate_mean_lst(image):
        lst_band = image.select('ST_B6').subtract(273.15)  # Preprocess and subtract 273.15 directly
        mean_lst = lst_band.reduceRegion(
            reducer=ee.Reducer.mean(),
            geometry=aoi,
            scale=30,
            bestEffort=True
        ).get('ST_B6')
        return image.set({'mean_lst': mean_lst})


    # Map the function to the ImageCollection and get the mean LST for the season
    lst_stats = landsat_collection.preprocess().map(lambda image: image.clip(aoi)).map(calculate_mean_lst) \
        .aggregate_array('mean_lst') \
        .getInfo()


    if lst_stats:
        mean_season_lst = sum(lst_stats) / len(lst_stats) if lst_stats else None
        lst_data[year][season] = mean_season_lst
    else:
        print(f"No data for {year} {season}")

df = pd.DataFrame.from_dict(lst_data, orient='index')
df.to_csv('lst_data_1999_2014.csv')
print(df)

Year: 1999, Season: spring, Collection Size: 4
Year: 1999, Season: summer, Collection Size: 5
Year: 1999, Season: autumn, Collection Size: 3
Year: 1999, Season: winter1, Collection Size: 1
No data for 1999 winter1
Year: 1999, Season: winter2, Collection Size: 1
Year: 2000, Season: spring, Collection Size: 1
Year: 2000, Season: summer, Collection Size: 4
Year: 2000, Season: autumn, Collection Size: 1
Year: 2000, Season: winter1, Collection Size: 1
Year: 2000, Season: winter2, Collection Size: 1
Year: 2001, Season: spring, Collection Size: 2
Year: 2001, Season: summer, Collection Size: 5
No Landsat 5 data for 2001 autumn. Searching for Landsat 7...
Found 4 Landsat 7 images.
Year: 2001, Season: autumn, Collection Size: 4
No Landsat 5 data for 2001 winter1. Searching for Landsat 7...
Found 1 Landsat 7 images.
Year: 2001, Season: winter1, Collection Size: 1
Year: 2001, Season: winter2, Collection Size: 2
Year: 2002, Season: spring, Collection Size: 2
Year: 2002, Season: summer, Collection S

In [63]:
older = pd.read_csv('lst_data_1999_2014.csv')
newer = pd.read_csv('lst_data_2014_2018.csv')

In [74]:
combined_df = pd.concat([older, newer])
combined_df.sort_values(by='Unnamed: 0', inplace=True)
combined_df.to_csv('combined_lst_data.csv', index=False)

In [75]:
comb = pd.read_csv('combined_lst_data.csv')

In [76]:
comb

Unnamed: 0.1,Unnamed: 0,spring,summer,autumn,winter2,winter1
0,1999,18.510749,28.350053,8.19087,2.312946,
1,2000,-6.597169,31.830755,24.447218,-6.07526,3.339239
2,2001,23.683959,27.767554,12.643873,4.127936,-4.366734
3,2002,17.893894,22.618798,16.497819,-8.686099,-4.510638
4,2003,18.393407,31.265748,24.125077,-2.713349,5.768334
5,2004,,27.868735,15.860216,-7.596758,-2.56607
6,2005,18.819965,27.059701,12.546015,-8.999055,3.027935
7,2006,25.05922,27.84963,17.297769,-3.435947,2.205229
8,2007,21.064099,29.126255,20.259729,7.300056,
9,2008,21.981178,30.460622,18.390873,-0.742434,


In [79]:
# prompt: Чрез DataFrame: comb: average winter 1 and winter 2, but if one is None, keep the value which is not

# Create a new column 'winter_avg' by averaging 'winter1' and 'winter2'.
comb['winter'] = (comb['winter1'] + comb['winter2']) / 2

# If either 'winter1' or 'winter2' is None, keep the value which is not None.
comb['winter'] = comb.apply(lambda row: row['winter1'] if pd.isnull(row['winter2']) else (row['winter2'] if pd.isnull(row['winter1']) else row['winter_avg']), axis=1)

# Print or use the 'winter_avg' column.
#print(comb)

In [82]:

print(comb)

    Unnamed: 0     spring     summer     autumn    winter
0         1999  18.510749  28.350053   8.190870  2.312946
1         2000  -6.597169  31.830755  24.447218 -1.368011
2         2001  23.683959  27.767554  12.643873 -0.119399
3         2002  17.893894  22.618798  16.497819 -6.598369
4         2003  18.393407  31.265748  24.125077  1.527493
5         2004        NaN  27.868735  15.860216 -5.081414
6         2005  18.819965  27.059701  12.546015 -2.985560
7         2006  25.059220  27.849630  17.297769 -0.615359
8         2007  21.064099  29.126255  20.259729  7.300056
9         2008  21.981178  30.460622  18.390873 -0.742434
10        2009  21.544015  28.203011  13.406754 -7.114075
11        2010   8.692611  32.179631  16.517743  3.979031
12        2011  18.163925  30.333165  20.639756  2.721457
13        2012  21.650702  33.875325  22.426786       NaN
14        2013  25.078332  30.087777  16.952587  2.265549
15        2014  23.857086  29.283455  15.828750  5.636597
16        2015

In [81]:

comb.drop(columns=['winter1', 'winter2', 'winter_avg'], inplace=True)

In [83]:
comb.to_csv('combined_lst_data.csv', index=False)