<p>Example script to extract 1 month of hourly MLST MSG product over a domain</p>

In [2]:
import datetime as dt
import thredds_lsasaf_utils as tlu
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

import rasterio
from rasterio.mask import mask
from shapely.geometry import Point
from shapely import wkt

import statsmodels.api as sm
from statsmodels.genmod.families import Gaussian
from statsmodels.genmod.families.links import Power
from statsmodels.genmod.families import Gamma
from statsmodels.genmod.families.links import log, identity

import pickle
import os

import common_utils

In [1]:
def download_data(dstart, dend, product_freq, LatLonBox):
    """Load the LSA SAF MSG 'LST' variable over a domain as a GeoDataFrame.

    The slots between ``dstart`` and ``dend`` are generated with
    ``tlu.gen_slot_list``, loaded from the THREDDS server with
    ``tlu.load_product_slots_domain`` and flattened to one row per
    (time, lat, lon) grid cell.

    Parameters
    ----------
    dstart, dend : datetime.datetime
        First and last slot of the period, passed to ``tlu.gen_slot_list``.
    product_freq : str
        Slot frequency passed to ``tlu.gen_slot_list`` (callers in this file
        use ``"h"``).
    LatLonBox : list of float
        Domain forwarded unchanged to ``tlu.load_product_slots_domain``;
        callers in this file pass [lat_min, lat_max, lon_min, lon_max].

    Returns
    -------
    geopandas.GeoDataFrame
        Columns ``time``, ``lat``, ``lon``, ``temperature``, ``hour``,
        ``day``, ``month``, ``year`` and a point ``geometry`` built from
        (lon, lat), with CRS EPSG:4326.

    Raises
    ------
    ValueError
        If no slot falls inside the requested period.
    """
    # NOTE(review): credentials should not live in source control. They can be
    # overridden through the LSASAF_USER / LSASAF_PASSWD environment variables;
    # the literals below are kept only as a backward-compatible fallback.
    server_user = os.environ.get("LSASAF_USER", "karpagam")
    server_passwd = os.environ.get("LSASAF_PASSWD", "chip-chop-2025")

    # Change here the product details
    # Go to https://thredds.lsasvcs.ipma.pt/thredds/catalog/catalog.html
    # Navigate selecting satellite, product, format, and date to find the
    # product_path and product file name. This is an example for the MSG MLST.
    product_path = "/MSG/MLST/NETCDF/"
    product_fname = "NETCDF4_LSASAF_MSG_LST_MSG-Disk"
    NcvarsLoad = ['LST']  # list of netcdf variables to load from remote files

    # Initialize product details
    product = tlu.lsa_product(product_path, product_fname)
    product.user = server_user
    product.passwd = server_passwd

    # List of slots to be processed
    slot_list = tlu.gen_slot_list(dstart, dend, product_freq)
    if len(slot_list) == 0:
        # Fail with a clear message instead of an IndexError in the print below
        raise ValueError(
            f"No slots to load between {dstart} and {dend} "
            f"with frequency {product_freq!r}"
        )
    print(f"Will load:{len(slot_list)} files: {slot_list[0]} to {slot_list[-1]}")

    # Load data
    ds_full = tlu.load_product_slots_domain(product, slot_list, NcvarsLoad, LatLonBox=LatLonBox)

    # Flatten the (time, lat, lon) cube into one row per grid cell and slot
    stacked = ds_full['LST'].stack(points=('time', 'lat', 'lon'))
    df = (
        stacked.reset_index(['time', 'lat', 'lon'])
        .to_dataframe(name='temperature')
        .reset_index(drop=True)
    )

    # Calendar components of the slot time, convenient for grouping later
    df['hour'] = df['time'].dt.hour
    df['day'] = df['time'].dt.day
    df['month'] = df['time'].dt.month
    df['year'] = df['time'].dt.year

    # Build the point geometries in one vectorized call (x = lon, y = lat)
    # and declare the coordinates as WGS84 (EPSG:4326)
    gdf = gpd.GeoDataFrame(
        df,
        geometry=gpd.points_from_xy(df['lon'], df['lat']),
        crs="EPSG:4326",
    )

    return gdf

In [None]:
# Latitude/longitude domain to load, as [lat_min, lat_max, lon_min, lon_max]
# (this box covers Rome)
LatLonBox = [41.6899140207028722, 42.0902931428349447, 12.2299337725884012, 12.7300258912577391]

# Slot frequency of the product ("h" = hourly)
product_freq = "h"

# First and last slot of the period to process
dstart = dt.datetime(2022, 8, 12)
dend = dt.datetime(2022, 8, 12, 0, 59, 59)

# Fetch the data for that period and domain
gdf = download_data(dstart, dend, product_freq, LatLonBox)

# Replace every geometry by the result of round_geometry
# (helper defined outside this cell - presumably rounds the coordinates)
gdf['geometry'] = gdf['geometry'].apply(round_geometry)

# Tabular copy without the geometry column, for CSV export
df_no_geometry = gdf.drop(columns='geometry')

# Output file is named after the start and end dates of the period
csv_filename = f'{downloads}/LST_{dstart.year}-{dstart.month}-{dstart.day}_{dend.year}-{dend.month}-{dend.day}.csv'

# Write the CSV (without the row index) and report where it went
df_no_geometry.to_csv(csv_filename, index=False)
print(f'GeoDataFrame without geometry saved as {csv_filename}')

In [None]:
# LST data file
csv_filename = '../csv_data_files/fused_LST_2023-6-2_2023-6-30.csv'

# Land-use profile pickle file
land_use_pickle_file = 'rome_2023_landuse_profile.pkl'

# Directory that receives the fused data file (LST and land-use profile)
destination_dir = '../csv_data_files'

# Fuse the LST data with the land-use profile
gdf = add_land_use(csv_filename, land_use_pickle_file, destination_dir)

# Quick check of the geometry objects' type
print(type(gdf['geometry'].iloc[0]))

# NOTE(review): the output keeps the input's base name and goes to the same
# directory the input is read from - confirm overwriting it is intended.
save_lst_gdf(gdf, destination_dir, os.path.basename(csv_filename))

In [None]:
# Land-use columns to summarize
columns_to_analyze = ['water', 'trees', 'flooded_veg', 'crop', 'built_area','bare_ground', 'range_land']

# One list per statistic, each ordered like columns_to_analyze and
# rounded to two decimals
stats = {
    "Column": list(columns_to_analyze),
    "Min": [round(gdf[name].min(), 2) for name in columns_to_analyze],
    "Max": [round(gdf[name].max(), 2) for name in columns_to_analyze],
    "Mean": [round(gdf[name].mean(), 2) for name in columns_to_analyze],
    "Median": [round(gdf[name].median(), 2) for name in columns_to_analyze],
}

# Tabulate the statistics (one row per land-use column) and show them
stats_df = pd.DataFrame(stats)
print(stats_df)

In [None]:
import matplotlib.pyplot as plt

# Ensure the 'time' column is in datetime format
gdf['time'] = pd.to_datetime(gdf['time'])

# Sort the GeoDataFrame by time for proper plotting
gdf = gdf.sort_values('time')

# Scatter of temperature over time, one small marker per row,
# colored by the value of the 'hour' column
plt.figure(figsize=(10, 6))
scatter_points = plt.scatter(gdf['time'], gdf['temperature'], c=gdf['hour'], cmap='viridis', s=10)

# The color encodes a continuous value, so explain it with a colorbar;
# a legend would be empty because no artist carries a label.
plt.colorbar(scatter_points, label='Hour')

# Customize the plot
plt.title('Temperature Time Series')
plt.xlabel('Time')
plt.ylabel('Temperature')
plt.grid(True)
plt.tight_layout()

# Display the plot
plt.show()
