In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Notebook-wide pandas display settings: show at most 20 columns and render
# every float with four decimal places.
pd.options.display.max_columns = 20
pd.options.display.float_format = "{:.4f}".format


In [2]:
# Read the NDVI table from the processed-data directory, then report its
# shape, its column names and the earliest/latest value of the `date` column.
ndvi = pd.read_parquet("../data_processed/ndvi_kurgan_2019_2023.parquet")

print("NDVI shape:", ndvi.shape)
print("NDVI columns:", list(ndvi.columns))
print("NDVI date range:", ndvi["date"].min(), "→", ndvi["date"].max())
# Last expression of the cell: rich display of the first five rows.
ndvi.head(5)


NDVI shape: (13301222, 3)
NDVI columns: ['cell_id', 'date', 'ndvi_mean']
NDVI date range: 2019-05-09 00:00:00 → 2023-09-14 00:00:00


Unnamed: 0,cell_id,date,ndvi_mean
0,0,2019-07-28,0.4338
1,1,2019-07-28,0.5081
2,2,2019-07-28,0.5909
3,3,2019-07-28,
4,4,2019-07-28,0.491


In [3]:
# Read the `y_t_plus_1` table from the processed-data directory, then report
# its shape, its column names and the earliest/latest value of the `date` column.
y = pd.read_parquet("../data_processed/dataset_y_t_plus_1.parquet")

print("Y shape:", y.shape)
print("Y columns:", list(y.columns))
print("Y date range:", y["date"].min(), "→", y["date"].max())
# Last expression of the cell: rich display of the first five rows.
y.head(5)


Y shape: (101494107, 3)
Y columns: ['cell_id', 'date', 'y_t_plus_1']
Y date range: 2024-01-10 00:00:00 → 2024-12-25 00:00:00


Unnamed: 0,cell_id,date,y_t_plus_1
0,0,2024-01-10,0
1,0,2024-01-11,0
2,0,2024-01-12,0
3,0,2024-01-13,0
4,0,2024-01-14,0


In [4]:
# Calendar days that occur in BOTH tables, as a sorted datetime64[D] array.
#
# Each date column is de-duplicated with pandas (hash-based) before it is
# handed to NumPy, so only the distinct timestamps are cast to day
# resolution and sorted inside `np.intersect1d`, instead of every row of the
# two multi-million-row frames. The resulting set of days is unchanged:
# `np.intersect1d` still de-duplicates after the cast, so timestamps that
# fall on the same day collapse exactly as before.
common_dates = np.intersect1d(
    ndvi["date"].drop_duplicates().values.astype("datetime64[D]"),
    y["date"].drop_duplicates().values.astype("datetime64[D]"),
)

print("Common dates:", len(common_dates))


Common dates: 0


In [5]:
# Keep only the label rows whose date lies inside the NDVI date span
# (both end points inclusive) and take an independent copy of them.
#
# NOTE(review): in the run recorded in this notebook the two tables do not
# overlap in time (NDVI ends 2023-09-14, labels start 2024-01-10), so this
# selection is empty — check that the intended label file / date offset is
# being used before building anything on `y_aligned`.
y_aligned = y.loc[
    y["date"].between(ndvi["date"].min(), ndvi["date"].max())
].copy()

print("Aligned y shape:", y_aligned.shape)
print("Date range:", y_aligned["date"].min(), "→", y_aligned["date"].max())


Aligned y shape: (0, 3)
Date range: NaT → NaT
