In [None]:
import torch

# Report the PyTorch build and whether a CUDA device can be used.
print('torch version: ', torch.__version__)
print('CUDA available: ', torch.cuda.is_available())
# torch.cuda.get_device_name() raises when no CUDA device is usable, so it is
# only queried when CUDA is actually available; otherwise None is reported.
if torch.cuda.is_available():
    print('GPU device: ', torch.cuda.get_device_name())
else:
    print('GPU device: ', None)

In [None]:
import torch

# Pick the compute device: the GPU when CUDA is usable, the CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("The code is using: ", device)

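#### Please bear in mind that a PyTorch LSTM created with batch_first=True expects input shaped [batch_size, timesteps, features]
#### (with the default batch_first=False the expected layout is [timesteps, batch_size, features]).
#### For the output of the model it is good to have shape (batch_size, num_targets), e.g. (100, 1)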

In [29]:
import pandas as pd
import numpy as np

# # Read Excel files on ASUS laptop
# df_pred = pd.read_excel("F:\PhD Lund\Github\Multi-mission altimetry\Altimetry Results\winnebago\Winnebago_combined_predictions.xlsx")   # columns: DateTime, Predicted_WSE, Sensor
# df_in_situ = pd.read_excel("F:\PhD Lund\Github\Multi-mission altimetry\Altimetry Results\winnebago\Winnebago_combined_in_situ.xlsx")  # columns: DateTime, In_Situ_WSE

# Read Excel files on PC office
df_alt = pd.read_excel("/home/user/PHDLund/PythonProjects_github/PyTorch_tutorial/Multi-mission altimetry/Altimetry Results/winnebago/Winnebago_combined_predictions.xlsx")   # columns: DateTime, Predicted_WSE, Sensor
df_in_situ = pd.read_excel("/home/user/PHDLund/PythonProjects_github/PyTorch_tutorial/Multi-mission altimetry/Altimetry Results/winnebago/Winnebago_combined_in_situ.xlsx")  # columns: DateTime, In_Situ_WSE


# Convert DateTime to datetime format
df_alt['DateTime'] = pd.to_datetime(df_alt['DateTime'])
df_in_situ['DateTime'] = pd.to_datetime(df_in_situ['DateTime'])

# Sanity check: both files must describe the same observations, row by row.
# The row counts are compared first because an element-wise comparison of
# Series with different lengths raises a ValueError rather than reporting
# the intended message.
assert len(df_alt) == len(df_in_situ), (
    f"DateTimes do not match! Row counts differ: {len(df_alt)} vs {len(df_in_situ)}"
)
assert (df_alt['DateTime'] == df_in_situ['DateTime']).all(), "DateTimes do not match!"

# Calendar features derived from the observation time.
df_alt['Year'] = df_alt['DateTime'].dt.year
df_alt['DOY'] = df_alt['DateTime'].dt.dayofyear

# Cyclical encoding of the day of year. The period is fixed at 365 days, so
# day 366 of a leap year lands slightly past one full cycle.
df_alt['DOY_sin'] = np.sin(2 * np.pi * df_alt['DOY'] / 365)
df_alt['DOY_cos'] = np.cos(2 * np.pi * df_alt['DOY'] / 365)

def map_sensor(sensor_name):
    """Map an altimetry sensor label to its integer code.

    Parameters
    ----------
    sensor_name : str
        Sensor label. Only its prefix is inspected, so "S3A-OCOG" is
        treated as "S3A".

    Returns
    -------
    int
        0 for labels starting with "S3A", 1 for "S3B", 2 for "S6",
        3 for "SWOT", and -1 for any unrecognized label or any value that
        is not a string (for example NaN or None).
    """
    # Non-string values have no .startswith(); report them as unknown so the
    # caller's "-1" check can flag them instead of the mapping crashing.
    if not isinstance(sensor_name, str):
        return -1

    # Prefixes are tested in this order; the first match wins.
    prefix_codes = (("S3A", 0), ("S3B", 1), ("S6", 2), ("SWOT", 3))
    for prefix, code in prefix_codes:
        if sensor_name.startswith(prefix):
            return code

    return -1  # Unknown sensor

# Encode every sensor label as an integer code.
df_alt['Sensor_Code'] = df_alt['Sensor'].map(map_sensor)

# Rows coded -1 carry a label that map_sensor did not recognize.
unknown_sensors = df_alt.loc[df_alt['Sensor_Code'].eq(-1)]

# Report the outcome of the mapping.
if unknown_sensors.empty:
    print("✅ All sensor names recognized and mapped successfully.")
else:
    print("⚠️ Unrecognized sensors found:")
    print(unknown_sensors[['DateTime', 'Sensor']])

# Attach the in-situ reference values (pandas aligns them on the row index).
df_alt['In_Situ_WSE'] = df_in_situ['In_Situ_WSE']

# Show the assembled table as the cell output.
df_alt

✅ All sensor names recognized and mapped successfully.


Unnamed: 0,DateTime,Predicted_WSE,Sensor,Year,DOY,DOY_sin,DOY_cos,Sensor_Code,In_Situ_WSE
0,2024-12-21 03:13:32.433,226.800985,S3A-OCOG,2024,356,-0.154309,0.988023,0,226.783
1,2024-11-24 03:13:36.374,226.943142,S3A-OCOG,2024,329,-0.580800,0.814046,0,226.939
2,2024-10-28 03:13:37.287,226.809828,S3A-OCOG,2024,302,-0.884068,0.467359,0,226.939
3,2024-10-01 03:13:39.703,226.961230,S3A-OCOG,2024,275,-0.999769,0.021516,0,226.978
4,2024-09-04 03:13:34.991,226.603141,S3A-OCOG,2024,248,-0.903356,-0.428892,0,227.015
...,...,...,...,...,...,...,...,...,...
241,2023-11-19 04:14:50.000,226.751084,SWOT,2023,323,-0.661635,0.749826,3,226.853
242,2023-10-29 07:29:45.000,227.057801,SWOT,2023,302,-0.884068,0.467359,3,226.676
243,2023-10-08 10:44:39.000,226.964158,SWOT,2023,281,-0.992222,0.124479,3,226.640
244,2023-09-17 13:59:38.000,226.745636,SWOT,2023,260,-0.972118,-0.234491,3,226.646


In [30]:
# Read the ERA5 weather table on PC office.
# Columns used in this notebook: DATE, Temp_Celsius, Prec, Evap.
df_weather = pd.read_excel("/home/user/PHDLund/PythonProjects_github/PyTorch_tutorial/Multi-mission altimetry/Climate Variables/ERA5.xlsx")

# Parse the dates, order the rows chronologically and index them by DATE so
# that date ranges can be selected by label.
df_weather = (
    df_weather
    .assign(DATE=pd.to_datetime(df_weather['DATE']))
    .sort_values('DATE')
    .reset_index(drop=True)
    .set_index('DATE')
)

n = 120 # length of timeseries

# One entry per row of df_alt; an entry is None when the window is incomplete.
temp_series = []
prec_series = []
evap_series = []
valid_flags = []  # to track which rows have full history

for timestamp in df_alt['DateTime']:
    # Window of n calendar days that ends on, and includes, the observation day.
    end_date = timestamp.normalize()
    start_date = end_date - pd.Timedelta(days=n-1)

    # Label-based slicing with .loc keeps both endpoints.
    ts_slice = df_weather.loc[start_date:end_date]

    # Only windows holding exactly n rows are accepted.
    if len(ts_slice) != n:
        temp_series.append(None)
        prec_series.append(None)
        evap_series.append(None)
        valid_flags.append(False)
        continue

    temp_series.append(ts_slice['Temp_Celsius'].values.tolist())
    prec_series.append(ts_slice['Prec'].values.tolist())
    evap_series.append(ts_slice['Evap'].values.tolist())
    valid_flags.append(True)

# Add the weather windows and the completeness flag to df_alt
for column_name, column_values in (
    ('Temp_series', temp_series),
    ('Prec_series', prec_series),
    ('Evap_series', evap_series),
    ('Valid', valid_flags),
):
    df_alt[column_name] = column_values

# Keep only the rows flagged as having a complete time series and renumber them.
df_alt_valid = df_alt.loc[df_alt['Valid'].eq(True)].reset_index(drop=True)

In [31]:
# Display the filtered table (rows of df_alt whose 'Valid' flag is True).
df_alt_valid

Unnamed: 0,DateTime,Predicted_WSE,Sensor,Year,DOY,DOY_sin,DOY_cos,Sensor_Code,In_Situ_WSE,Temp_series,Prec_series,Evap_series,Valid
0,2024-12-21 03:13:32.433,226.800985,S3A-OCOG,2024,356,-0.154309,0.988023,0,226.783,"[19.33, 19.043, 17.202, 15.922, 16.831, 17.215...","[0.001, 0.001, 0.001, 0.006, 0.006, 0.0, 0.004...","[-0.003, -0.003, -0.003, -0.003, -0.004, -0.00...",True
1,2024-11-24 03:13:36.374,226.943142,S3A-OCOG,2024,329,-0.580800,0.814046,0,226.939,"[21.513, 18.627, 17.599, 17.418, 17.574, 19.06...","[0.001, 0.003, 0.005, 0.005, 0.001, 0.003, 0.0...","[-0.004, -0.003, -0.003, -0.003, -0.004, -0.00...",True
2,2024-10-28 03:13:37.287,226.809828,S3A-OCOG,2024,302,-0.884068,0.467359,0,226.939,"[19.695, 19.47, 20.07, 21.173, 21.697, 20.488,...","[0.0, 0.002, 0.007, 0.002, 0.004, 0.005, 0.001...","[-0.005, -0.004, -0.003, -0.003, -0.003, -0.00...",True
3,2024-10-01 03:13:39.703,226.961230,S3A-OCOG,2024,275,-0.999769,0.021516,0,226.978,"[16.238, 16.356, 15.387, 13.619, 13.58, 14.442...","[0.002, 0.007, 0.009, 0.006, 0.003, 0.005, 0.0...","[-0.003, -0.003, -0.003, -0.003, -0.003, -0.00...",True
4,2024-09-04 03:13:34.991,226.603141,S3A-OCOG,2024,248,-0.903356,-0.428892,0,227.015,"[9.33, 9.147, 10.931, 13.217, 14.132, 12.192, ...","[0.006, 0.001, 0.001, 0.003, 0.001, 0.002, 0.0...","[-0.002, -0.002, -0.002, -0.002, -0.002, -0.00...",True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
241,2023-11-19 04:14:50.000,226.751084,SWOT,2023,323,-0.661635,0.749826,3,226.853,"[19.284, 19.163, 18.755, 18.29, 19.247, 20.825...","[0.001, 0.003, 0.001, 0.006, 0.01, 0.004, 0.00...","[-0.004, -0.003, -0.004, -0.004, -0.003, -0.00...",True
242,2023-10-29 07:29:45.000,227.057801,SWOT,2023,302,-0.884068,0.467359,3,226.676,"[16.151, 17.931, 19.413, 20.125, 19.333, 19.08...","[0.006, 0.002, 0.001, 0.002, 0.006, 0.003, 0.0...","[-0.004, -0.004, -0.003, -0.003, -0.003, -0.00...",True
243,2023-10-08 10:44:39.000,226.964158,SWOT,2023,281,-0.992222,0.124479,3,226.640,"[11.69, 12.907, 16.026, 18.082, 16.437, 14.678...","[0.003, 0.008, 0.009, 0.003, 0.003, 0.002, 0.0...","[-0.003, -0.003, -0.003, -0.003, -0.003, -0.00...",True
244,2023-09-17 13:59:38.000,226.745636,SWOT,2023,260,-0.972118,-0.234491,3,226.646,"[16.269, 16.614, 17.425, 15.141, 13.033, 13.43...","[0.003, 0.0, 0.001, 0.003, 0.0, 0.0, 0.0, 0.0,...","[-0.002, -0.002, -0.002, -0.003, -0.002, -0.00...",True


In [32]:
# Save the filtered table; the window length n is embedded in the file name.
output_path = f"/home/user/PHDLund/PythonProjects_github/PyTorch_tutorial/Multi-mission altimetry/Input_to_ML/W_alt_ERA5_env_{n}days.xlsx"
df_alt_valid.to_excel(output_path, index=False)