In [1]:
!pip install torch
!pip install numpy as np
!pip install scikit-learn
!pip install pandas

Collecting torch
  Downloading torch-2.5.1-cp311-cp311-win_amd64.whl.metadata (28 kB)
Collecting sympy==1.13.1 (from torch)
  Downloading sympy-1.13.1-py3-none-any.whl.metadata (12 kB)
Downloading torch-2.5.1-cp311-cp311-win_amd64.whl (203.1 MB)
   ---------------------------------------- 0.0/203.1 MB ? eta -:--:--
   ---------------------------------------- 0.1/203.1 MB 1.1 MB/s eta 0:03:07
   ---------------------------------------- 0.1/203.1 MB 1.3 MB/s eta 0:02:35
   ---------------------------------------- 0.2/203.1 MB 1.8 MB/s eta 0:01:53
   ---------------------------------------- 0.3/203.1 MB 1.7 MB/s eta 0:02:00
   ---------------------------------------- 0.5/203.1 MB 2.1 MB/s eta 0:01:36
   ---------------------------------------- 0.6/203.1 MB 2.2 MB/s eta 0:01:31
   ---------------------------------------- 0.7/203.1 MB 2.2 MB/s eta 0:01:33
   ---------------------------------------- 0.9/203.1 MB 2.4 MB/s eta 0:01:24
   ---------------------------------------- 1.0/203.1 MB 2.

ERROR: Could not find a version that satisfies the requirement as (from versions: none)
ERROR: No matching distribution found for as




In [24]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
import pandas as pd
import numpy as np

## Data Preprocessing

In [49]:
data = pd.read_csv("project/camels-np/240.0.csv")
data.head

<bound method NDFrame.head of              date  snow_depth_water_equivalent_mean  \
0      1981-01-02                            112.51   
1      1981-01-03                            112.73   
2      1981-01-04                            115.08   
3      1981-01-05                            117.59   
4      1981-01-06                            122.11   
...           ...                               ...   
14604  2020-12-27                            106.99   
14605  2020-12-28                            107.02   
14606  2020-12-29                            107.00   
14607  2020-12-30                            106.92   
14608  2020-12-31                            106.82   

       surface_net_solar_radiation_mean  surface_net_thermal_radiation_mean  \
0                                 61.22                              -65.67   
1                                 45.50                              -36.44   
2                                 52.39                              -57

In [12]:
features = [
    'date',
    'snow_depth_water_equivalent_mean',
    'surface_net_solar_radiation_mean',
    'surface_net_thermal_radiation_mean',
    'surface_pressure_mean',
    'temperature_2m_mean',
    'dewpoint_temperature_2m_mean',
    'u_component_of_wind_10m_mean',
    'v_component_of_wind_10m_mean',
    'volumetric_soil_water_layer_1_mean',
    'volumetric_soil_water_layer_2_mean',
    'volumetric_soil_water_layer_3_mean',
    'volumetric_soil_water_layer_4_mean',
    'total_precipitation_sum',
    'potential_evaporation_sum'
]

# Target variable (streamflow)
target = 'streamflow'

In [14]:
X = data[features]
y = data[target]

## Encode dates and scale features

In [16]:
le = LabelEncoder()
X['date'] = le.fit_transform(X['date'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['date'] = le.fit_transform(X['date'])


In [18]:
scaler_X = MinMaxScaler()
X_scaled = scaler_X.fit_transform(X.drop('date', axis=1))
scaler_y = MinMaxScaler()
y_scaled = scaler_y.fit_transform(y.values.reshape(-1, 1))

## Create Sequences for LSTM

In [26]:
sequence_length = 30
X_sequences, y_sequences = [], []
for i in range(len(X_scaled) - sequence_length):
    X_sequences.append(X_scaled[i:i + sequence_length])
    y_sequences.append(y_scaled[i + sequence_length])
X_sequences = np.array(X_sequences)
y_sequences = np.array(y_sequences)

## Split data

In [29]:
train_size = int(len(X_sequences) * 0.8)
X_train, X_test = X_sequences[:train_size], X_sequences[train_size:]
y_train, y_test = y_sequences[:train_size], y_sequences[train_size:]

## Convert to pytorch tensors

In [34]:
train_dataset = TensorDataset(torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.float32))
test_dataset = TensorDataset(torch.tensor(X_test, dtype=torch.float32), torch.tensor(y_test, dtype=torch.float32))
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32)

In [40]:
train_dataset

<torch.utils.data.dataset.TensorDataset at 0x2790fba7610>

## Defining LSTM model with parameter-efficient Fine-Tuning

In [43]:
class StreamflowPredictor(nn.Module):
    def __init__(self, input_dim, hidden_dim, pretrained_weights=None):
        super(StreamflowPredictor, self).__init__()
        if pretrained_weights:
            self.lstm = pretrained_weights
            for param in self.lstm.parameters():
                param.requires_grad = False  # Freeze pre-trained weights
        else:
            self.lstm = nn.LSTM(input_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        _, (hidden, _) = self.lstm(x)
        return self.fc(hidden[-1])

## Load a pre-trained model or train from scratch

In [46]:
pretrained_weights = None  # Replace with loaded weights if available
input_dim = X_train.shape[2]
hidden_dim = 64
model = StreamflowPredictor(input_dim, hidden_dim, pretrained_weights)

## Define optimizer and loss function

In [51]:
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

## Train Model

In [56]:
epochs = 20
for epoch in range(epochs):
    model.train()
    epoch_loss = 0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        predictions = model(X_batch)
        loss = criterion(predictions.squeeze(), y_batch)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss/len(train_loader)}")

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 1/20, Loss: nan
Epoch 2/20, Loss: nan
Epoch 3/20, Loss: nan
Epoch 4/20, Loss: nan
Epoch 5/20, Loss: nan
Epoch 6/20, Loss: nan
Epoch 7/20, Loss: nan
Epoch 8/20, Loss: nan
Epoch 9/20, Loss: nan
Epoch 10/20, Loss: nan
Epoch 11/20, Loss: nan
Epoch 12/20, Loss: nan
Epoch 13/20, Loss: nan
Epoch 14/20, Loss: nan
Epoch 15/20, Loss: nan
Epoch 16/20, Loss: nan
Epoch 17/20, Loss: nan
Epoch 18/20, Loss: nan
Epoch 19/20, Loss: nan
Epoch 20/20, Loss: nan


## Save the model

In [None]:
torch.save(model.state_dict(), "streamflow_model.pth")