<a href="https://colab.research.google.com/github/MEGH06/PINT/blob/Timegan/PINT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from torch.utils.data import Dataset, DataLoader
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
def preprocess_plants(row, window_size=60, stride=10, lai_scale=1000, series_len=1440):
    """Cut one plant record into sliding windows of model inputs and targets.

    The record is read positionally: element 0 is skipped, then three
    consecutive runs of ``series_len`` values are taken as LAI, ETr and Tr.

    Args:
        row: 1-D record (e.g. one row of ``DataFrame.values``) holding at
            least ``1 + 3 * series_len`` elements.
        window_size: Number of time steps in each window.
        stride: Offset between the starts of consecutive windows.
        lai_scale: Factor the LAI series is multiplied by.
        series_len: Number of time steps stored per variable. The default
            (1440) reproduces the previously hard-coded column layout.

    Returns:
        ``(input_seqs, target_seqs)``: two lists of equal length. Each input
        is a ``(window_size, 2)`` array with columns ``[scaled LAI, ETr]``;
        each target is the Tr window covering the same time steps.

    Raises:
        ValueError: If ``row`` is too short to contain all three series.
            Without this check the slices would be truncated silently and
            produce ragged or empty windows.
    """
    required_len = 1 + 3 * series_len
    if len(row) < required_len:
        raise ValueError(
            f"row has {len(row)} elements but at least {required_len} are required "
            f"(1 leading column + 3 series of {series_len} steps)"
        )

    # Column boundaries of the three back-to-back series.
    lai_end = 1 + series_len
    etr_end = lai_end + series_len
    tr_end = etr_end + series_len

    LAI = pd.to_numeric(row[1:lai_end]) * lai_scale
    ETr = pd.to_numeric(row[lai_end:etr_end])
    Tr = pd.to_numeric(row[etr_end:tr_end])

    input_seqs = []
    target_seqs = []

    for start in range(0, len(LAI) - window_size + 1, stride):
        end = start + window_size
        # Features are stacked column-wise: column 0 = LAI, column 1 = ETr.
        input_seqs.append(np.stack([LAI[start:end], ETr[start:end]], axis=1))
        target_seqs.append(Tr[start:end])

    return input_seqs, target_seqs

In [None]:
class PlantDataset(Dataset):
    """Windowed (input, target) pairs built from every row of a plant CSV.

    The CSV is loaded with pandas and each row is passed to
    ``preprocess_plants``; the windows from all rows are concatenated into two
    float32 arrays, ``input_seqs`` and ``target_seqs``.
    """

    def __init__(self, csv_path, window_size=60, stride=10, lai_scale=1000):
        """Read ``csv_path`` and pre-compute all windows.

        ``window_size``, ``stride`` and ``lai_scale`` are forwarded unchanged
        to ``preprocess_plants``.
        """
        # Raw table as a NumPy array; kept on the instance under its original name.
        self.df = pd.read_csv(csv_path).values

        collected_inputs = []
        collected_targets = []
        for record in self.df:
            windows_x, windows_y = preprocess_plants(record, window_size, stride, lai_scale)
            collected_inputs.extend(np.array(win, dtype=np.float32) for win in windows_x)
            collected_targets.extend(np.array(win, dtype=np.float32) for win in windows_y)

        self.input_seqs = np.array(collected_inputs, dtype=np.float32)
        self.target_seqs = np.array(collected_targets, dtype=np.float32)

    def __len__(self):
        """Number of windows held by the dataset."""
        return len(self.input_seqs)

    def __getitem__(self, idx):
        """Return the ``idx``-th (input, target) pair as torch tensors."""
        features = self.input_seqs[idx]
        target = self.target_seqs[idx]
        return torch.from_numpy(features), torch.from_numpy(target)


In [None]:
# CSV file for each plant code.
plant_paths = dict(
    MZ="/content/MZ_plants.csv",
    FM="/content/FM_plants.csv",
    SG="/content/SG_plants.csv",
    PM="/content/PM_plants.csv",
)

# One shuffled loader (batches of 32 windows) per plant code.
plant_data = {}
for plant in plant_paths:
    path = plant_paths[plant]
    dataset = PlantDataset(path)
    loader = DataLoader(dataset, batch_size=32, shuffle=True)
    plant_data[plant] = loader


In [None]:
class PINT(nn.Module):
    """Stacked LSTM with a linear head plus one learnable scalar, ``beta``.

    The network maps every time step of the input sequence to a single value.
    ``beta`` is stored unconstrained and exposed through ``beta_learn``, which
    squashes it into the open interval (0.1, 0.8).
    """

    def __init__(self, input_dim=2, hidden_dim=64, num_layer=3):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layer,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_dim, out_features=1)

        # Raw (pre-sigmoid) value of beta; trained together with the network weights.
        self.beta_unconstrained = nn.Parameter(torch.tensor(0.4))

    def forward(self, x):
        """Return one prediction per time step; the trailing size-1 axis is squeezed away."""
        per_step_features, _ = self.lstm(x)
        per_step_output = self.fc(per_step_features)
        return per_step_output.squeeze(-1)

    def beta_learn(self):
        """Current beta, i.e. ``0.1 + 0.7 * sigmoid(beta_unconstrained)``."""
        squashed = torch.sigmoid(self.beta_unconstrained)
        return 0.1 + 0.7 * squashed

In [None]:
def phy_loss(Tr_pred, LAI, ETr, beta):
    """Mean squared error between ``Tr_pred`` and ``(1 - exp(-beta * LAI)) * ETr``."""
    scaling = 1 - torch.exp(-beta * LAI)
    Tr_from_formula = scaling * ETr
    return nn.functional.mse_loss(Tr_pred, Tr_from_formula)

In [8]:
# Build the network, move it to the selected device, and attach an Adam optimizer.
model = PINT(input_dim=2, hidden_dim=64, num_layer=3)
model = model.to(device)
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)

In [9]:
def training_st(model, optimizer, batch, lambda_bal=1):
    """Run one optimisation step on a single ``(X, Y)`` batch.

    The objective is ``data_loss + lambda_bal * phys_loss``: the data term is
    the MSE between the prediction and ``Y``, the physics term comes from
    ``phy_loss`` using input channel 0 as LAI and channel 1 as ETr. Relies on
    the notebook-level ``device`` and ``phy_loss``.

    Returns:
        Tuple of Python floats ``(total_loss, data_loss, phys_loss, beta)``.
        ``beta`` is the value used in this step's loss, i.e. computed before
        the optimizer update.
    """
    model.train()

    inputs, targets = batch
    inputs = inputs.to(device)
    targets = targets.to(device)

    optimizer.zero_grad()
    prediction = model(inputs)
    beta = model.beta_learn()

    # The last axis of the input holds the two features: 0 = LAI, 1 = ETr.
    lai_channel = inputs[:, :, 0]
    etr_channel = inputs[:, :, 1]

    mse = nn.MSELoss()
    data_term = mse(prediction, targets)
    physics_term = phy_loss(prediction, lai_channel, etr_channel, beta)
    objective = data_term + lambda_bal * physics_term

    objective.backward()
    optimizer.step()

    return objective.item(), data_term.item(), physics_term.item(), beta.item()

In [10]:
def train(model, optimizer, train_loader, num_epochs, lambda_bal=1):
    """Train ``model`` for ``num_epochs`` passes over ``train_loader``.

    Every batch is handed to ``training_st``; the per-batch losses it returns
    are averaged over the epoch, stored, and printed.

    Args:
        model: Network forwarded to ``training_st``; put into training mode at
            the start of every epoch.
        optimizer: Optimizer forwarded to ``training_st``.
        train_loader: Sized iterable of batches (``len()`` is the denominator
            of the epoch averages).
        num_epochs: Number of passes over ``train_loader``.
        lambda_bal: Weight of the physics term, forwarded to ``training_st``.
            The default (1) matches the value that was previously always used.

    Returns:
        ``(beta_log, loss_log)``. ``beta_log`` holds, per epoch, the beta
        reported by that epoch's last training step; ``loss_log`` holds
        ``(avg_total, avg_data, avg_physics)`` tuples.

    Raises:
        ValueError: If an epoch starts with an empty ``train_loader``. This
            previously surfaced as a ``ZeroDivisionError`` when averaging.
    """
    beta_log = []
    loss_log = []

    for epoch in range(num_epochs):
        num_batches = len(train_loader)
        if num_batches == 0:
            raise ValueError("train_loader is empty; cannot compute epoch averages")

        model.train()
        total_loss = 0.0
        total_data_loss = 0.0
        total_phys_loss = 0.0

        for batch in train_loader:
            batch_loss, data_loss, phys_loss, beta_val = training_st(
                model, optimizer, batch, lambda_bal
            )
            total_loss += batch_loss
            total_data_loss += data_loss
            total_phys_loss += phys_loss

        avg_loss = total_loss / num_batches
        avg_data_loss = total_data_loss / num_batches
        avg_phys_loss = total_phys_loss / num_batches

        # beta_val is whatever the final batch of this epoch reported.
        beta_log.append(beta_val)
        loss_log.append((avg_loss, avg_data_loss, avg_phys_loss))

        print(f"Epoch {epoch+1}/{num_epochs} | Loss: {avg_loss:.4f} | "
              f"Data Loss: {avg_data_loss:.4f} | Phys Loss: {avg_phys_loss:.4f} | "
              f"β: {beta_val:.4f}")

    return beta_log, loss_log

In [None]:
# Train the one shared model on each plant in turn (same order as before:
# MZ -> FM -> PM -> SG) and keep every run's history instead of overwriting it.
num_epochs = 50
training_order = ["MZ", "FM", "PM", "SG"]

beta_histories = {}
loss_histories = {}
for plant_code in training_order:
    print(f"--- Training on {plant_code} ---")
    beta_histories[plant_code], loss_histories[plant_code] = train(
        model, optimizer, plant_data[plant_code], num_epochs
    )

# Keep the names the separate cells used to leave behind: the history of the last run.
beta_history = beta_histories[training_order[-1]]
loss_history = loss_histories[training_order[-1]]

# TimeGANs

In [17]:
!pip install ydata-synthetic



In [54]:
pip show ydata-synthetic

Name: ydata-synthetic
Version: 2.0.0
Summary: Synthetic data generation methods with different synthetization methods.
Home-page: https://github.com/ydataai/ydata-synthetic
Author: YData
Author-email: community@ydata.ai
License: https://github.com/ydataai/ydata-synthetic/blob/master/LICENSE
Location: /usr/local/lib/python3.11/dist-packages
Requires: ydata-sdk
Required-by: 


In [55]:
from ydata_synthetic.synthesizers import ModelParameters, TrainParameters
from ydata_synthetic.synthesizers.timeseries import TimeSeriesSynthesizer

# Usage: `gan_args` is not defined anywhere at notebook scope, so build it here.
# Values mirror the settings used in generate_noisy_synthetic_data.
# NOTE(review): assumes the ydata-synthetic 1.x `ModelParameters` fields - confirm against the installed version.
gan_args = ModelParameters(batch_size=32, lr=5e-4, noise_dim=32, layers_dim=128)
synth = TimeSeriesSynthesizer(modelname='timegan', model_parameters=gan_args)

ImportError: cannot import name 'ModelParameters' from 'ydata_synthetic.synthesizers' (/usr/local/lib/python3.11/dist-packages/ydata_synthetic/synthesizers/__init__.py)

In [45]:
from ydata_synthetic.synthesizers.timeseries.timegan.model import TimeGAN

In [52]:
from ydata_synthetic.synthesizers.timeseries.timegan.model import TimeGAN
from ydata_synthetic.synthesizers.timeseries import TimeSeriesSynthesizer

def generate_noisy_synthetic_data(plant_paths, seq_len=60, stride=10, noise_level=0.1, snr_db=15):
  """Generate noisy synthetic (LAI, ETr) windows per plant and save them to disk.

  For every ``plant -> csv path`` entry the function:
    1. reads the CSV and cuts each row into windows of ``seq_len`` steps
       (start offsets ``stride`` apart) of ``[LAI * 1000, ETr]``, keeping the
       matching Tr windows;
    2. builds a TimeGAN-based synthesizer, trains it on the windows and draws
       as many synthetic windows as there were real ones;
    3. adds zero-mean Gaussian noise to every (sample, feature) series, scaled
       so that ``var(series) / var(noise)`` targets ``10 ** (snr_db / 10)``;
    4. writes ``synthetic_noisy_<plant>.npz`` (keys ``X``, ``Y``,
       ``original_X``) to the current working directory.

  Args:
      plant_paths: Mapping of plant code to CSV path.
      seq_len: Window length; also passed to the synthesizer as
          ``sequence_length``.
      stride: Offset between the starts of consecutive windows.
      noise_level: Accepted but never read inside this function.
      snr_db: Target signal-to-noise ratio in decibels.

  Returns:
      Dict mapping plant code to ``(noisy synthetic X, Tr windows)``.

  NOTE(review): the outputs recorded in this notebook show this code path
  failing - the ``TimeSeriesSynthesizer`` import raises ImportError, and an
  earlier revision stopped at "'TimeGAN' object has no attribute 'train'".
  The constructor and ``train``/``sample`` calls below therefore need to be
  checked against the installed ydata-synthetic version.
  NOTE(review): the returned/saved ``Y`` is taken from the *original* windows
  and paired with synthetic ``X`` purely by position - confirm that this
  pairing is intended.
  """
  synthetic_datasets = {}
  # Network-size settings handed to TimeGAN as `model_parameters`.
  model_parameters = {
      'hidden_dim': 128, # Assuming layers_dim corresponds to hidden_dim
      'layers_dim': 128,
      'noise_dim': 32
  }

  # Optimisation settings; unpacked as keyword arguments into TimeGAN(...).
  gan_args = {
      'batch_size': 32,
      'lr': 5e-4,
  }

  # Passed as the second positional argument of synth.train(...).
  # number_sequences is 2, matching the two stacked features (LAI, ETr).
  train_args = {
      'epochs': 100,
      'sequence_length': seq_len,
      'number_sequences': 2
  }

  # Define log parameters (adjust if needed based on library usage)
  log_parameters = {
      'sample_every': 10,
      'log_every': 10
  }


  for plant, path in plant_paths.items():
    print(f"Processing {plant} data")
    df = pd.read_csv(path)
    input_seqs = []
    clean_Tr = []

    for _, row in df.iterrows():
      # Same fixed layout and LAI scale (x1000) as preprocess_plants, but
      # hard-coded here rather than shared.
      LAI = pd.to_numeric(row[1:1441]) * 1000
      ETr = pd.to_numeric(row[1441:2881])
      Tr = pd.to_numeric(row[2881:4321])

      for start in range(0, len(LAI)-seq_len+1, stride):
        end = start + seq_len
        X = np.stack([LAI[start:end], ETr[start:end]], axis=1)
        input_seqs.append(X)
        clean_Tr.append(Tr[start:end])  # Store clean Tr

    # All real windows of this plant as one float32 array.
    X_original = np.array(input_seqs, dtype=np.float32)

    # NOTE(review): the three library calls below are unverified; see the
    # failure notes in the docstring.
    # Instantiate the TimeGAN model
    timegan_model = TimeGAN(model_parameters=model_parameters, log_parameters=log_parameters, **gan_args)

    # Instantiate the TimeSeriesSynthesizer with the TimeGAN model
    synth = TimeSeriesSynthesizer(timegan_model=timegan_model)

    # Train the synthesizer
    synth.train(X_original, train_args)


    # Draw one synthetic window per real window.
    synthetic_X = synth.sample(len(X_original))

    # Redefined on every loop iteration. Uses the global NumPy RNG and no seed
    # is set in this function, so results differ between runs.
    def add_snr_noise(data, target_snr_db):
        snr = 10 ** (target_snr_db / 10)  # dB -> linear power ratio
        noise_power = np.var(data) / snr  # signal power is taken as the variance of `data`
        noise = np.random.normal(0, np.sqrt(noise_power), data.shape)
        return data + noise

    # The indexing below treats the sampled data as a 3-D array
    # (sample, time step, feature); noise is scaled per sample and per feature.
    synthetic_X_noisy = np.zeros_like(synthetic_X)
    for i in range(synthetic_X.shape[0]):
        for j in range(synthetic_X.shape[2]):
            synthetic_X_noisy[i,:,j] = add_snr_noise(synthetic_X[i,:,j], snr_db)

    synthetic_datasets[plant] = (synthetic_X_noisy, np.array(clean_Tr))

    # Persist the noisy synthetic inputs, the original Tr windows and the original inputs.
    np.savez(f'synthetic_noisy_{plant}.npz',
            X=synthetic_X_noisy,
            Y=np.array(clean_Tr), # Original clean Tr

            original_X=X_original)

    print(f"Generated {len(synthetic_X_noisy)} noisy synthetic samples for {plant}")

  return synthetic_datasets

ImportError: cannot import name 'TimeSeriesSynthesizer' from 'ydata_synthetic.synthesizers.timeseries' (/usr/local/lib/python3.11/dist-packages/ydata_synthetic/synthesizers/timeseries/__init__.py)

In [53]:
from ydata_synthetic.synthesizers.timeseries import TimeSeriesSynthesizer

ImportError: cannot import name 'TimeSeriesSynthesizer' from 'ydata_synthetic.synthesizers.timeseries' (/usr/local/lib/python3.11/dist-packages/ydata_synthetic/synthesizers/timeseries/__init__.py)

In [51]:
# Generate noisy synthetic windows for every plant at a 10 dB signal-to-noise ratio.
synthetic_data = generate_noisy_synthetic_data(plant_paths, seq_len=60, stride=10, snr_db=10)

Processing MZ data


  synth = TimeGAN(model_parameters=model_parameters, log_parameters=log_parameters, **gan_args)


AttributeError: 'TimeGAN' object has no attribute 'train'