In [None]:
# Mount Google Drive at /content/drive so the dataset CSV read below is reachable.
# This uses the google.colab helper, so the cell only works inside Colab.
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Dense, Conv1D, MaxPool1D, Flatten, Dropout, GRU, TimeDistributed # importing dense layer
from tensorflow.keras.layers import Concatenate
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Sequential #importing Sequential layer
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

In [None]:
#Load Dataset
# Read the merged weather/marine CSV from the mounted Drive into a DataFrame.
# NOTE(review): this is an absolute Colab path; the cell only works when Drive
# is mounted at /content/drive and the file sits directly in MyDrive.
data_path = '/content/drive/MyDrive/merged_weather_marine_data.csv'
data=pd.read_csv(data_path)

In [None]:
#Cleaning and Preprocessing

# Descriptive statistics of the raw frame (printed once; the cell used to
# print the same table twice).
print(data.describe())

# NOTE(review): an earlier note listed the expected columns as temperature
# (min, max, avg), precipitation, snow, wind (speed, direction, peak gust),
# pressure and sunshine duration -- confirm against the column list printed here.
print(data.columns)

# Number of missing values per column
print(data.isnull().sum())

# Number of unique entries per column
print(data.nunique())

# Keep only the non-object columns: every object-typed column is dropped
object_cols = data.select_dtypes(include=['object']).columns
df = data.drop(columns=object_cols)

# Drop rows that still contain missing values
# (a previous run reported 58 dropped rows -- may differ for other data)
df = df.dropna()
#df['sail_decision'] = 1
print(df.columns)
print(df.shape)

       temperature_2m (°C)  relative_humidity_2m (%)  dew_point_2m (°C)  \
count         26304.000000              26304.000000       26304.000000   
mean             27.655056                 77.906060          23.403665   
std               0.975380                  5.904932           0.954461   
min              23.500000                 54.000000          17.600000   
25%              27.000000                 74.000000          23.000000   
50%              27.700000                 78.000000          23.600000   
75%              28.400000                 82.000000          24.000000   
max              31.100000                 95.000000          26.200000   

       apparent_temperature (°C)  precipitation (mm)     rain (mm)  \
count               26304.000000        26304.000000  26304.000000   
mean                   31.119514            0.231858      0.231858   
std                     1.929716            0.720542      0.720542   
min                    24.600000            

In [None]:
def is_safe_to_sail(row):
    """Label one observation as safe ('yes') or unsafe ('no') for sailing.

    `row` is anything indexable by column name (for example the row handed
    over by `df.apply(..., axis=1)`). The answer is 'yes' only when every rule
    below holds. Rules are checked in the listed order and checking stops at
    the first rule that fails.
    """
    # (column, predicate) pairs in evaluation order.
    safety_rules = (
        ('temperature_2m (°C)', lambda value: 20 <= value <= 30),
        ('precipitation (mm)', lambda value: value == 0),
        ('wind_speed_10m (km/h)', lambda value: value < 15),
        ('wave_height (m)', lambda value: value <= 1.5),
        ('cloud_cover (%)', lambda value: value < 70),
        ('wind_speed_10m_max (km/h)', lambda value: value < 20),
        ('wave_height_max (m)', lambda value: value <= 2.0),
        ('wind_wave_height (m)', lambda value: value <= 1.0),
        ('swell_wave_height (m)', lambda value: value <= 1.5),
        ('weather_code (wmo code)', lambda value: value in [0, 1, 2, 3, 4]),
    )

    # The generator keeps the lazy, in-order evaluation of an `and` chain.
    every_rule_holds = all(check(row[column]) for column, check in safety_rules)
    return 'yes' if every_rule_holds else 'no'

# Label every row, then encode the label as 1 ('yes') / 0 ('no'). The scaling
# cell that follows passes the whole frame to MinMaxScaler, which cannot handle
# a string column.
df['safe_to_sail'] = df.apply(is_safe_to_sail, axis=1).map({'yes': 1, 'no': 0})
# TODO: write the labelled frame back to csv


In [None]:
# Scale every column of the frame into [0, 1].
# NOTE(review): the scaler is fitted on the full frame before the train/test
# split, so test-set statistics leak into training -- consider fitting it on
# the training rows only.
scaler = MinMaxScaler(feature_range=(0, 1))
data_scaled = scaler.fit_transform(df)

print(data_scaled.shape)

# Turn the table into sliding windows: each sample is `steps` consecutive rows
# and its target is the single row that immediately follows the window.
steps= 20
inp = []
out = []
for i in range(len(data_scaled) - (steps)):
    inp.append(data_scaled[i:i+steps])
    out.append(data_scaled[i+steps])

print(len(inp))  # number of (window, target) pairs: len(data_scaled) - steps

inp= np.asarray(inp)
out= np.asarray(out)

print(inp.shape)
print(out.shape)

# Ordered (unshuffled) split. The split point is derived from the number of
# windows rather than the number of raw rows, so the first 70% of the samples
# really end up in the training set.
train_test_ratio = 0.7
train_size = int(train_test_ratio * inp.shape[0])
print(train_size)

# First `train_size` windows for training, the remainder for testing
x_train_tseries = inp[:train_size,:,:]
x_test_tseries = inp[train_size:,:,:]
y_train_tseries = out[:train_size]
y_test_tseries= out[train_size:]

print(x_train_tseries.shape)
print(x_test_tseries.shape)
print(y_train_tseries.shape)
print(y_test_tseries.shape)

# TODO: feature engineering for the static data



(12521, 28)
12501
(12501, 20, 28)
(12501, 28)
8764
(8764, 20, 28)
(3737, 20, 28)
(8764, 28)
(3737, 28)


In [None]:
# Time-series branch: GRU over the input window, a per-timestep Dense layer,
# then flatten to a single vector per sample.
input_tseries = Input(shape=(x_train_tseries.shape[1], x_train_tseries.shape[2]))
gru_out = GRU(32, return_sequences=True)(input_tseries)
time_dist1 = TimeDistributed(Dense(units=8, activation = 'relu'))(gru_out)
flatten = Flatten()(time_dist1)   # open question: add Dense layers of 168, 84 and 42 neurons here?
temp_our = Dense(32)(flatten)
gru_out_binary = Dense(1, activation='sigmoid', name='binary_output')(flatten) # this head may end up being removed

# Static branch: concatenated with the 32-unit projection of the time-series branch.
# NOTE(review): x_train_static is not defined in any of the cells above -- confirm where it comes from.
input_static = Input(shape=(x_train_static.shape[1],))
combined = Concatenate()([temp_our, input_static])

# Regression head on the combined features; the final tensor stays bound to `x`.
x = Dense(64, activation='relu')(combined)
x = Dense(32, activation='relu')(x)
x = Dense(1, activation='relu', name='regression_output')(x)

In [None]:
# Two-headed model: the sigmoid `binary_output` head plus the
# `regression_output` head. The regression tensor is `x`, the last layer built
# in the model-definition cell. `out` is the NumPy target array created during
# windowing and is not a Keras tensor, so it cannot be a model output.
weather_model = Model(inputs=[input_tseries, input_static], outputs=[gru_out_binary, x])

# Compile the model with one loss and one metric per named output
weather_model.compile(optimizer=Adam(),
              loss={'binary_output': 'binary_crossentropy', 'regression_output': 'mse'},
              metrics={'binary_output': 'accuracy', 'regression_output': 'mae'})

weather_model.summary()

In [None]:
# Train both heads jointly. Targets are keyed by output-layer name and 20% of
# the training samples are held out for validation.
history = weather_model.fit(
    x=[x_train_tseries, x_train_static],
    y={
        'binary_output': y_train_binary,
        'regression_output': y_train_regression,
    },
    batch_size=32,
    epochs=50,
    validation_split=0.2,
)

In [None]:
# Evaluate the two-headed model on the held-out test windows.
# `weather_model` is the model trained above; the bare name `model` is not
# defined at this point of the notebook.
results = weather_model.evaluate([x_test_tseries, x_test_static],
                                 {'binary_output': y_test_binary, 'regression_output': y_test_regression})

In [None]:
# Forecast several steps ahead; the inverse scaling has not been applied yet
# NOTE(review): x_forecast_tseries / x_forecast_static are not defined in the cells above -- placeholders to be supplied.
forecast_binary, forecast_regression = weather_model.predict([x_forecast_tseries, x_forecast_static])  # x_forecast must be replaced later

print("Prediksi Biner:", forecast_binary)
print("Prediksi Regresi:", forecast_regression)

# Repeat the prediction for the next 7 days
forecast_days = 7
forecasts_binary = []
forecasts_regression = []

for _ in range(forecast_days):
    forecast_binary, forecast_regression = weather_model.predict([x_forecast_tseries, x_forecast_static])
    forecasts_binary.append(forecast_binary[0][0])
    forecasts_regression.append(forecast_regression[0][0])

    # Update the data for the next day: shift the window one step along the time axis
    x_forecast_tseries = np.roll(x_forecast_tseries, -1, axis=1)
    x_forecast_tseries[0, -1, :] = forecast_regression[0][0]  # Treat the regression prediction as the features of the next timestep (the same value is written to every feature)

print("Prediksi Biner untuk 7 hari ke depan:", forecasts_binary)
print("Prediksi Regresi untuk 7 hari ke depan:", forecasts_regression)

**Non-seq2seq model without static data (binary output only)**

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, GRU, Dense, Concatenate, Flatten, Lambda
from tensorflow.keras.optimizers import Adam

# Hyperparameters, taken from the windowed time-series tensors
input_seq_len = x_train_tseries.shape[1]
num_features = x_train_tseries.shape[2]
target_seq_len = x_train_tseries.shape[1]  # Number of steps to predict (not used by this binary-only model)
latent_dim = 32  # Dimension of the latent space

# Binary training target taken from the window targets.
# NOTE(review): presumably the last column of the scaled frame is the
# safe_to_sail label (it is the last column added to `df`) -- verify before
# relying on these labels.
y_train_binary = (y_train_tseries[:, -1:] >= 0.5).astype('float32')

# Define the GRU model (no static-input branch in this variant)
input_seq = Input(shape=(input_seq_len, num_features))

gru_out = GRU(latent_dim, return_sequences=True)(input_seq)
seq_output = Dense(num_features, activation='linear')(gru_out)

# Flatten the sequence output
flatten_seq = Flatten()(seq_output)

# Dense layers for binary classification
x = Dense(64, activation='relu')(flatten_seq)
x = Dense(32, activation='relu')(x)
x = Dense(16, activation='relu')(x)
binary_output = Dense(1, activation='sigmoid', name='binary_output')(x)

# Define and compile the final model
model = Model(inputs=input_seq, outputs=binary_output)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

# Train the model on the same tensors the input layer was sized from.
# Fitting on the stale `x_train` raised "expected shape=(None, 20, 28),
# found shape=(32, 10, 28)".
model.fit(x_train_tseries, y_train_binary, epochs=50, batch_size=32, validation_split=0.2)

# Function to predict the binary classification for a batch of sequences
def predict_sequence_and_binary(input_seq, static_input, model):
    """Return whatever `model.predict(input_seq)` returns.

    `static_input` is accepted but not forwarded to the model.
    """
    return model.predict(input_seq)

# Example usage: classify the first training window. The sample comes from
# the tensor the model was built for, not from an unrelated `x_train`.
input_seq = x_train_tseries[:1]
static_input = None  # this model has no static branch; the helper ignores the argument
binary_prediction = predict_sequence_and_binary(input_seq, static_input, model)

print("Binary Prediction:", binary_prediction)

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 20, 28)]          0         
                                                                 
 gru_1 (GRU)                 (None, 20, 32)            5952      
                                                                 
 dense_4 (Dense)             (None, 20, 28)            924       
                                                                 
 flatten_1 (Flatten)         (None, 560)               0         
                                                                 
 dense_5 (Dense)             (None, 64)                35904     
                                                                 
 dense_6 (Dense)             (None, 32)                2080      
                                                                 
 dense_7 (Dense)             (None, 16)                528 

ValueError: in user code:

    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1401, in train_function  *
        return step_function(self, iterator)
    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1384, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1373, in run_step  **
        outputs = model.train_step(data)
    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1150, in train_step
        y_pred = self(x, training=True)
    File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/input_spec.py", line 298, in assert_input_compatibility
        raise ValueError(

    ValueError: Input 0 of layer "model_1" is incompatible with the layer: expected shape=(None, 20, 28), found shape=(32, 10, 28)


**Non-seq2seq model with static data**

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, GRU, Dense, Concatenate, Flatten, Lambda
from tensorflow.keras.optimizers import Adam

# Hyperparameters (placeholder values for the synthetic data below; to be
# replaced by the shapes of the real time-series tensors)
input_seq_len = 10
num_features = 20
target_seq_len = 10  # Number of steps to predict
latent_dim = 64  # Dimension of the latent space

# The sequence head emits one vector per input timestep, so the sequence
# target must have the same number of steps as the input window.
assert target_seq_len == input_seq_len, "sequence target length must equal input length"

# Example data preparation: synthetic data, to be replaced by the time-series tensors
x_train = np.random.rand(1000, input_seq_len, num_features)  # 1000 samples, 10 time steps, num_features features
y_train = np.random.rand(1000, target_seq_len, num_features)   # 1000 samples, 10 time steps, num_features features
x_train_static = np.random.rand(1000, 5)          # 1000 samples, 5 static features
y_train_binary = np.random.randint(0, 2, size=(1000, 1))  # 1000 samples, binary output

# Define the GRU model
input_seq = Input(shape=(input_seq_len, num_features))
static_input = Input(shape=(x_train_static.shape[1],))

gru_out = GRU(latent_dim, return_sequences=True)(input_seq)
seq_output = Dense(num_features, activation='linear')(gru_out)

# Flatten the sequence output
flatten_seq = Flatten()(seq_output)

# Concatenate the sequence output with static data
combined = Concatenate()([flatten_seq, static_input])

# Dense layers for binary classification
x = Dense(64, activation='relu')(combined)
x = Dense(32, activation='relu')(x)
binary_output = Dense(1, activation='sigmoid', name='binary_output')(x)

# Define and compile the final model. Metrics are given per output (same
# order as `outputs`): accuracy is meaningless for the MSE-trained sequence
# head, so that head reports MAE and only the binary head reports accuracy.
model = Model(inputs=[input_seq, static_input], outputs=[seq_output, binary_output])
model.compile(
    optimizer='adam',
    loss=['mse', 'binary_crossentropy'],
    metrics=[['mae'], ['accuracy']],
)

# Train the model
model.fit([x_train, x_train_static], [y_train, y_train_binary], epochs=50, batch_size=32, validation_split=0.2)

# Function to predict sequence output and binary classification
def predict_sequence_and_binary(input_seq, static_input, model):
    """Run `model.predict` on `[input_seq, static_input]`.

    The model is expected to return exactly two results, which are handed
    back as a `(seq_output, binary_output)` tuple.
    """
    predictions = model.predict([input_seq, static_input])
    seq_output, binary_output = predictions
    return seq_output, binary_output

# Example usage: run the helper on the first training sample and its static features
input_seq, static_input = x_train[:1], x_train_static[:1]
predicted_sequence, binary_prediction = predict_sequence_and_binary(
    input_seq, static_input, model
)

print("Predicted Sequence:", predicted_sequence)
print("Binary Prediction:", binary_prediction)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Predicted Sequence: [[[ 0.54411674  0.7707642   0.49372342  0.69172174  0.5205768
    0.6457886   0.5554383   0.6507038   0.7747755   0.74932873
    0.45969954  0.5946907   0.566247    0.63933855  0.683769
    0.77335006  0.69185966  0.6961073   0.5491131   0.6458108 ]
  [ 0.44741768  0.4611136   0.1707597   0.35720122  0.42490277
    0.58109766  0.45260587  0.34372953  0.71284175  0.74022853
    0.4414299

In [None]:
#torch seq2seq classification
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

class Encoder(nn.Module):
    """GRU encoder that reduces a source sequence to the GRU's final hidden state."""

    def __init__(self, input_dim, hidden_dim, n_layers, dropout):
        super().__init__()
        self.rnn = nn.GRU(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=n_layers,
            dropout=dropout,
        )

    def forward(self, src):
        """Run the GRU over `src`; the per-step outputs are discarded."""
        _, final_hidden = self.rnn(src)
        return final_hidden

class Decoder(nn.Module):
    """Single-step GRU decoder with a feature head and a sigmoid binary head."""

    def __init__(self, output_dim, hidden_dim, n_layers, dropout):
        super().__init__()
        self.rnn = nn.GRU(
            input_size=output_dim,
            hidden_size=hidden_dim,
            num_layers=n_layers,
            dropout=dropout,
        )
        self.fc_out = nn.Linear(in_features=hidden_dim, out_features=output_dim)
        self.fc_binary = nn.Linear(in_features=hidden_dim, out_features=1)  # binary head has a single output unit
        self.dropout = nn.Dropout(p=dropout)  # created here but not applied in forward()

    def forward(self, input, hidden):
        """Decode one step and return `(prediction, hidden, binary_output)`."""
        # Add a leading axis so the GRU sees a length-1 sequence.
        step_input = input.unsqueeze(0)
        rnn_out, hidden = self.rnn(step_input, hidden)
        step_features = rnn_out.squeeze(0)

        prediction = self.fc_out(step_features)
        # The binary head keeps its sigmoid activation.
        binary_output = torch.sigmoid(self.fc_binary(step_features))
        return prediction, hidden, binary_output

class Seq2Seq(nn.Module):
    """Encoder-decoder wrapper producing per-step feature and binary outputs.

    Args:
        encoder: module mapping `src` to the decoder's initial hidden state.
        decoder: module called as `decoder(input, hidden)` and returning
            `(prediction, hidden, binary_output)`; it must expose
            `fc_out.out_features`.
        hidden_dim: input width of the `fc_binary` layer.
        device: device on which the output buffers are allocated.
    """

    def __init__(self, encoder, decoder, hidden_dim,device):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        # Not used by forward(): applying it to the decoder's feature prediction
        # (width output_dim, not hidden_dim) is what raised
        # "mat1 and mat2 shapes cannot be multiplied". Kept so the module's
        # attributes and parameter layout stay unchanged.
        self.fc_binary = nn.Linear(hidden_dim, 1)
        self.device = device

    def forward(self, src, trg, teacher_forcing_ratio = 0.5):
        """Decode `trg.shape[0] - 1` steps, seeded with `trg[0]`.

        Returns:
            `(outputs, binary_outputs)`, both indexed by target step on the
            first axis and by `trg.shape[1]` on the second. Index 0 of both
            buffers is left at zero because decoding starts at step 1.
        """
        trg_len, batch_size = trg.shape[0], trg.shape[1]
        out_features = self.decoder.fc_out.out_features
        # Allocate on the model's own device instead of a module-level global.
        outputs = torch.zeros(trg_len, batch_size, out_features, device=self.device)
        binary_outputs = torch.zeros(trg_len, batch_size, 1, device=self.device)

        hidden = self.encoder(src)
        input = trg[0,:,:]

        for t in range(1, trg_len):
            output, hidden, binary_output = self.decoder(input, hidden)
            outputs[t] = output
            binary_outputs[t] = binary_output
            # Feed either the ground-truth step or the model's own prediction.
            teacher_force = np.random.random() < teacher_forcing_ratio
            input = trg[t,:,:] if teacher_force else output

        # Return the collected per-step binary outputs.
        return outputs, binary_outputs

# Intended settings once the real time-series tensors are wired in:
# input_dim = x_train_tseries.shape[2]
# hidden_dim = 64
# output_dim = x_train_tseries.shape[2]
# n_layers = 2
# dropout = 0.5

# Placeholder settings used with the dummy tensors created further below
input_dim = 20
hidden_dim = 64
output_dim = 20
n_layers = 2
dropout = 0.5

# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Build the encoder/decoder pair and move the combined model to `device`
encoder = Encoder(input_dim, hidden_dim, n_layers, dropout)
decoder = Decoder(output_dim, hidden_dim, n_layers, dropout)
model = Seq2Seq(encoder, decoder, hidden_dim,device).to(device)

# Adam with default settings; MSE for the feature output, BCE for the binary output
optimizer = optim.Adam(model.parameters())
criterion = nn.MSELoss()
binary_criterion = nn.BCELoss()

def train(model, iterator, optimizer, criterion, binary_criterion, clip):
    """Run one training epoch and return the mean loss per batch.

    Args:
        model: called as `model(src, trg)`; must return `(output, binary_output)`
            indexed by step on the first axis.
        iterator: iterable of dicts with keys 'src', 'trg' and 'trg_binary'.
            It does not need to implement `__len__`.
        optimizer: optimizer over `model`'s parameters.
        criterion: loss for the feature output.
        binary_criterion: loss for the binary output.
        clip: maximum gradient norm passed to `clip_grad_norm_`.

    Returns:
        Sum of the batch losses divided by the number of batches, or 0.0 when
        the iterator yields no batch.
    """
    model.train()

    # Resolve the device from the model itself rather than a module-level
    # global: prefer an explicit `model.device`, else the first parameter's.
    target_device = getattr(model, 'device', None)
    if target_device is None:
        first_param = next(model.parameters(), None)
        target_device = first_param.device if first_param is not None else torch.device('cpu')

    epoch_loss = 0.0
    n_batches = 0  # counted here so iterables without __len__ work too

    for batch in iterator:
        src = batch['src'].to(target_device)
        trg = batch['trg'].to(target_device)
        trg_binary = batch['trg_binary'].to(target_device)

        optimizer.zero_grad()

        output, binary_output = model(src, trg)

        # Step 0 only seeds the decoder, so it is excluded from both losses.
        feature_dim = output.shape[-1]
        output = output[1:].reshape(-1, feature_dim)
        trg = trg[1:].reshape(-1, feature_dim)

        binary_output = binary_output[1:].reshape(-1)
        trg_binary = trg_binary[1:].reshape(-1)

        loss = criterion(output, trg) + binary_criterion(binary_output, trg_binary)
        loss.backward()

        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        optimizer.step()

        epoch_loss += loss.item()
        n_batches += 1

    if n_batches == 0:
        return 0.0
    return epoch_loss / n_batches

# Dummy data: random tensors standing in for a real batch, created on `device`.
# The binary targets are random 0/1 values cast to float.
src_data = torch.rand(10, 32, input_dim).to(device)  # (sequence length, batch size, num_features)
trg_data = torch.rand(10, 32, output_dim).to(device)  # (sequence length, batch size, num_features)
trg_binary_data = torch.randint(0, 2, (10, 32, 1)).float().to(device)  # (sequence length, batch size, 1)

# Dummy iterator
class DummyIterator:
    """Minimal stand-in for a data loader: yields one fixed batch per pass."""

    def __iter__(self):
        return iter([{'src': src_data, 'trg': trg_data, 'trg_binary': trg_binary_data}])

    def __len__(self):
        # One batch per pass; lets callers average a loss over len(iterator).
        return 1

iterator = DummyIterator()

# Train the model for 10 epochs on the dummy batch and print the loss of each epoch
clip = 1  # maximum gradient norm handed to train() for gradient clipping
for epoch in range(10):
    train_loss = train(model, iterator, optimizer, criterion, binary_criterion, clip)
    print(f'Epoch: {epoch+1:02}, Train Loss: {train_loss:.4f}')


RuntimeError: mat1 and mat2 shapes cannot be multiplied (32x20 and 64x1)