<a href="https://colab.research.google.com/gist/Muhammadmuraad/3cba69bcc06e07688855aac4d68ecd62/copy-of-4models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Colab-only setup: mount the user's Google Drive at /content/drive.
# NOTE(review): nothing in the visible cells reads from the mount point --
# confirm whether this cell is still needed.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, random_split
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
import numpy as np

# Load the Iris dataset: X holds the feature matrix, y the integer class
# labels reshaped to a column so OneHotEncoder accepts them.
iris = load_iris()
X = iris.data
y = iris.target.reshape(-1, 1)

# One-hot encode the target variable.
# scikit-learn 1.2 renamed `sparse` to `sparse_output` and 1.4 removed the old
# keyword, so try the new spelling first and fall back for older releases.
try:
    encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(sparse=False)
y_onehot = encoder.fit_transform(y)

# Convert data to PyTorch tensors (float32 for both features and targets).
X_tensor = torch.tensor(X, dtype=torch.float32)
y_tensor = torch.tensor(y_onehot, dtype=torch.float32)

# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_tensor, y_tensor, test_size=0.2, random_state=42
)

# Define GRU cell without using nn.GRU
class CustomGRUCell(nn.Module):
    """Single-step GRU cell built from raw parameters (no nn.GRU/nn.GRUCell).

    Implements the gate equations

        r  = sigmoid(x W_ir^T + b_ir + h W_hr^T + b_hr)
        z  = sigmoid(x W_iz^T + b_iz + h W_hz^T + b_hz)
        n  = tanh(x W_in^T + b_in + r * (h W_hn^T + b_hn))
        h' = (1 - z) * n + z * h

    Args:
        input_size: Number of features in each input vector.
        hidden_size: Number of features in the hidden state.
    """

    def __init__(self, input_size, hidden_size):
        super(CustomGRUCell, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size

        # Reset gate parameters
        self.W_ir = nn.Parameter(torch.Tensor(hidden_size, input_size))
        self.W_hr = nn.Parameter(torch.Tensor(hidden_size, hidden_size))
        self.b_ir = nn.Parameter(torch.Tensor(hidden_size))
        self.b_hr = nn.Parameter(torch.Tensor(hidden_size))

        # Update gate parameters
        self.W_iz = nn.Parameter(torch.Tensor(hidden_size, input_size))
        self.W_hz = nn.Parameter(torch.Tensor(hidden_size, hidden_size))
        self.b_iz = nn.Parameter(torch.Tensor(hidden_size))
        self.b_hz = nn.Parameter(torch.Tensor(hidden_size))

        # Candidate state parameters. These must be separate from the reset
        # gate's weights; sharing them would tie the candidate to the gate
        # and the cell would no longer be a GRU.
        self.W_in = nn.Parameter(torch.Tensor(hidden_size, input_size))
        self.W_hn = nn.Parameter(torch.Tensor(hidden_size, hidden_size))
        self.b_in = nn.Parameter(torch.Tensor(hidden_size))
        self.b_hn = nn.Parameter(torch.Tensor(hidden_size))

        self.init_weights()

    def init_weights(self):
        """Xavier-initialise weight matrices and zero all bias vectors."""
        for p in self.parameters():
            if p.data.ndimension() >= 2:
                nn.init.xavier_uniform_(p.data)
            else:
                nn.init.zeros_(p.data)

    def forward(self, x, h):
        """Advance the hidden state by one time step.

        Args:
            x: Input of shape (batch, input_size).
            h: Previous hidden state of shape (batch, hidden_size).

        Returns:
            The new hidden state, shape (batch, hidden_size).
        """
        r = torch.sigmoid(x @ self.W_ir.t() + self.b_ir + h @ self.W_hr.t() + self.b_hr)
        z = torch.sigmoid(x @ self.W_iz.t() + self.b_iz + h @ self.W_hz.t() + self.b_hz)
        # The reset gate scales only the hidden-state contribution.
        n = torch.tanh(x @ self.W_in.t() + self.b_in + r * (h @ self.W_hn.t() + self.b_hn))
        h_new = (1 - z) * n + z * h
        return h_new

# Define GRU model without using nn.GRU
class CustomGRUModel(nn.Module):
    """Sequence classifier: a CustomGRUCell unrolled over time plus a linear head.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the recurrent hidden state.
        output_size: Number of output logits.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super(CustomGRUModel, self).__init__()
        self.gru_cell = CustomGRUCell(input_size, hidden_size)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Run the cell over every time step and classify the final state.

        Args:
            x: Input indexed as x[:, t, :], i.e. (batch, seq_len, input_size).

        Returns:
            Logits computed from the last hidden state, (batch, output_size).
        """
        # new_zeros inherits both dtype and device from x, so the initial
        # state lives wherever the input does (CPU or accelerator).
        h = x.new_zeros(x.size(0), self.gru_cell.hidden_size)
        for t in range(x.size(1)):
            h = self.gru_cell(x[:, t, :], h)
        return self.fc(h)

# Build the GRU classifier: one input feature per column of X_train,
# a 32-unit hidden state and 3 output logits.
input_size = X_train.shape[1]
hidden_size_gru, output_size_gru = 32, 3

model_gru = CustomGRUModel(
    input_size=input_size,
    hidden_size=hidden_size_gru,
    output_size=output_size_gru,
)

# Define self-attention mechanism without using nn.Softmax
class CustomSelfAttention(nn.Module):
    """Dot-product self-attention over the rows of a 2-D input.

    Queries, keys and values are ReLU-activated affine projections of the
    input. The score matrix is ``q @ k.t()`` (rows x rows, unscaled) and is
    normalised with a softmax along dim 1 before weighting the values.

    Args:
        input_size: Number of features in each input row.
        hidden_size: Width of the query/key/value projections.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size

        def make_weight():
            return nn.Parameter(torch.Tensor(hidden_size, input_size))

        def make_bias():
            return nn.Parameter(torch.Tensor(hidden_size))

        # Registration order (weights first, then biases) is kept so that
        # parameters() iterates, and therefore initialises, in the same order.
        self.W_q, self.W_k, self.W_v = make_weight(), make_weight(), make_weight()
        self.b_q, self.b_k, self.b_v = make_bias(), make_bias(), make_bias()

        self.init_weights()

    def init_weights(self):
        """Zero every bias vector and Xavier-initialise every weight matrix."""
        for param in self.parameters():
            if param.data.ndimension() < 2:
                nn.init.zeros_(param.data)
                continue
            nn.init.xavier_uniform_(param.data)

    def _project(self, x, weight, bias):
        """Return relu(x @ weight^T + bias)."""
        return torch.relu(x @ weight.t() + bias)

    def forward(self, x):
        """Attend every row of x to every other row and return the mixed values."""
        queries = self._project(x, self.W_q, self.b_q)
        keys = self._project(x, self.W_k, self.b_k)
        values = self._project(x, self.W_v, self.b_v)

        scores = torch.matmul(queries, keys.t())
        weights = scores.softmax(dim=1)
        return torch.matmul(weights, values)

# Define self-attention model without using nn.Softmax
class CustomSelfAttentionModel(nn.Module):
    """Classifier that feeds CustomSelfAttention output into a linear head.

    Args:
        input_size: Number of features in each input row.
        hidden_size: Width of the attention projections.
        output_size: Number of output logits.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.self_attention = CustomSelfAttention(input_size, hidden_size)
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Return the logits for x after attention and the linear layer."""
        return self.fc(self.self_attention(x))

# Build the self-attention classifier with a 32-wide projection and
# 3 output logits; it shares input_size with the GRU model.
hidden_size_self_attention, output_size_self_attention = 32, 3

model_self_attention = CustomSelfAttentionModel(
    input_size=input_size,
    hidden_size=hidden_size_self_attention,
    output_size=output_size_self_attention,
)

# Define the training parameters. CrossEntropyLoss takes raw logits and
# integer class indices, so the one-hot targets are converted with argmax.
criterion = nn.CrossEntropyLoss()

# Training loop for GRU model (full-batch gradient descent with Adam).
optimizer_gru = optim.Adam(model_gru.parameters(), lr=0.001)
epochs = 50

# Loop-invariant tensors: compute once rather than on every epoch.
# Each sample is presented as a length-1 sequence.
X_train_seq = X_train.unsqueeze(1)
y_train_labels = torch.argmax(y_train, dim=1)

for epoch in range(epochs):
    optimizer_gru.zero_grad()
    outputs = model_gru(X_train_seq)
    # No squeeze(): outputs is already (batch, classes), and squeezing would
    # drop the batch dimension for a single-sample batch and break the loss.
    loss = criterion(outputs, y_train_labels)
    loss.backward()
    optimizer_gru.step()

# Score the GRU model on the held-out split; gradients are not needed here.
with torch.no_grad():
    outputs = model_gru(X_test.unsqueeze(1))
    predicted = outputs.max(dim=1).indices
    matches = predicted.eq(y_test.argmax(dim=1))
    correct = int(matches.sum())
    total = len(y_test)
    accuracy_gru = correct / total
    print(f'GRU Model - Accuracy: {accuracy_gru:.4f}')

# Training loop for self-attention model (full-batch, same criterion and
# epoch count as the GRU model).
optimizer_self_attention = optim.Adam(model_self_attention.parameters(), lr=0.001)

# Class-index targets are loop-invariant, so derive them once up front.
y_train_labels = torch.argmax(y_train, dim=1)

for epoch in range(epochs):
    optimizer_self_attention.zero_grad()
    outputs = model_self_attention(X_train)
    # No squeeze(): outputs is already (batch, classes), and squeezing would
    # drop the batch dimension for a single-sample batch and break the loss.
    loss = criterion(outputs, y_train_labels)
    loss.backward()
    optimizer_self_attention.step()

# Score the self-attention model on the held-out split without tracking
# gradients.
with torch.no_grad():
    outputs = model_self_attention(X_test)
    predicted = outputs.max(dim=1).indices
    matches = predicted.eq(y_test.argmax(dim=1))
    correct = int(matches.sum())
    total = len(y_test)
    accuracy_self_attention = correct / total
    print(f'Self-Attention Model - Accuracy: {accuracy_self_attention:.4f}')

# Compare the two models by test accuracy and report the winner (or a tie).
gru_is_better = accuracy_gru > accuracy_self_attention
attention_is_better = accuracy_gru < accuracy_self_attention

if gru_is_better:
    comparison_message = "GRU model performs better on the Iris dataset."
elif attention_is_better:
    comparison_message = "Self-Attention model performs better on the Iris dataset."
else:
    comparison_message = "Both models have similar performance on the Iris dataset."
print(comparison_message)