# Importing the Nursery dataset

In [1]:
!pip install -r ../requirements.txt



In [2]:
#!pip install ucimlrepo

from ucimlrepo import fetch_ucirepo 
  
# fetch dataset 
nursery = fetch_ucirepo(id=76) 
  
# data (as pandas dataframes) 
X = nursery.data.features 
y = nursery.data.targets 
  
# metadata 
print(nursery.metadata) 
  
# variable information 
print(nursery.variables) 


{'uci_id': 76, 'name': 'Nursery', 'repository_url': 'https://archive.ics.uci.edu/dataset/76/nursery', 'data_url': 'https://archive.ics.uci.edu/static/public/76/data.csv', 'abstract': ' Nursery Database was derived from a hierarchical decision model originally developed to rank applications for nursery schools.', 'area': 'Social Science', 'tasks': ['Classification'], 'characteristics': ['Multivariate'], 'num_instances': 12960, 'num_features': 8, 'feature_types': ['Categorical'], 'demographics': [], 'target_col': ['class'], 'index_col': None, 'has_missing_values': 'no', 'missing_values_symbol': None, 'year_of_dataset_creation': 1989, 'last_updated': 'Sun Jan 14 2024', 'dataset_doi': '10.24432/C5P88W', 'creators': ['Vladislav Rajkovic'], 'intro_paper': {'ID': 372, 'type': 'NATIVE', 'title': 'An application for admission in public school systems', 'authors': 'M. Olave, V. Rajkovic, M. Bohanec', 'venue': 'Expert Systems in Public Administration', 'year': 1989, 'journal': None, 'DOI': None, 

# Training MAMBA on Nursery

In [4]:
# %% [markdown]
# ## (1) Install Dependencies

# %%
#!pip install --upgrade pip
#!pip install transformers einops scikit-learn pandas torch

# %% [markdown]
# ## (2) Load and Process Nursery Dataset

# %%
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score

# Load the Nursery dataset from UCI
dataset_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/nursery/nursery.data"
columns = ["parents", "has_nurs", "form", "children", "housing", "finance", "social", "health", "class"]
nursery_data = pd.read_csv(dataset_url, names=columns)

# Preprocess the data
label_encoder = LabelEncoder()

for column in nursery_data.columns:
    nursery_data[column] = label_encoder.fit_transform(nursery_data[column])

# Separate features (X) and target (y)
X = nursery_data.drop('class', axis=1)
y = nursery_data['class']

# Convert to tensors
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.long)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.long)

# Create PyTorch datasets and loaders
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64)

# %% [markdown]
# ## (3) Define Mamba Architecture for Structured Data

# %%
# Import necessary modules from Mamba architecture (Assume Mamba implementation is in the 'model' module)
# Import necessary modules from Mamba architecture (Assume Mamba implementation is in the 'model' module)
from models.model import Mamba, ModelArgs  # Use your Mamba implementation

import torch
import torch.nn as nn

# Custom MambaClassifier for tabular data
class MambaClassifier(nn.Module):
    def __init__(self, input_size, num_classes, d_model=64, n_layer=4):
        super(MambaClassifier, self).__init__()
        
        # Initial Linear Layer to map tabular data to a higher-dimensional space
        self.initial_fc = nn.Linear(input_size, d_model)  # input_size is 8 for Nursery, d_model is hidden size

        # Create a simple transformer-like structure to simulate Mamba behavior
        # You can replace this block with actual Mamba's layers if applicable
        self.transformer_blocks = nn.Sequential(
            nn.Linear(d_model, d_model),
            nn.ReLU(),
            nn.Linear(d_model, d_model),
            nn.ReLU()
        )

        # Classification head
        self.classifier = nn.Sequential(
            nn.Linear(d_model, 128),  # From d_model to 128 hidden units
            nn.ReLU(),
            nn.Linear(128, num_classes)  # Output layer maps to number of classes
        )

    def forward(self, x):
        # Pass the input through the initial fully connected layer
        x = self.initial_fc(x)

        # Process the transformed data through transformer blocks (or Mamba blocks)
        x = self.transformer_blocks(x)

        # Pass through the classifier head
        output = self.classifier(x)

        return output

# Define necessary parameters for the Mamba model
input_size = X_train.shape[1]  # Number of input features (8 for Nursery)
num_classes = len(nursery_data['class'].unique())  # Number of target classes
d_model = 64  # Dimensionality of Mamba model's hidden states
n_layer = 4   # Number of transformer layers

# Instantiate the custom Mamba classifier
model = MambaClassifier(input_size=input_size, num_classes=num_classes, d_model=d_model, n_layer=n_layer)
# %% [markdown]
# ## (4) Train the Model

# %%
# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# Training loop
num_epochs = 10
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}')

# %% [markdown]
# ## (5) Evaluate the Model

# %%
# Switch to evaluation mode
model.eval()
y_pred = []
y_true = []

# Test the model on the test set
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        y_pred.extend(predicted.cpu().numpy())
        y_true.extend(labels.cpu().numpy())

# Calculate accuracy
accuracy = accuracy_score(y_true, y_pred)
print(f'Accuracy on the test set: {accuracy:.4f}')

ModuleNotFoundError: No module named 'models'