In [10]:
!pip install -q gputil


  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for gputil (setup.py) ... [?25l[?25hdone


In [11]:
import GPUtil


In [1]:
# Install dependencies
!pip install ray torch torchvision pandas scikit-learn
!pip install -U ray



In [83]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# Re-defining the RealModel exactly as used during training
class RealModel(nn.Module):
    """Fully connected classifier with two hidden layers.

    Each hidden stage is Linear -> BatchNorm1d -> ReLU -> Dropout(0.3); the
    last linear layer emits one raw score per class (no softmax is applied).
    """

    def __init__(self, input_size, num_classes):
        """Create the layers.

        Args:
            input_size: Number of features in each input row.
            num_classes: Number of scores produced per row.
        """
        super().__init__()
        # Attribute names double as state_dict keys, so they are kept as-is.
        self.layer1 = nn.Linear(input_size, 256)
        self.bn1 = nn.BatchNorm1d(256)
        self.layer2 = nn.Linear(256, 128)
        self.bn2 = nn.BatchNorm1d(128)
        self.layer3 = nn.Linear(128, num_classes)
        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
        """Map a (batch, input_size) tensor to (batch, num_classes) scores."""
        hidden = x
        hidden_stages = ((self.layer1, self.bn1), (self.layer2, self.bn2))
        for linear, norm in hidden_stages:
            hidden = self.dropout(F.relu(norm(linear(hidden))))
        return self.layer3(hidden)

# Function to predict a single sample
def predict_single_sample(model_path, sample, input_size=20, num_classes=2):
    """Load saved RealModel weights and classify one feature vector.

    Args:
        model_path: Path to a state_dict file readable by ``torch.load``.
        sample: Indexable sequence of feature values; only the first
            ``input_size`` entries are used.
        input_size: Feature count used to build the model.
        num_classes: Class count used to build the model.

    Returns:
        Index of the highest-scoring class as a Python number.
    """
    net = RealModel(input_size, num_classes)
    cpu = torch.device('cpu')
    saved_weights = torch.load(model_path, map_location=cpu)
    net.load_state_dict(saved_weights)
    net.eval()  # switch dropout / batch norm to inference behaviour

    trimmed = sample[:input_size]
    batch = torch.FloatTensor(trimmed).unsqueeze(0)  # add a batch dimension
    with torch.no_grad():
        scores = net(batch)
        winner = torch.max(scores.data, 1)[1]

    return winner.item()


In [8]:

!pip install -q ray

import ray
import torch.nn.functional as F

# Initialize Ray
# ignore_reinit_error=True: calling this while Ray is already running does not
# raise, so the cell can be re-run.
ray.init(ignore_reinit_error=True)

# Implement the real training worker with metrics
@ray.remote
class RealTrainingWorker:
    """Ray actor that trains one RealModel for a single hyperparameter config.

    The actor reads ``X_train``, ``y_train``, ``train_dataset`` and
    ``test_dataset`` from the notebook namespace, so those names must exist
    before an actor is created. ``test_dataset`` is used for validation.
    """

    # Consecutive non-improving epochs tolerated before training stops early.
    EARLY_STOP_PATIENCE = 3

    def __init__(self, config):
        """Build the model, optimizer and learning-rate scheduler.

        Args:
            config: Dict providing ``'lr'``, ``'weight_decay'`` and
                ``'batch_size'``.
        """
        self.config = config
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = RealModel(X_train.shape[1], len(np.unique(y_train)))
        # Move the model first so the optimizer is constructed over the
        # parameters that actually live on the training device.
        self.model.to(self.device)
        self.optimizer = torch.optim.AdamW(self.model.parameters(),
                                           lr=config['lr'],
                                           weight_decay=config['weight_decay'])
        # 'max' mode: the scheduler is stepped with validation accuracy.
        self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(self.optimizer, 'max', patience=2)

    def train(self, iterations):
        """Train for up to ``iterations`` epochs with early stopping.

        Args:
            iterations: Maximum number of epochs to run. Zero is allowed and
                returns immediately with an accuracy of 0.

        Returns:
            Dict with keys ``"accuracy"`` (best validation accuracy seen),
            ``"config"``, ``"device"`` and ``"actual_iterations"`` (epochs
            actually executed).
        """
        train_loader = DataLoader(train_dataset,
                                  batch_size=self.config['batch_size'],
                                  shuffle=True)
        val_loader = DataLoader(test_dataset,
                                batch_size=self.config['batch_size'])

        best_val_acc = 0
        early_stop_counter = 0
        # Counted explicitly so the value is defined even when no epoch runs.
        epochs_run = 0

        for _ in range(iterations):
            epochs_run += 1

            # Training
            self.model.train()
            for x, y in train_loader:
                x, y = x.to(self.device), y.to(self.device)

                self.optimizer.zero_grad()
                loss = F.cross_entropy(self.model(x), y)
                loss.backward()
                self.optimizer.step()

            # Validation
            self.model.eval()
            correct = 0
            total = 0
            with torch.no_grad():
                for x, y in val_loader:
                    x, y = x.to(self.device), y.to(self.device)
                    predicted = self.model(x).argmax(dim=1)
                    total += y.size(0)
                    correct += (predicted == y).sum().item()

            # An empty validation set yields accuracy 0 instead of dividing by zero.
            val_acc = correct / total if total else 0.0
            self.scheduler.step(val_acc)

            # Early stopping check
            if val_acc > best_val_acc:
                best_val_acc = val_acc
                early_stop_counter = 0
            else:
                early_stop_counter += 1
                if early_stop_counter >= self.EARLY_STOP_PATIENCE:
                    break

        return {
            "accuracy": best_val_acc,
            "config": self.config,
            "device": str(self.device),
            "actual_iterations": epochs_run
        }

2025-04-15 12:42:50,185	INFO worker.py:1852 -- Started a local Ray instance.


In [12]:
#  Initialize RubberBand with real resource monitoring
import psutil
import GPUtil
class ResourceMonitor:
    """Snapshot of host CPU/RAM usage plus the first GPU reported by GPUtil."""

    @staticmethod
    def get_usage():
        """Return the current usage figures as a dict.

        Keys are ``cpu_usage``, ``ram_usage``, ``gpu_usage`` and ``gpu_mem``.
        Both GPU entries are 0 when GPUtil reports no devices.
        """
        detected = GPUtil.getGPUs()
        cpu_now = psutil.cpu_percent()
        ram_now = psutil.virtual_memory().percent

        if detected:
            first_gpu = detected[0]
            gpu_load = first_gpu.load * 100
            gpu_memory = first_gpu.memoryUsed
        else:
            gpu_load = 0
            gpu_memory = 0

        return {
            "cpu_usage": cpu_now,
            "ram_usage": ram_now,
            "gpu_usage": gpu_load,
            "gpu_mem": gpu_memory
        }

def create_real_profiling():
    """Time a short training benchmark and package it as ProfilingData.

    Runs up to 11 optimisation steps of a fresh RealModel over
    ``train_dataset`` (batch size 64), measures the mean wall-clock time per
    step, then samples ResourceMonitor once. Uses the GPU when one is
    available and falls back to the CPU otherwise.

    Returns:
        ProfilingData with fixed CPU/GPU resource figures, the measured GPU
        memory, the mean step time in seconds and a placeholder accuracy
        curve.
    """
    import time  # local import: this cell does not import it itself

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    test_model = RealModel(X_train.shape[1], len(np.unique(y_train))).to(device)
    optimizer = torch.optim.Adam(test_model.parameters())

    # The benchmark covers batches 0..10 inclusive, i.e. 11 steps.
    max_steps = 11
    steps_done = 0

    start_time = time.time()
    for x, y in DataLoader(train_dataset, batch_size=64):
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        outputs = test_model(x)
        loss = F.cross_entropy(outputs, y)
        loss.backward()
        optimizer.step()

        steps_done += 1
        if steps_done >= max_steps:
            break

    if device.type == "cuda":
        # CUDA kernels run asynchronously; wait for them before stopping the clock.
        torch.cuda.synchronize()

    # Divide by the steps that actually ran (the dataset may hold fewer than
    # max_steps batches); guard against an empty dataset.
    iter_time = (time.time() - start_time) / max(steps_done, 1)
    resources = ResourceMonitor.get_usage()

    return ProfilingData(
        resource_usage={
            ResourceType.CPU: 2,
            ResourceType.GPU: 0.5,
            ResourceType.MEMORY: resources['gpu_mem'] / 1024  # GB, assuming gpu_mem is reported in MB
        },
        iteration_time=iter_time,
        accuracy_progression=lambda it: min(0.95, 0.7 + it*0.01)  # Real curve will vary
    )

In [14]:
# Start Ray with every CPU core psutil reports and every CUDA device torch can
# see. ignore_reinit_error=True means a repeated call does not raise when Ray
# is already running.
# NOTE(review): dashboard_host='0.0.0.0' binds the dashboard to all network
# interfaces — confirm that exposure is intended on this machine.
ray.init(
    num_cpus=psutil.cpu_count(),
    num_gpus=torch.cuda.device_count(),
    dashboard_host='0.0.0.0',
    ignore_reinit_error=True
)


2025-04-15 12:46:03,380	INFO worker.py:1684 -- Calling ray.init() again after it has already been called.


0,1
Python version:,3.11.12
Ray version:,2.44.1


In [37]:
!mkdir images



In [104]:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch

# Load the scikit-learn digits dataset and split it into features / targets.
digits = load_digits()
X = digits.data
y = digits.target

# Keep only the training portion (80%) of a fixed-seed split; the held-out
# 20% is discarded in this cell.
X_train, _, y_train, _ = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardise the features with statistics fitted on the training rows, then
# store features and targets as tensors.
scaler = StandardScaler()
X_train = torch.tensor(scaler.fit_transform(X_train), dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)


In [49]:
# Shut down the current Ray instance.
ray.shutdown()


In [52]:
import ray
from sklearn.datasets import load_digits
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

@ray.remote
class RealTrainingWorker:
    """Ray actor that fits an SGD logistic-regression model on the digits data.

    Only ``config["weight_decay"]`` affects the model (used as ``alpha``,
    floored at 1e-6, default 1e-4); the rest of the config is carried through
    to the result unchanged.
    """

    def __init__(self, config):
        """Load the digits data, make a fixed 80/20 split and build the model."""
        self.config = config

        digits = load_digits()
        features, targets = digits.data, digits.target
        split = train_test_split(features, targets, test_size=0.2, random_state=42)
        self.X_train, self.X_val, self.y_train, self.y_val = split

        requested_alpha = config.get("weight_decay", 1e-4)
        self.model = SGDClassifier(
            loss="log_loss",
            learning_rate="optimal",
            alpha=max(1e-6, requested_alpha),
            random_state=42,
        )

    def train(self, iterations):
        """Run ``iterations`` partial-fit passes and score on the validation split.

        Returns:
            Dict with ``"accuracy"`` on the validation split and ``"config"``.
        """
        all_classes = list(range(10))  # the ten digit classes 0-9
        for _ in range(iterations):
            self.model.partial_fit(self.X_train, self.y_train, classes=all_classes)

        predictions = self.model.predict(self.X_val)
        return {
            "accuracy": accuracy_score(self.y_val, predictions),
            "config": self.config,
        }


In [105]:
import ray
import psutil
import torch

class ExperimentSpec:
    """Staged trial schedule: fewer trials, more iterations at every stage.

    Stage ``i`` runs ``n / eta**i`` trials (rounded down) for ``r * eta**i``
    iterations each, and stages continue while the per-trial iteration count
    does not exceed ``R``.

    Attributes set from the constructor: ``n`` (initial trial count), ``r``
    (initial iterations per trial), ``R`` (maximum iterations per trial),
    ``eta`` (reduction factor), ``time_constraint`` and ``budget_constraint``
    (stored only; not used by this class).
    """

    def __init__(self, n, r, R, eta=2, time_constraint=None, budget_constraint=None):
        self.n = n
        self.r = r
        self.R = R
        self.eta = eta
        self.time_constraint = time_constraint
        self.budget_constraint = budget_constraint

    def get_stage(self, i):
        """Return ``(n_i, r_i)``: trial count and iterations for stage ``i``.

        Args:
            i: Zero-based stage index.
        """
        scale = self.eta ** i
        # Floor-divide instead of multiplying by eta ** -i: the reciprocal is
        # inexact in floating point and can truncate one too low
        # (e.g. 49 * 7 ** -2 evaluates just below 1).
        n_i = int(self.n // scale)
        r_i = int(self.r * scale)
        return n_i, r_i

    def num_stages(self):
        """Return how many stages fit, i.e. the count of ``k`` with ``r * eta**k <= R``.

        Counted by repeated multiplication rather than ``math.log``, whose
        rounding drops a stage on exact powers (``math.log(243, 3)`` is
        4.999...).

        Raises:
            ValueError: If ``eta <= 1`` or ``r <= 0``, for which the schedule
                never terminates.
        """
        if self.eta <= 1 or self.r <= 0:
            raise ValueError("num_stages requires eta > 1 and r > 0")

        stages = 0
        iterations = self.r
        while iterations <= self.R:
            stages += 1
            iterations *= self.eta
        return stages

class ResourceMonitor:
    """CPU/RAM usage probe; the GPU figures are fixed at 0 in this version."""

    @staticmethod
    def get_usage():
        """Return a dict with ``cpu_usage``, ``ram_usage``, ``gpu_usage``, ``gpu_mem``."""
        cpu_now = psutil.cpu_percent()
        ram_now = psutil.virtual_memory().percent

        usage = {
            "cpu_usage": cpu_now,
            "ram_usage": ram_now,
            "gpu_usage": 0,
            "gpu_mem": 0
        }
        return usage

def run_real_experiment(hyperparams=None, spec=None):
    """Run the staged hyperparameter search and a final training run.

    Ray is restarted, then each stage trains the surviving configurations in
    parallel RealTrainingWorker actors, keeps the top ``1/eta`` by accuracy
    (at least one) and passes their configs to the next stage. Finally the
    best config is trained once more for ``spec.R`` iterations and reported.

    Args:
        hyperparams: Optional list of config dicts to search. Defaults to the
            built-in four-entry grid.
        spec: Optional ExperimentSpec. Defaults to
            ``ExperimentSpec(n=4, r=5, R=30, eta=2)``.

    Returns:
        The best trial dict of the last stage that ran (not the result of the
        final training run, which is only printed).

    Raises:
        RuntimeError: If no stage could run any trial.
    """
    ray.shutdown()
    ray.init(
        num_cpus=psutil.cpu_count(),
        num_gpus=torch.cuda.device_count(),
        ignore_reinit_error=True
    )

    # Hyperparameters
    if hyperparams is None:
        hyperparams = [
            {"lr": 1e-3, "batch_size": 64, "weight_decay": 1e-4},
            {"lr": 3e-4, "batch_size": 128, "weight_decay": 1e-5},
            {"lr": 1e-4, "batch_size": 256, "weight_decay": 0},
            {"lr": 3e-3, "batch_size": 32, "weight_decay": 1e-3}
        ]
    else:
        hyperparams = list(hyperparams)  # work on a copy, never the caller's list

    if spec is None:
        spec = ExperimentSpec(n=4, r=5, R=30, eta=2)

    total_stages = spec.num_stages()
    best_trials = []

    for stage_idx in range(total_stages):
        n_i, r_i = spec.get_stage(stage_idx)
        stage_configs = hyperparams[:n_i]
        if not stage_configs:
            # The schedule has shrunk to zero trials; keep the previous winners.
            break

        print(f"\n=== Stage {stage_idx+1}/{total_stages} ===")
        print(f"Running {len(stage_configs)} trials for {r_i} iterations")

        workers = [RealTrainingWorker.remote(config) for config in stage_configs]
        futures = [worker.train.remote(r_i) for worker in workers]

        results = []
        while futures:
            # Poll so resource usage is reported while trials are running.
            done, futures = ray.wait(futures, timeout=5.0)
            if done:
                res = ray.get(done)
                results.extend(res)
                for r in res:
                    print(f"Trial completed: {r['accuracy']:.4f} accuracy")
            print(f"Resource usage: {ResourceMonitor.get_usage()}")

        results.sort(key=lambda x: -x["accuracy"])
        # int() keeps the slice valid when eta is not an integer.
        keep = max(1, int(len(results) // spec.eta))
        best_trials = results[:keep]
        hyperparams = [trial["config"] for trial in best_trials]

        print(f"Best this stage: {best_trials[0]['accuracy']:.4f}")

    if not best_trials:
        raise RuntimeError("no trials were run; check hyperparams and spec")

    print("\n=== Final Training ===")
    final_worker = RealTrainingWorker.remote(best_trials[0]["config"])
    final_result = ray.get(final_worker.train.remote(spec.R))

    print(f"\nFinal Accuracy: {final_result['accuracy']:.4f}")
    print(f"Best Config: {best_trials[0]['config']}")
    print(f"Resource Usage Summary: {ResourceMonitor.get_usage()}")

    return best_trials[0]


In [106]:
# Run the staged search and keep the trial dict it returns.
best_result = run_real_experiment()


2025-04-15 14:30:42,450	INFO worker.py:1852 -- Started a local Ray instance.



=== Stage 1/3 ===
Running 4 trials for 5 iterations
Trial completed: 0.9556 accuracy
Resource usage: {'cpu_usage': 18.5, 'ram_usage': 28.7, 'gpu_usage': 0, 'gpu_mem': 0}
Trial completed: 0.9194 accuracy
Resource usage: {'cpu_usage': 100.0, 'ram_usage': 28.8, 'gpu_usage': 0, 'gpu_mem': 0}
Trial completed: 0.9528 accuracy
Resource usage: {'cpu_usage': 100.0, 'ram_usage': 29.9, 'gpu_usage': 0, 'gpu_mem': 0}
Trial completed: 0.9333 accuracy
Resource usage: {'cpu_usage': 56.6, 'ram_usage': 30.2, 'gpu_usage': 0, 'gpu_mem': 0}
Best this stage: 0.9556

=== Stage 2/3 ===
Running 2 trials for 10 iterations
Trial completed: 0.9444 accuracy
Resource usage: {'cpu_usage': 100.0, 'ram_usage': 28.2, 'gpu_usage': 0, 'gpu_mem': 0}
Trial completed: 0.9472 accuracy
Resource usage: {'cpu_usage': 100.0, 'ram_usage': 28.2, 'gpu_usage': 0, 'gpu_mem': 0}
Best this stage: 0.9472

=== Stage 3/3 ===
Running 1 trials for 20 iterations
Trial completed: 0.9556 accuracy
Resource usage: {'cpu_usage': 99.4, 'ram_usage

In [108]:
class CustomDataset(Dataset):
    """Image dataset backed by an in-memory table.

    Columns are read by position: column 0 holds the image file name and
    column 2 the label (converted with ``int``).
    """

    def __init__(self, dataframe, image_dir, transform=None):
        """Store the table, the image folder and an optional transform."""
        self.data, self.image_dir, self.transform = dataframe, image_dir, transform

    def __len__(self):
        """Number of rows in the table."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``; the image is opened as RGB."""
        file_name = str(self.data.iloc[idx, 0])
        target = int(self.data.iloc[idx, 2])

        picture = Image.open(os.path.join(self.image_dir, file_name)).convert("RGB")
        if self.transform:
            picture = self.transform(picture)

        return picture, target


In [109]:
from torch.utils.data import Dataset
from PIL import Image

class RubberBandDataset(Dataset):
    """Image dataset reading file names and labels from named table columns.

    The table must provide a ``filename`` and a ``label`` column; file names
    are resolved relative to ``image_root``.
    """

    def __init__(self, df, image_root, transform=None):
        """Store the table, the image root folder and an optional transform."""
        self.df, self.image_root, self.transform = df, image_root, transform

    def __len__(self):
        """Number of rows in the table."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``; the label is returned as stored."""
        row = self.df.iloc[idx]
        file_name = row['filename']
        target = row['label']

        picture = Image.open(os.path.join(self.image_root, file_name)).convert("RGB")
        if self.transform:
            picture = self.transform(picture)
        return picture, target


In [110]:
from torchvision import transforms
from torch.utils.data import Dataset
from PIL import Image
import pandas as pd
import os

class CustomImageDataset(Dataset):
    """Image dataset described by a CSV file.

    Columns are read by position: column 0 is the image id (the file is
    ``<id>.png`` inside ``image_dir``) and column 1 the label, converted with
    ``int``.
    """

    def __init__(self, csv_path, image_dir, transform=None):
        """Read the CSV and remember the image folder and transform.

        When no (truthy) transform is given, ``transforms.ToTensor()`` is used.
        """
        self.data = pd.read_csv(csv_path)
        self.image_dir = image_dir
        self.transform = transform or transforms.ToTensor()

    def __len__(self):
        """Number of rows in the CSV."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(transformed image, label)`` for row ``idx``."""
        stem = str(self.data.iloc[idx, 0])
        target = int(self.data.iloc[idx, 1])

        file_path = os.path.join(self.image_dir, stem + '.png')
        picture = Image.open(file_path).convert('RGB')

        return self.transform(picture), target


In [111]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Load the labelled file list.
# NOTE(review): absolute /content path — the file must already exist there
# before this cell runs; confirm which cell produces it.
df = pd.read_csv('/content/updated_file.csv')

# Encode the string labels as integers in a new 'label_encoded' column.
le = LabelEncoder()
df['label_encoded'] = le.fit_transform(df['label'])

# Preview the first rows.
print(df.head())


    filename  label  label_encoded
0   test.zip    dog              2
1       test    cat              1
2      train  truck              3
3  train.zip    car              0


In [113]:
# Preprocessing applied to every image: resize to 128x128, then convert to a tensor.
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor()
])

# Training dataset built from the CSV listing and the extracted image folder.
train_dataset = CustomImageDataset(
    csv_path="/content/train_test_files.csv",
    image_dir="/content/extracted_files/train_test/train/train",
    transform=transform
)


In [114]:
class CustomImageDataset(Dataset):
    """Image dataset described by a CSV file with a ``label`` column.

    Column 0 holds the image id (the file is ``<id>.png`` inside
    ``image_dir``). The ``label`` column is replaced by integer class indices
    according to ``label_to_idx``.
    """

    def __init__(self, csv_path, image_dir, transform=None, label_to_idx=None):
        """Read the CSV and build the label mapping.

        Args:
            csv_path: CSV file with the image id in column 0 and a ``label``
                column.
            image_dir: Folder containing the ``<id>.png`` files.
            transform: Callable applied to each opened image; defaults to
                ``transforms.ToTensor()``.
            label_to_idx: Optional ``{label: index}`` mapping to reuse, e.g.
                the mapping of the training set when building a test set.
                When omitted, the mapping is built from the sorted distinct
                labels so it does not depend on row order.

        Raises:
            ValueError: If the CSV contains a label missing from the mapping.
        """
        self.data = pd.read_csv(csv_path)
        self.image_dir = image_dir
        self.transform = transform if transform else transforms.ToTensor()

        # label mapping
        if label_to_idx is None:
            # Sorted order is deterministic across files, unlike order of
            # first appearance.
            distinct = sorted(self.data['label'].unique())
            label_to_idx = {label: idx for idx, label in enumerate(distinct)}
        self.label_to_idx = dict(label_to_idx)

        mapped = self.data['label'].map(self.label_to_idx)
        if mapped.isna().any():
            unknown = sorted(set(self.data['label'][mapped.isna()].astype(str)))
            raise ValueError(f"labels missing from label_to_idx: {unknown}")
        self.data['label'] = mapped.astype(int)

    def __len__(self):
        """Number of rows in the CSV."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(transformed image, class index)`` for row ``idx``."""
        img_id = str(self.data.iloc[idx, 0])
        # Read the label by name, matching how the mapping was applied.
        label = int(self.data['label'].iloc[idx])
        img_path = os.path.join(self.image_dir, img_id + '.png')
        image = Image.open(img_path).convert('RGB')
        image = self.transform(image)
        return image, label


In [115]:
# Save only the weights (state_dict) of final_model, relative to the working directory.
# NOTE(review): final_model is not defined in the cells visible here — confirm
# which cell creates it.
torch.save(final_model.state_dict(), "best_model.pth")
print(" Model saved!")


✅ Model saved!


In [116]:
def predict_single_sample(model_path, sample):
    """Classify one feature vector with saved RealModel weights.

    The model dimensions are taken from the notebook-level ``X_train`` and
    ``y_train``.

    Args:
        model_path: Path to a state_dict file readable by ``torch.load``.
        sample: Feature values for a single example.

    Returns:
        Index of the highest-scoring class as a Python number.
    """
    classifier = RealModel(X_train.shape[1], len(np.unique(y_train)))
    cpu = torch.device("cpu")
    classifier.load_state_dict(torch.load(model_path, map_location=cpu))
    classifier.eval()

    batch = torch.FloatTensor(sample).unsqueeze(0)  # Add batch dimension

    with torch.no_grad():
        scores = classifier(batch)
        winner = torch.max(scores.data, 1)[1]

    return winner.item()


In [118]:
def predict_single_sample(model_path, sample, input_size=20, num_classes=2):
    """Classify one feature vector with saved RealModel weights.

    Args:
        model_path: Path to a state_dict file readable by ``torch.load``.
        sample: Feature values for a single example (used in full).
        input_size: Feature count used to build the model.
        num_classes: Class count used to build the model.

    Returns:
        Index of the highest-scoring class as a Python number.
    """
    net = RealModel(input_size, num_classes)
    checkpoint = torch.load(model_path, map_location=torch.device("cpu"))
    net.load_state_dict(checkpoint)
    net.eval()

    single_row = torch.FloatTensor(sample)
    batch = single_row.unsqueeze(0)  # shape becomes (1, n_features)
    with torch.no_grad():
        logits = net(batch)
        top = torch.max(logits.data, 1)
        predicted = top[1]

    return predicted.item()


In [122]:
def predict_single_sample(model_path, sample):
    """Classify one feature vector with saved RealModel weights.

    The model dimensions are taken from the notebook-level ``X_train`` and
    ``y_train``, so the checkpoint must come from a model built with them.

    Args:
        model_path: Path to a state_dict file written by ``torch.save``.
        sample: One example as a list, array or tensor holding exactly
            ``X_train.shape[1]`` values.

    Returns:
        Index of the highest-scoring class as a Python int.

    Raises:
        ValueError: If ``sample`` is not a 1-D vector of the expected length.
    """
    input_size = X_train.shape[1]
    num_classes = len(np.unique(y_train))

    # as_tensor with an explicit dtype treats lists, arrays and tensors alike;
    # the legacy FloatTensor constructor interprets a bare int as a size.
    features = torch.as_tensor(sample, dtype=torch.float32, device="cpu")
    if features.dim() != 1 or features.numel() != input_size:
        raise ValueError(
            f"sample must be a 1-D vector of {input_size} features, "
            f"got shape {tuple(features.shape)}"
        )

    model = RealModel(input_size, num_classes)
    # weights_only=True restricts unpickling to tensors and plain containers,
    # which is all a state_dict needs.
    state_dict = torch.load(
        model_path, map_location=torch.device("cpu"), weights_only=True
    )
    model.load_state_dict(state_dict)
    model.eval()

    with torch.no_grad():
        logits = model(features.unsqueeze(0))  # add the batch dimension

    return int(logits.argmax(dim=1).item())

In [124]:
# Create loaders
# NOTE(review): ImageDataset, train_df, test_df and DataLoader are not defined
# in the cells visible here — confirm they exist before this cell runs.
image_dir = "/content/extracted_files/train_test"

# Both splits share the same folder and the same transform.
train_dataset = ImageDataset(train_df, image_dir, transform=transform)
test_dataset = ImageDataset(test_df, image_dir, transform=transform)

# Only the training loader shuffles.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

print(" Data loaded successfully!")
print("Train samples:", len(train_dataset))
print("Test samples:", len(test_dataset))
print("Classes:", le.classes_)


✅ Data loaded successfully!
Train samples: 40000
Test samples: 10000
Classes: ['car' 'cat' 'dog' 'truck']


In [125]:

# Fixed label list assigned positionally to the rows of the CSV read below.
labels = ['dog', 'cat', 'truck', 'car']
df = pd.read_csv('/content/train_test_files.csv')


# Trim the label list when the lengths differ. Slicing can only shorten it.
# NOTE(review): if the CSV has more rows than labels, the column assignment
# below presumably fails with a length mismatch — confirm the intended labelling.
if len(labels) != len(df):
  labels = labels[:len(df)]

df['label'] = labels

# Now encode
le = LabelEncoder()
df['label_encoded'] = le.fit_transform(df['label'])
# Written relative to the working directory (no /content prefix).
df.to_csv('updated_file.csv', index=False)

In [129]:
from torch.optim import AdamW


In [136]:


import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# Preprocessing: resize to 64x64, convert to a tensor, then normalise with
# mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Point to your extracted images folder
image_dir = "/content/extracted_files/train_test"

train_dataset = ImageDataset(train_df, image_dir, transform=transform)
test_dataset  = ImageDataset(test_df,  image_dir, transform=transform)


# Only the training loader shuffles.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader  = DataLoader(test_dataset,  batch_size=32, shuffle=False)

print(f"✔️ Train samples: {len(train_dataset)}")
print(f"✔️ Test  samples: {len(test_dataset)}")


✔️ Train samples: 40000
✔️ Test  samples: 10000


In [140]:
import torch.nn as nn
import torch.nn.functional as F

class RealModel(nn.Module):
    """Small CNN classifier for 3-channel images.

    Two 3x3 convolutions (32 then 64 channels, padding 1), each followed by
    ReLU and 2x2 max pooling, then a 128-unit hidden layer with dropout 0.3
    and a final layer emitting one raw score per class.
    """

    def __init__(self, num_classes, *, image_size=64):
        """Create the layers.

        Args:
            num_classes: Number of scores produced per image.
            image_size: Input resolution, either one int for square images or
                a ``(height, width)`` pair. Keyword-only; defaults to 64,
                matching the previous fixed 64x64 design.

        Raises:
            ValueError: If the image is too small to survive two poolings.
        """
        super().__init__()
        if isinstance(image_size, int):
            height = width = image_size
        else:
            height, width = image_size

        # The padded 3x3 convolutions keep the spatial size; each 2x2 pooling
        # halves it (rounding down).
        pooled_h = (height // 2) // 2
        pooled_w = (width // 2) // 2
        if pooled_h < 1 or pooled_w < 1:
            raise ValueError(f"image_size {image_size!r} is too small; need at least 4x4")

        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout = nn.Dropout(0.3)
        self.fc1 = nn.Linear(64 * pooled_h * pooled_w, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Map a (batch, 3, H, W) tensor to (batch, num_classes) scores."""
        x = self.pool(F.relu(self.conv1(x)))  # [B, 32, H/2, W/2]
        x = self.pool(F.relu(self.conv2(x)))  # [B, 64, H/4, W/4]
        x = x.flatten(1)  # keep the batch dimension, flatten the rest
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.fc2(x)
        return x
