In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

# data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input mount and print the full path of every file found.
for folder, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        path = os.path.join(folder, name)
        print(path)

In [None]:

#!pip install ray
#!pip install -U tensorboardx
#!pip install pyserial

import numpy as np 
import pandas as pd 

import os
#import serial #import write
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
import torchvision
from torchvision import transforms
from torchvision.datasets import FashionMNIST
from torchmetrics import Accuracy

from ray import air, tune
from ray.air import Checkpoint, session
from ray.air.config import RunConfig, ScalingConfig, CheckpointConfig
from ray.tune.schedulers import ASHAScheduler, PopulationBasedTraining



In [None]:
#@title old net
class Net(nn.Module):
    """Fully connected classifier with two hidden layers and 10 outputs.

    Layer layout (see ``__init__``)::

        layer1: BatchNorm1d(784) -> Linear(784, l1) -> act -> BatchNorm1d(l1) -> Dropout(p)
        layer2: Linear(l1, l2)   -> act -> BatchNorm1d(l2) -> Dropout(p)
        layer3: Linear(l2, 10)   -> LogSoftmax over the class dimension

    Args:
        l1: Number of units in the first hidden layer.
        l2: Number of units in the second hidden layer.
        p: Dropout probability used after each hidden layer.
        act: Activation name; must be a key of ``Net.ACTIVATIONS``.
        batch: Currently unused; kept so existing call sites keep working.

    Raises:
        KeyError: If ``act`` is not a key of ``Net.ACTIVATIONS``.
    """

    # Name -> constructor. Classes (not instances) are stored so every layer
    # gets its own module; reusing one instance would make activations with
    # learnable parameters (PReLU) share a single weight across both layers.
    ACTIVATIONS = {
        "ReLU": nn.ReLU,
        "PReLU": nn.PReLU,
        "LeakyReLU": nn.LeakyReLU,
        "SELU": nn.SELU,
        "RReLU": nn.RReLU,
        "ELU": nn.ELU,
        "SiLU": nn.SiLU,
        "Sigmoid": nn.Sigmoid,
        "Mish": nn.Mish,
    }

    def __init__(self, l1=12, l2=10, p=0.2, act="ReLU", batch=False):
        super().__init__()
        make_activation = self.ACTIVATIONS[act]

        self.layer1 = nn.Sequential(
            nn.BatchNorm1d(784),
            nn.Linear(784, l1),
            make_activation(),
            nn.BatchNorm1d(l1),
            nn.Dropout(p),
        )
        self.layer2 = nn.Sequential(
            nn.Linear(l1, l2),
            make_activation(),
            nn.BatchNorm1d(l2),
            nn.Dropout(p),
        )
        # dim=1 normalises across the 10 class scores of each sample.
        # (dim=0 would normalise each class across the batch instead.)
        self.layer3 = nn.Sequential(
            nn.Linear(l2, 10),
            nn.LogSoftmax(dim=1),
        )

    def forward(self, x):
        """Return per-sample log-probabilities of shape ``(batch, 10)``.

        Every dimension after the first is flattened, so singleton axes added
        by the data pipeline are absorbed without ever dropping the batch
        axis (``squeeze()`` would drop it for a batch of one). A 1-D input is
        treated as a single sample.
        """
        if x.dim() > 1:
            x = x.reshape(x.size(0), -1)
        else:
            x = x.reshape(1, -1)
        x = self.layer1(x)
        x = self.layer2(x)
        return self.layer3(x)
        

In [None]:
#@title og load data
def load_data(data_dir="/kaggle/working/"):
    """Download (if needed) and return the FashionMNIST train and test datasets.

    The raw ``.data`` tensor of each dataset is flattened from its second
    dimension onward, so every sample is stored as a single row.

    Args:
        data_dir: Directory used as the dataset ``root``. The previous code
            passed the literal string ``"data_dir"`` and ignored this
            argument, so data cached under ``./data_dir`` will be downloaded
            again into ``data_dir`` on the first call.

    Returns:
        Tuple ``(training_data, test_data)`` of ``FashionMNIST`` datasets that
        use ``transforms.ToTensor()`` as their transform.
    """
    to_tensor = transforms.ToTensor()

    training_data = torchvision.datasets.FashionMNIST(
        root=data_dir,
        train=True,
        download=True,
        transform=to_tensor,
    )
    test_data = torchvision.datasets.FashionMNIST(
        root=data_dir,
        train=False,
        download=True,
        transform=to_tensor,
    )

    # Flatten everything after the sample axis in the stored tensors.
    training_data.data = torch.flatten(training_data.data, start_dim=1)
    test_data.data = torch.flatten(test_data.data, start_dim=1)
    return training_data, test_data
#t,te=load_data()
#print(t.data.shape)
#print(te.data.shape)

In [None]:
#@title accuracy
def test_accuracy(net, device="cpu"):
    """Return the fraction of test-set samples that ``net`` classifies correctly.

    The test split comes from ``load_data()`` and is read in batches of 4.
    The network is put in evaluation mode for the pass, so Dropout and
    BatchNorm behave deterministically, and its previous mode is restored
    afterwards.

    Args:
        net: Model to evaluate. It is not moved here; only the batches are
            sent to ``device``.
        device: Device the input batches and labels are moved to.

    Returns:
        ``correct / total`` as a float.
    """
    _, testset = load_data()
    testloader = torch.utils.data.DataLoader(
        testset, batch_size=4, shuffle=False, num_workers=2
    )

    was_training = net.training
    net.eval()
    correct = 0
    total = 0
    try:
        with torch.no_grad():
            for images, labels in testloader:
                images, labels = images.to(device), labels.to(device)
                outputs = net(images)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    finally:
        # Leave the caller's model in the mode it arrived in.
        net.train(was_training)
    return correct / total
#print(test_accuracy(Net(16,8)))
#print(test_accuracy(Net(16,8)))
#print(test_accuracy(Net(16,8)))

In [None]:
def train_cifar(config,epochs=10,data_dir=None,trainset=None,testset=None,tune=False,dataloaders=None,max_iter=10):
    """Train a ``Net`` and validate it after every epoch.

    Args:
        config: Mapping with the keys ``"l1"``, ``"l2"``, ``"dropout"``,
            ``"act"`` (forwarded to ``Net``), ``"opt"`` (one of ``"SGD"``,
            ``"Adam"``, ``"LBFGS"``, ``"NAdam"``) and ``"lr"``.
        epochs: Epoch index at which training stops (exclusive).
        data_dir: Unused; kept for call-site compatibility.
        trainset: Dataset split 80/20 into train/validation subsets when
            ``dataloaders`` is not given.
        testset: Unused; kept for call-site compatibility.
        tune: When true, restore from and report to the Ray AIR ``session``
            every epoch. When false, print validation statistics every fifth
            epoch. Note that this name shadows the ``ray.tune`` module inside
            this function.
        dataloaders: Optional ``(train_loader, val_loader)`` pair; when given,
            ``trainset`` is ignored.
        max_iter: Number of per-epoch ``acc_<i>`` / ``loss_<i>`` slots
            pre-filled with ``-1`` in the reported metrics.

    Returns:
        The trained network, wrapped in ``nn.DataParallel`` when more than
        one CUDA device is visible. It is left in evaluation mode after the
        final validation pass.

    Raises:
        ValueError: If ``config["opt"]`` is not a supported optimizer name,
            or if neither ``dataloaders`` nor ``trainset`` is provided.
    """
    net = Net(l1=config["l1"], l2=config["l2"], p=config["dropout"], act=config["act"])
    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if torch.cuda.device_count() > 1:
            net = nn.DataParallel(net)
    net.to(device)
    criterion = nn.CrossEntropyLoss()

    optimizer_classes = {
        "SGD": optim.SGD,
        "Adam": optim.Adam,
        "LBFGS": optim.LBFGS,
        "NAdam": optim.NAdam,
    }
    opt_name = config["opt"]
    if opt_name not in optimizer_classes:
        raise ValueError(
            f"Unsupported optimizer {opt_name!r}; expected one of {sorted(optimizer_classes)}"
        )
    optimizer = optimizer_classes[opt_name](net.parameters(), lr=config["lr"])

    start_epoch = 0
    if tune:
        checkpoint = session.get_checkpoint()
        if checkpoint:
            checkpoint_state = checkpoint.to_dict()
            # NOTE(review): the checkpoint stores the index of the epoch that
            # just finished, so a restored trial repeats that epoch. Confirm
            # this is intended before changing it.
            start_epoch = checkpoint_state["epoch"]
            net.load_state_dict(checkpoint_state["net_state_dict"])
            optimizer.load_state_dict(checkpoint_state["optimizer_state_dict"])

    if dataloaders is None:
        if trainset is None:
            raise ValueError("train_cifar needs either `dataloaders` or `trainset`")
        n_train = int(len(trainset) * 0.8)
        train_subset, val_subset = random_split(
            trainset, [n_train, len(trainset) - n_train]
        )
        trainloader = torch.utils.data.DataLoader(
            train_subset, batch_size=32, shuffle=True, num_workers=4
        )
        valloader = torch.utils.data.DataLoader(
            val_subset, batch_size=32, shuffle=True, num_workers=4
        )
    else:
        trainloader = dataloaders[0]
        valloader = dataloaders[1]

    # Every per-epoch slot exists up front (value -1 until that epoch runs),
    # so each report carries the same set of keys.
    metrics = {"acc_" + str(i): -1 for i in range(1, max_iter + 1)}
    metrics.update({"loss_" + str(i): -1 for i in range(1, max_iter + 1)})
    metrics["loss"] = -1
    metrics["accuracy"] = -1

    for epoch in range(start_epoch, epochs):
        # --- training pass: Dropout active, BatchNorm uses batch statistics
        net.train()
        for inputs, labels in trainloader:
            inputs, labels = inputs.to(device), labels.to(device)

            def closure():
                """Zero the gradients, run forward/backward, return the loss."""
                optimizer.zero_grad()
                batch_loss = criterion(net(inputs), labels)
                batch_loss.backward()
                return batch_loss

            # LBFGS requires a closure so it can re-evaluate the loss; the
            # other optimizers accept one and call it exactly once.
            optimizer.step(closure)

        # --- validation pass: deterministic layers, no gradient tracking
        net.eval()
        val_loss = 0.0
        val_steps = 0
        total = 0
        correct = 0
        with torch.no_grad():
            for inputs, labels in valloader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = net(inputs)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
                val_loss += criterion(outputs, labels).item()
                val_steps += 1

        if tune:
            checkpoint = Checkpoint.from_dict(
                {
                    "epoch": epoch,
                    "net_state_dict": net.state_dict(),
                    "optimizer_state_dict": optimizer.state_dict(),
                }
            )
            accuracy = correct / total
            mean_val_loss = val_loss / val_steps

            metrics["acc_" + str(epoch + 1)] = accuracy
            metrics["loss_" + str(epoch + 1)] = mean_val_loss
            metrics["accuracy"] = accuracy
            metrics["loss"] = mean_val_loss

            session.report(metrics, checkpoint=checkpoint)
        elif epoch % 5 == 0:
            # Prints the summed (not averaged) validation loss, as before.
            print("epoch,val loss,%correct", epoch, val_loss, correct / total)

    print("Finished Training")
    return net

In [None]:
#@title hp3
def hyperparam_search2(config,scheduler="ASHA",cpu=4,gpu=0,max_num_epochs=10,concurrent=1,output_dir="/kaggle/working",results_prefix="NAdam"):
    """Run a Ray Tune search over ``config`` and return the best trial's config.

    The training split from ``load_data()`` is divided 80/20 into train and
    validation subsets. Both loaders (batch size 64) are handed to
    ``train_cifar`` through ``tune.with_parameters``. Trials are ranked by
    the reported ``"accuracy"`` metric (higher is better).

    Three CSV files are written to ``output_dir``:
    ``<prefix>_search_results.csv``, ``<prefix>_best_config.csv`` and
    ``<prefix>_best_metrics.csv``.

    Args:
        config: Ray Tune parameter space passed as ``param_space``.
        scheduler: Scheduler name; only ``"ASHA"`` is implemented.
        cpu: Currently unused; not forwarded to Ray Tune.
        gpu: Currently unused; not forwarded to Ray Tune.
        max_num_epochs: Used as the ASHA ``max_t`` and as ``max_iter`` for
            ``train_cifar``.
        concurrent: Forwarded as ``max_concurrent_trials``.
            NOTE(review): callers in this file pass 0; Ray Tune is believed
            to treat 0 as "no limit" - confirm for the installed version.
        output_dir: Directory that receives the CSV files.
        results_prefix: File-name prefix for the CSV files.

    Returns:
        The configuration dict of the best trial.

    Raises:
        ValueError: If ``scheduler`` is not ``"ASHA"``. Previously this
            surfaced later as a NameError on the unbound scheduler variable.
    """
    if scheduler == "ASHA":
        # metric/mode are left unset here; they are supplied via TuneConfig.
        sched = ASHAScheduler(
            max_t=max_num_epochs,
            grace_period=1,
            reduction_factor=2,
            brackets=1,
        )
    else:
        raise ValueError(
            f"Unsupported scheduler {scheduler!r}; only 'ASHA' is implemented"
        )

    trainset, _ = load_data()
    n_train = int(len(trainset) * 0.8)
    train_subset, val_subset = random_split(
        trainset, [n_train, len(trainset) - n_train]
    )
    trainloader = torch.utils.data.DataLoader(
        train_subset, batch_size=64, shuffle=True, num_workers=4
    )
    valloader = torch.utils.data.DataLoader(
        val_subset, batch_size=64, shuffle=True, num_workers=4
    )

    trainable = tune.with_parameters(
        train_cifar,
        dataloaders=[trainloader, valloader],
        tune=True,
        max_iter=max_num_epochs,
    )
    tuner = tune.Tuner(
        trainable,
        param_space=config,
        tune_config=tune.TuneConfig(
            metric="accuracy",
            mode="max",
            scheduler=sched,
            max_concurrent_trials=concurrent,
        ),
    )

    results = tuner.fit()
    best_trial = results.get_best_result(metric="accuracy", mode="max")

    # Persist the full results table plus the winning config and metrics.
    all_trials = results.get_dataframe()
    best_config_frame = pd.DataFrame(best_trial.config, index=[0])
    best_metrics_frame = pd.DataFrame(best_trial.metrics, index=[0])

    all_trials.to_csv(os.path.join(output_dir, f"{results_prefix}_search_results.csv"))
    best_config_frame.to_csv(os.path.join(output_dir, f"{results_prefix}_best_config.csv"))
    best_metrics_frame.to_csv(os.path.join(output_dir, f"{results_prefix}_best_metrics.csv"))

    print(f"Best trial config: {best_trial.config}")
    print(f"Best trial final validation loss: {best_trial.metrics['loss']}")
    print(f"Best trial final validation accuracy: {best_trial.metrics['accuracy']}")

    return best_trial.config
    

In [None]:
#@title og hps
def hyperparam_search(config,scheduler="ASHA",cpu=4,gpu=0,num_samples=20,max_num_epochs=10):
    """Legacy search entry point; currently disabled and always returns ``None``.

    The original ``tune.run`` call was commented out and the function
    returned before any search ran. The code that followed was unreachable
    and used names never defined here (``result``, ``gpus_per_trial``), so it
    has been removed. The ASHA scheduler it built was never used and paired
    ``metric="loss"`` with ``mode="max"``; it has been removed as well.

    The signature and the ``None`` return value are unchanged, and the
    dataset is still loaded as before. A warning is now emitted so callers
    notice that no search happened. Use ``hyperparam_search2`` for a working
    search.

    Args:
        config: Unused.
        scheduler: Unused.
        cpu: Unused.
        gpu: Unused.
        num_samples: Unused.
        max_num_epochs: Unused.

    Returns:
        ``None``.
    """
    import warnings

    # Kept from the original flow: still triggers the dataset download/cache.
    load_data()

    warnings.warn(
        "hyperparam_search is disabled and returns None; "
        "use hyperparam_search2 to run a search",
        RuntimeWarning,
        stacklevel=2,
    )
    return None
    

In [None]:
def main(quick=False):
    """Seed PyTorch, run the hyperparameter search and return the best config.

    Args:
        quick: When false (default), run the full grid over layer widths,
            learning rate, dropout and activation with the NAdam optimizer
            for up to 10 epochs per trial. When true, run the small SGD grid
            that varies only ``l1``, for up to 3 epochs per trial.

    Returns:
        The best trial configuration returned by ``hyperparam_search2``.
    """
    torch.manual_seed(0)

    if quick:
        # Small smoke-test space: 3 trials.
        quick_space = {
            "opt": "SGD",
            "l1": tune.grid_search([2**i for i in range(4, 7)]),
            "l2": 16,
            "lr": .001,
            "dropout": .2,
            "act": "ReLU",
        }
        return hyperparam_search2(quick_space, scheduler="ASHA", max_num_epochs=3, concurrent=0)

    # Full grid: 4 (l1) x 3 (l2) x 3 (lr) x 4 (dropout) x 9 (act) = 1296 trials.
    search_space = {
        "opt": "NAdam",
        "l1": tune.grid_search([2**i for i in range(4, 8)]),
        "l2": tune.grid_search([2**i for i in range(4, 7)]),
        "lr": tune.grid_search([0.1, 0.01, 0.001]),
        "dropout": tune.grid_search([0.12, .16, 0.2, 0.24]),
        "act": tune.grid_search([
            "ReLU",
            "PReLU",
            "LeakyReLU",
            "SELU",
            "RReLU",
            "ELU",
            "SiLU",
            "Sigmoid",
            "Mish",
        ]),
    }
    # Ideas noted by the author but not explored yet: SNN; batch, layer, group.

    best_config = hyperparam_search2(search_space, scheduler="ASHA", max_num_epochs=10, concurrent=0)

    # Follow-up sketched by the author but not enabled: retrain the winner
    # with train_cifar(best_config, epochs=200, dataloaders=[...]), score it
    # with test_accuracy(net) and save the model.
    return best_config

In [None]:
# Entry point: start the hyperparameter search when this file is run as the
# main module (which is also the case when the cell is executed in a notebook).
if __name__ == "__main__":
    main()