In [1]:
### Uncomment the lines below when running on Google Colab
# from google.colab import drive
# drive.mount('/content/drive')
# import os
# os.chdir('/content/drive/MyDrive/phd/project_af')

In [2]:
from helper_functions import *
from model_classes import *

In [None]:
# Load the segment table from disk and show a short preview plus its shape.
data = pd.read_csv("./data/data_9s.csv")
print(data.head(5))
print(data.shape)

# Build the three tensors returned by load_tensor for each split
# (presumably raw signal, spectrogram and label -- confirm in helper_functions).
x_train, x_spec_train, y_train = load_tensor("train", data)
x_valid, x_spec_valid, y_valid = load_tensor("valid", data)
x_test, x_spec_test, y_test = load_tensor("test", data)

# Index of the validation example previewed in both figures below.
sample_idx = 5

# Visualize one validation signal (index 0 along the second axis).
plt.plot(x_valid[sample_idx, 0, :])
# The title has to come from the same split as the plotted signal,
# i.e. y_valid rather than y_test.
plt.title(str(y_valid[sample_idx]))
plt.show()

# Visualize the matching validation spectrogram. permute(1, 2, 0) moves the
# first axis to the end because imshow expects the colour axis last.
plt.imshow(x_spec_valid[sample_idx, :, :, :].permute(1, 2, 0))
plt.title(str(y_valid[sample_idx]))
# plt.show must be called; a bare `plt.show` is only an attribute lookup.
plt.show()


     0    1    2    3    4    5    6    7    8    9  ...  2694  2695  2696  \
0 -127 -162 -197 -229 -245 -254 -261 -265 -268 -268  ...   -48   -49   -50   
1  -41  -35  -28  -19   12   69  151  257  382  522  ...    10    10    10   
2    7    7    6    4    3    2    2    3    4    6  ...    53    50    46   
3  128  157  189  226  250  257  262  265  268  269  ...    -2     0     1   
4   13   15   18   21   24   27   31   35   41   48  ...  -154  -134  -112   

   2697  2698  2699  2700   2701    2702  2703  
0   -49   -48   -45     N  train  A00001     0  
1     9     9     8     N  train  A00001     1  
2    42    36    29     N  train  A00001     2  
3     4     7    10     N   test  A00002     0  
4   -88   -66   -31     N   test  A00002     1  

[5 rows x 2704 columns]
(27398, 2704)


# Estimate receptive field size 

In [None]:
# Full shape of the validation spectrogram that is cropped below.
print(x_spec_valid[5, :, :, :].shape)

# Show only a small window of that spectrogram: after the permute, the
# 150:205 slice becomes the image rows and the 20:53 slice the image columns.
plt.imshow(x_spec_valid[5, :, 150:205, 20:53].permute(1, 2, 0))
plt.title(str(y_valid[5]))
# plt.show must be called; a bare `plt.show` is only an attribute lookup.
plt.show()

# Conv2d on spectrogram

In [None]:
# The 2D-convolution models consume spectrograms, so point the generic
# names at the spectrogram tensors.
x_train = x_spec_train
x_valid = x_spec_valid

# Batch size, iteration budget, and the (truncated) epoch count they imply.
batch_size = 50
n_iters = 10000
num_epochs = int(n_iters / (len(x_train) / batch_size))
print(num_epochs)

# PyTorch training and validation datasets.
train = torch.utils.data.TensorDataset(x_train, y_train)
valid = torch.utils.data.TensorDataset(x_valid, y_valid)

# Shuffle the training set so every epoch draws differently composed
# mini-batches; with shuffle=False SGD would see the identical batch sequence
# each epoch, in whatever order load_tensor produced. Validation order does
# not affect the metrics, so it stays fixed.
train_loader = DataLoader(train, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid, batch_size=batch_size, shuffle=False)


# Pick the compute backend in order of preference: CUDA, then Apple MPS,
# then CPU. The MPS check is only evaluated when CUDA is unavailable.
device = torch.device(
    "cuda" if torch.cuda.is_available()
    else "mps" if torch.backends.mps.is_available()
    else "cpu"
)
print("Device", device)

# Learning rates to step through, largest first.
lr_list = [5e-4, 1e-4, 5e-5, 1e-5, 5e-6, 1e-6]

# Choose the architecture by name.
model_name = "cnn_res3"

# Each name maps to exactly one constructor. The chain is evaluated lazily, so
# only the selected class has to exist in model_classes. Previously the
# "cnn_res4" branch was keyed on "cnn_res3", which built a throwaway cnn_res4
# and made cnn_res4 impossible to select.
if model_name == "cnn_res4":
    model = cnn_res4(dim_out=4).to(device)
elif model_name == "cnn_res3":
    model = cnn_res3(dim_out=4).to(device)
elif model_name == "cnn_res2":
    model = cnn_res2(dim_out=4).to(device)
elif model_name == "cnn_resnet":
    model = cnn_resnet(dim_out=4).to(device)
elif model_name == "cnn_conv2d_vgg16":
    model = cnn_conv2d_vgg16(dim_out=4).to(device)
elif model_name == "cnn_conv2d_ggl":
    model = cnn_conv2d_ggl(dim_out=4).to(device)
elif model_name == "cnn_conv2d_diy":
    model = cnn_conv2d_diy(dim_out=4).to(device)
elif model_name == "cnn_conv2d_vgg16_custom":
    model = cnn_conv2d_vgg16_custom(dim_out=4).to(device)
else:
    # Fail loudly instead of leaving `model` undefined (or silently reusing a
    # stale one left in the kernel by an earlier run).
    raise ValueError(f"unknown model_name: {model_name!r}")

# Count trainable parameters. The result is deliberately not stored in `np`,
# which would shadow the conventional numpy alias for the rest of the notebook.
n_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"total number of parameters in {model_name}: {n_params}")

# Start from the first learning rate in the schedule.
lr_past = lr_list[0]
optimizer = torch.optim.SGD(model.parameters(), lr=lr_past)

# Classification loss.
loss_module = nn.CrossEntropyLoss()

# Resume from the per-model checkpoint when one exists.
file_path = './model/'+str(model_name)+'.pth'
if os.path.exists(file_path):
    print("continue from last checkpoint")
    # map_location lets a checkpoint written on one backend (e.g. CUDA) be
    # restored on whatever device this session selected.
    # NOTE(review): the checkpoint pickles arbitrary Python objects (a
    # DataFrame and the loss module), so only load files you trust; newer
    # PyTorch releases may additionally require weights_only=False -- confirm
    # against the installed version.
    checkpoint = torch.load(file_path, map_location=device)

    # strict=False tolerates key mismatches, so report them rather than
    # silently continuing with partially initialised weights.
    load_result = model.load_state_dict(checkpoint['model_state_dict'], strict=False)
    if load_result.missing_keys or load_result.unexpected_keys:
        print(f"state_dict mismatch: missing={load_result.missing_keys}, "
              f"unexpected={load_result.unexpected_keys}")

    loss_module = checkpoint['loss_module']
    # Use the learning rate stored with the checkpoint instead of a hand-typed
    # constant; fall back to the current value for checkpoints without 'lr'.
    lr_past = checkpoint.get('lr', lr_past)
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

    # Metric history accumulated by previous runs; plot the 'tf' and 'vf' columns.
    eval_df = checkpoint['eval_df']
    plt.plot(eval_df['tf'], label='tf')
    plt.plot(eval_df['vf'], label='vf')
    plt.legend()
    # plt.show must be called; a bare `plt.show` is only an attribute lookup.
    plt.show()



In [None]:
# Manual settings for this training session; they override the values
# computed or restored by the setup cell.
num_epochs = 20
lr_past = 0.00001
# Number passed to train_model per call, after which a checkpoint is written.
# Any remainder of num_epochs // save_every is not trained.
save_every = 5

# Create the checkpoint directory up front so a missing folder cannot make
# torch.save fail only after a full training chunk has already been run.
os.makedirs(os.path.dirname(file_path) or ".", exist_ok=True)

# Only continue with the learning rates strictly below the last one used.
for lr in [l for l in lr_list if l < lr_past]:
    print("learning rate = "+str(lr))
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)
    for chunk_idx in range(num_epochs // save_every):
        tl, vl, ta, va, tf, vf = train_model(
            model, optimizer, train_loader, valid_loader, loss_module, save_every, device
        )
        # Metrics returned for this chunk, one column per returned sequence.
        eval_df_new = pd.DataFrame({"tl": tl,
                                    "vl": vl,
                                    "ta": ta,
                                    "va": va,
                                    "tf": tf,
                                    "vf": vf})
        # Append to the running history if one exists (e.g. restored from a
        # checkpoint); at cell level locals() and globals() are the same dict.
        if 'eval_df' in globals():
            eval_df = pd.concat([eval_df, eval_df_new], ignore_index=True)
        else:
            eval_df = eval_df_new
        # Persist everything needed to resume: history, weights, optimizer
        # state, loss module and the learning rate in use.
        torch.save({
            "eval_df": eval_df,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss_module': loss_module,
            'lr': lr,
        }, file_path)
        print(f'Model saved to {file_path}')

In [None]:
# Plot the stored metric history of another model's checkpoint.
# NOTE(review): this cell rebinds model_name, file_path, checkpoint and
# eval_df, which the training cell also reads; re-run the setup cell before
# training again, otherwise training would append to and overwrite this file.
model_name = "cnn_conv2d_vgg16_custom"
file_path = './model/'+str(model_name)+'.pth'
if os.path.exists(file_path):
    print("continue from last checkpoint")
    # Only the metric table is needed here, so load onto the CPU; this also
    # works when the checkpoint was written on a GPU that is not present.
    checkpoint = torch.load(file_path, map_location="cpu")
    eval_df = checkpoint['eval_df']
    eval_df = eval_df.reset_index(drop=True)
    plt.plot(eval_df['tf'], label='tf')
    plt.plot(eval_df['vf'], label='vf')
    plt.title(model_name)
    plt.legend()
    # plt.show must be called; a bare `plt.show` is only an attribute lookup.
    plt.show()

In [None]:
# Plot the stored metric history of another model's checkpoint.
# NOTE(review): this cell rebinds model_name, file_path, checkpoint and
# eval_df, which the training cell also reads; re-run the setup cell before
# training again, otherwise training would append to and overwrite this file.
model_name = "cnn_conv2d_vgg16"
file_path = './model/'+str(model_name)+'.pth'
if os.path.exists(file_path):
    print("continue from last checkpoint")
    # Only the metric table is needed here, so load onto the CPU; this also
    # works when the checkpoint was written on a GPU that is not present.
    checkpoint = torch.load(file_path, map_location="cpu")
    eval_df = checkpoint['eval_df']
    eval_df = eval_df.reset_index(drop=True)
    plt.plot(eval_df['tf'], label='tf')
    plt.plot(eval_df['vf'], label='vf')
    plt.title(model_name)
    plt.legend()
    # plt.show must be called; a bare `plt.show` is only an attribute lookup.
    plt.show()