In [None]:
import torch
import torch.optim.adamw

In [6]:
import json
import numpy as np
import plotly.graph_objs as go

# Accuracy logs from the beta1/beta2 ablation runs: one JSON file per
# optimizer / swept beta / data split.
file_names = [
    "adamW_weightDecay_beta1_ablation_train_accuracy.json",
    "adamW_weightDecay_beta1_ablation_valid_accuracy.json",
    "adamW_weightDecay_beta2_ablation_train_accuracy.json",
    "adamW_weightDecay_beta2_ablation_valid_accuracy.json",
    "lion_weightDecay_beta1_ablation_train_accuracy.json",
    "lion_weightDecay_beta1_ablation_valid_accuracy.json",
    "lion_weightDecay_beta2_ablation_train_accuracy.json",
    "lion_weightDecay_beta2_ablation_valid_accuracy.json"
]


def build_ablation_figure(file_name):
    """Build a 3D surface figure of accuracy over epochs and swept beta values.

    The JSON file is expected to map each swept value (a key that parses as a
    float) to a list of per-epoch accuracies, with all lists the same length.
    The optimizer, swept beta and data split shown in the titles are taken
    from the "_"-separated parts of ``file_name``.

    Args:
        file_name: Path of the JSON log, named like
            ``<optimizer>_weightDecay_<beta>_ablation_<split>_accuracy.json``.

    Returns:
        A ``plotly.graph_objs.Figure`` holding a single surface trace.

    Raises:
        ValueError: If the loaded values do not form a 2D (value x epoch) grid.
    """
    with open(file_name, 'r') as file:
        data = json.load(file)

    # Positions follow the naming scheme listed in `file_names` above.
    name_parts = file_name.split("_")
    optimizer = name_parts[0]
    beta_name = name_parts[2]
    metric = name_parts[-2]

    # One row per swept value, one column per epoch.
    accuracy_data = np.array(list(data.values()))
    if accuracy_data.ndim != 2:
        raise ValueError(
            f"{file_name}: expected equally long accuracy lists per swept value, "
            f"got an array of shape {accuracy_data.shape}"
        )

    epochs = np.arange(1, accuracy_data.shape[1] + 1)
    # Swept values are rounded to four decimals and passed to plotly as strings.
    beta_values = np.array([f"{float(beta):.4f}" for beta in data])

    trace = go.Surface(x=epochs, y=beta_values, z=accuracy_data, colorscale='Viridis')

    # Name the swept beta in the title and on the axis; otherwise the beta1 and
    # beta2 figures of the same optimizer/split cannot be told apart.
    layout = go.Layout(
        scene=dict(
            xaxis=dict(title='Epochs'),
            yaxis=dict(title=beta_name.capitalize()),
            zaxis=dict(title='Accuracy'),
        ),
        title=f'{optimizer.upper()} - {beta_name.capitalize()} vs {metric.capitalize()} Accuracy'
    )

    return go.Figure(data=[trace], layout=layout)


for file_name in file_names:
    fig = build_ablation_figure(file_name)
    # Display the interactive plot
    fig.show()


In [1]:
import json
import numpy as np
import plotly.graph_objs as go
from plotly.subplots import make_subplots

# Validation-accuracy logs, ordered as (AdamW, Lion) pairs: beta1 first, then beta2.
file_names = [
    "adamW_weightDecay_beta1_ablation_valid_accuracy.json",
    "lion_weightDecay_beta1_ablation_valid_accuracy.json",
    "adamW_weightDecay_beta2_ablation_valid_accuracy.json",
    "lion_weightDecay_beta2_ablation_valid_accuracy.json"
]


def _load_surface_inputs(file_name):
    """Read one ablation log and return ``(epochs, beta_values, accuracy_data)``.

    The JSON file is expected to map each swept value (a key that parses as a
    float) to a list of per-epoch accuracies.
    """
    with open(file_name, 'r') as file:
        data = json.load(file)

    # One row per swept value, one column per epoch.
    accuracy_data = np.array(list(data.values()))
    epochs = np.arange(1, len(accuracy_data[0]) + 1)
    # Swept values are rounded to four decimals and passed to plotly as strings.
    beta_values = np.array([f"{float(beta):.4f}" for beta in data])
    return epochs, beta_values, accuracy_data


def build_comparison_figure(pair_files, beta_label, subplot_titles=("AdamW", "Lion"), font_size=10.5):
    """Build a side-by-side surface comparison of two optimizers.

    Args:
        pair_files: Exactly two JSON log paths; the first is drawn in the left
            subplot and the second in the right one.
        beta_label: Text for the y-axis of both subplots (e.g. ``'Beta1'``).
        subplot_titles: Titles of the left and right subplots.
        font_size: Base font size; the figure title is drawn two points larger.

    Returns:
        A plotly figure with one row and two 3D surface subplots.

    Raises:
        ValueError: If ``pair_files`` does not contain exactly two entries.
    """
    pair_files = list(pair_files)
    if len(pair_files) != 2:
        raise ValueError(f"expected exactly two files to compare, got {len(pair_files)}")

    fig = make_subplots(rows=1, cols=2, subplot_titles=list(subplot_titles),
                        specs=[[{'type': 'surface'}, {'type': 'surface'}]])

    for col, file_name in enumerate(pair_files, start=1):
        epochs, beta_values, accuracy_data = _load_surface_inputs(file_name)
        fig.add_trace(
            go.Surface(x=epochs, y=beta_values, z=accuracy_data, colorscale='Viridis'),
            row=1, col=col,
        )

    axis_titles = dict(
        xaxis_title='Epochs',
        yaxis_title=beta_label,
        zaxis_title='Validation Accuracy'
    )
    fig.update_layout(
        # The title font is set through `title.font`; the older top-level
        # `titlefont` attribute is deprecated and rejected by newer Plotly.
        title=dict(
            text='Comparison of momentum Ablation for Different Optimizers',
            font=dict(size=font_size + 2),
        ),
        font=dict(size=font_size),
        height=600,  # You may need to adjust the height and width to fit all plots comfortably
        width=1300,
        scene=dict(axis_titles),
        scene2=dict(axis_titles),
    )
    return fig


# Beta1 ablation: first (AdamW, Lion) pair.
fig = build_comparison_figure(file_names[:2], 'Beta1')
fig.show()

# Beta2 ablation: second (AdamW, Lion) pair.
fig = build_comparison_figure(file_names[2:4], 'Beta2')
fig.show()


In [None]:
SGD performs better over a wider range of momentum.
Momentum performs well only in a small range, when the learning rate is low, around 0.01.
AdaGrad and Adam are adaptive, so they perform better over a wide range.
AdamW performs well only in a small range because the weight decay required for this task seems to be high.

Here we verify that Lion requires a very small momentum, below 0.01 in this case, as discussed in the earlier section.

In [None]:
We get results similar to before:
SGD performs better over a wide range of momentum.
Momentum and NAG still perform well only in a smaller range.
AdamW still does not perform well over a wider range, as the required weight decay seems to be high.

Since the purple area is larger in the Lion graph, Lion requires a lower LR than AdamW.


In [None]:
Even in a different domain, our observation persists:
Lion requires a lower LR than AdamW.

In [None]:
We took the best learning rate for each optimizer from the previous learning momentum results.
We observed that AdamW seems to be more sensitive to weight decay than Lion.

In [None]:
Momentum, RMSProp, and Adam perform well over almost the entire range.
Lion performs better in the lower range of beta1.
AdamW performs better in the higher range of beta1, and there are some valleys in the lower ranges.

Adam and AdamW perform better in the higher range of beta2.
Lion performs better in the lower range of beta2, especially around 0.999.

NOTE : Remove LION from Wider Range (GRAPH)


Lion and AdamW perform well with beta1 in the range 0.8 to 1 and beta2 in the range 0.9 to 1.

In [None]:
Our Lion performs better at smaller batch sizes, compared to AdamW at higher batch sizes.

The accuracy of the built-in Lion and our Lion is almost the same.

In [None]:
The results we found for image classification differ from those given in the paper, but when we shifted the domain to NLP (sentiment analysis), the results matched those of the original Lion paper.
As reported in the Lion paper, we see that Lion performs better at higher batch sizes than at lower batch sizes, as compared to AdamW.

The accuracy of the built-in Lion and our Lion is almost the same.

In [None]:
AdamW and Lion: there isn't much difference in performance when the number of parameters is increased by some factor (we suspect weight decay, WD, to be the reason).
The rest perform better with more parameters and batch norm.

As we increase the number of parameters, as in model 2, the model overfits the training data and the validation accuracy decreases.
But as we normalize in model 3, the overfitting is reduced, thus increasing the validation accuracy again.