In [1]:
import math
import numpy as np
import pandas as pd
import sympy as sp
import plotly.express as px
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Read in the data

In [2]:
# Load the semicolon-separated batch results, then discard the trailing column
# (presumably an empty artifact of the file format -- TODO confirm).
raw_results = pd.read_csv('../uo_nn_batch_123456-789101.csv', sep=';')
last_column = raw_results.columns[-1]
df = raw_results.drop(columns=last_column)
df.head()

Unnamed: 0,num_target,la,isd,niter,tex,tr_acc,te_acc,L*,Gconv
0,1,0.0,1,75,0.1874,100.0,100.0,5.72e-07,1
1,1,0.0,3,6,0.0186,100.0,100.0,2.91e-50,1
2,1,0.0,7,125125,68.7754,100.0,100.0,1.41e-05,0
3,1,0.01,1,58,0.4408,100.0,100.0,0.0276,1
4,1,0.01,3,45,0.2759,100.0,100.0,0.0276,1


In [3]:
# Show the names of the columns of the loaded results table
df.columns

Index(['num_target', 'la', 'isd', 'niter', 'tex', 'tr_acc', 'te_acc', 'L*',
       'Gconv'],
      dtype='object')

In [4]:
# Store the 'la' and 'isd' columns as categorical columns
for col in ('la', 'isd'):
    df[col] = df[col].astype('category')


# Global convergence

## GM

In [5]:
# GM runs are the rows whose 'isd' code is 1
is_gm = df['isd'] == 1
df_gm = df[is_gm]
df_gm.head()

Unnamed: 0,num_target,la,isd,niter,tex,tr_acc,te_acc,L*,Gconv
0,1,0.0,1,75,0.1874,100.0,100.0,5.72e-07,1
3,1,0.01,1,58,0.4408,100.0,100.0,0.0276,1
6,1,0.1,1,23,0.1238,100.0,100.0,0.0971,1
9,2,0.0,1,385,1.3102,100.0,98.8,1.26e-06,1
12,2,0.01,1,139,0.5085,99.6,98.8,0.0515,1


### Global convergence percentage

In [6]:
# Percentage of GM runs that reached global convergence ('Gconv' == 1) for each
# value of 'la'. The share is taken over the runs actually present for that
# 'la' value instead of assuming exactly 10 runs per value, so the result stays
# correct if the batch file holds a different number of runs. A 'la' value with
# no runs yields NaN.
gm_converged = df_gm['Gconv'] == 1
pr1 = 100 * gm_converged[df_gm['la'] == 0.00].mean()
pr2 = 100 * gm_converged[df_gm['la'] == 0.01].mean()
pr3 = 100 * gm_converged[df_gm['la'] == 0.1].mean()

In [7]:
# Small summary table: one row per lambda value with its GM convergence percentage
gm_rows = list(zip(['0.00', '0.01', '0.1'], [pr1, pr2, pr3]))
la_gm_conv_prc = pd.DataFrame(gm_rows, columns=['lambda', 'percentage'])
la_gm_conv_prc

Unnamed: 0,lambda,percentage
0,0.0,70.0
1,0.01,100.0
2,0.1,100.0


In [8]:
# Bar chart of the GM global-convergence percentage, one bar per lambda value
fig = px.bar(la_gm_conv_prc, x='lambda', y='percentage',
             title='GM convergence percentage for different values of lambda',
             color='lambda',
             color_discrete_sequence=['#440154', '#21918c', '#fde725'])
fig.update_layout(
    # The backslash is required: '$lambda$' is typeset as the word "lambda",
    # '$\lambda$' as the Greek letter.
    xaxis_title=r'$\lambda$',
    yaxis_title=r'GM convergence percentage (%)'
)
fig.show()
# save the figure
fig.write_image("GM_GC_for_la.png")

### L as a function of la

In [9]:
# Scatter plot of the final loss 'L*' of every GM run, grouped by lambda
fig = px.scatter(df_gm, x='la', y='L*',
                 title='GM Loss function for the optimal weights as a function of lambda',
                 color='la',
                 color_discrete_sequence=['#440154', '#21918c', '#fde725'])
fig.update_layout(
    # The backslash is required: '$lambda$' is typeset as the word "lambda",
    # '$\lambda$' as the Greek letter.
    xaxis_title=r'$\lambda$',
    yaxis_title=r'$L(w*,Xtr,ytr)$',
    # treat the lambda values as discrete categories, not as a numeric axis
    xaxis_type='category'
)
fig.show()
# save the figure
fig.write_image("GM_L_for_la.png")

## QNM

In [10]:
# select only the rows with 'isd' = 3 (the QNM runs)
df_qnm = df[df['isd'] == 3]
df_qnm.head()

Unnamed: 0,num_target,la,isd,niter,tex,tr_acc,te_acc,L*,Gconv
1,1,0.0,3,6,0.0186,100.0,100.0,2.91e-50,1
4,1,0.01,3,45,0.2759,100.0,100.0,0.0276,1
7,1,0.1,3,18,0.1063,100.0,100.0,0.0971,1
10,2,0.0,3,50,0.2484,100.0,98.8,1.66e-11,1
13,2,0.01,3,50,0.2503,99.6,98.8,0.0515,1


### Global convergence percentage

In [11]:
# Percentage of QNM runs that reached global convergence ('Gconv' == 1) for
# each value of 'la'. The share is taken over the runs actually present for
# that 'la' value instead of assuming exactly 10 runs per value, so the result
# stays correct if the batch file holds a different number of runs. A 'la'
# value with no runs yields NaN.
qnm_converged = df_qnm['Gconv'] == 1
pr1 = 100 * qnm_converged[df_qnm['la'] == 0.00].mean()
pr2 = 100 * qnm_converged[df_qnm['la'] == 0.01].mean()
pr3 = 100 * qnm_converged[df_qnm['la'] == 0.1].mean()

In [12]:
# Small summary table: one row per lambda value with its QNM convergence percentage
qnm_rows = list(zip(['0.00', '0.01', '0.1'], [pr1, pr2, pr3]))
la_qnm_conv_prc = pd.DataFrame(qnm_rows, columns=['lambda', 'percentage'])
la_qnm_conv_prc

Unnamed: 0,lambda,percentage
0,0.0,80.0
1,0.01,100.0
2,0.1,100.0


In [13]:
# Bar chart of the QNM global-convergence percentage, one bar per lambda value.
# The title names QNM (it previously said "GM", carried over from the GM cell).
fig = px.bar(la_qnm_conv_prc, x='lambda', y='percentage',
             title='QNM convergence percentage for different values of lambda',
             color='lambda',
             color_discrete_sequence=['#440154', '#21918c', '#fde725'])
fig.update_layout(
    # The backslash is required: '$lambda$' is typeset as the word "lambda",
    # '$\lambda$' as the Greek letter.
    xaxis_title=r'$\lambda$',
    yaxis_title=r'QNM convergence percentage (%)'
)
fig.show()
# save the figure
fig.write_image("QNM_GC_for_la.png")

### L as a function of la

In [14]:
# Scatter plot of the final loss 'L*' of every QNM run, grouped by lambda.
# The title names QNM (it previously said "GM", carried over from the GM cell).
fig = px.scatter(df_qnm, x='la', y='L*',
                 title='QNM Loss function for the optimal weights as a function of lambda',
                 color='la',
                 color_discrete_sequence=['#440154', '#21918c', '#fde725'])
fig.update_layout(
    # The backslash is required: '$lambda$' is typeset as the word "lambda",
    # '$\lambda$' as the Greek letter.
    xaxis_title=r'$\lambda$',
    yaxis_title=r'$L(w*,Xtr,ytr)$',
    # treat the lambda values as discrete categories, not as a numeric axis
    xaxis_type='category'
)
fig.show()
# save the figure
fig.write_image("QNM_L_for_la.png")

## SGM

In [15]:
# select only the rows with 'isd' = 7 (the SGM runs)
df_sgm = df[df['isd'] == 7]
df_sgm.head()

Unnamed: 0,num_target,la,isd,niter,tex,tr_acc,te_acc,L*,Gconv
2,1,0.0,7,125125,68.7754,100.0,100.0,1.4e-05,0
5,1,0.01,7,3750,2.4056,100.0,100.0,0.0314,0
8,1,0.1,7,3750,2.1992,53.6,89.6,0.2,0
11,2,0.0,7,1625,1.0,100.0,99.2,0.00271,0
14,2,0.01,7,1500,0.9148,95.2,99.2,0.0903,0


### Global convergence percentage

In [16]:
# Percentage of SGM runs that reached global convergence ('Gconv' == 1) for
# each value of 'la'. The share is taken over the runs actually present for
# that 'la' value instead of assuming exactly 10 runs per value, so the result
# stays correct if the batch file holds a different number of runs. A 'la'
# value with no runs yields NaN.
sgm_converged = df_sgm['Gconv'] == 1
pr1 = 100 * sgm_converged[df_sgm['la'] == 0.00].mean()
pr2 = 100 * sgm_converged[df_sgm['la'] == 0.01].mean()
pr3 = 100 * sgm_converged[df_sgm['la'] == 0.1].mean()

In [17]:
# Small summary table: one row per lambda value with its SGM convergence percentage
sgm_rows = list(zip(['0.00', '0.01', '0.1'], [pr1, pr2, pr3]))
la_sgm_conv_prc = pd.DataFrame(sgm_rows, columns=['lambda', 'percentage'])
la_sgm_conv_prc

Unnamed: 0,lambda,percentage
0,0.0,0.0
1,0.01,0.0
2,0.1,0.0


In [18]:
# Bar chart of the SGM global-convergence percentage, one bar per lambda value.
# The title names SGM (it previously said "GM", carried over from the GM cell).
fig = px.bar(la_sgm_conv_prc, x='lambda', y='percentage',
             title='SGM convergence percentage for different values of lambda',
             color='lambda',
             color_discrete_sequence=['#440154', '#21918c', '#fde725'])
fig.update_layout(
    # The backslash is required: '$lambda$' is typeset as the word "lambda",
    # '$\lambda$' as the Greek letter.
    xaxis_title=r'$\lambda$',
    yaxis_title=r'SGM convergence percentage (%)'
)
fig.show()
# save the figure
fig.write_image("SGM_GC_for_la.png")

### L as a function of la

In [19]:
# Scatter plot of the final loss 'L*' of every SGM run, grouped by lambda.
# The title names SGM (it previously said "GM", carried over from the GM cell).
fig = px.scatter(df_sgm, x='la', y='L*',
                 title='SGM Loss function for the optimal weights as a function of lambda',
                 color='la',
                 color_discrete_sequence=['#440154', '#21918c', '#fde725'])
fig.update_layout(
    # The backslash is required: '$lambda$' is typeset as the word "lambda",
    # '$\lambda$' as the Greek letter.
    xaxis_title=r'$\lambda$',
    yaxis_title=r'$L(w*,Xtr,ytr)$',
    # treat the lambda values as discrete categories, not as a numeric axis
    xaxis_type='category'
)
fig.show()
# save the figure
fig.write_image("SGM_L_for_la.png")

## Global convergence as a function of la and isd

In [20]:
# https://github.com/serge-tochilov/barchart3d-plotly/blob/master/barchart3d.py
def barchart3d(labels, z_data, title, z_title,
               n_row=0, width=900, height=900, thikness=0.7, colorscale='Viridis',
               x_title='lambda value', y_title='Optimization method',
               x_ticktext=('0.00', '0.01', '0.1'),
               y_ticktext=('GM', 'QNM', 'SGM'),
               **kwargs):
    """
    Draws a 3D barchart.

    Bar ``i`` is placed at grid position ``x = i % n_row``, ``y = i // n_row``,
    so consecutive entries of ``z_data`` fill one y-row along x before moving
    on to the next y-row. The tick labels must follow that same order.

    :param labels: Array_like of bar labels (one per entry of z_data)
    :param z_data: Array_like of bar heights (data coords)
    :param title: Chart title
    :param z_title: Title shown next to the colorbar
    :param n_row: Number of bars per y-row (i.e. number of x positions);
        values < 1 select ceil(sqrt(len(z_data)))
    :param width: Chart width (px)
    :param height: Chart height (px)
    :param thikness: Bar thikness (0; 1)
    :param colorscale: Barchart colorscale
    :param x_title: X-axis title
    :param y_title: Y-axis title
    :param x_ticktext: Tick labels for x positions 0, 1, 2, ...
    :param y_ticktext: Tick labels for y positions 0, 1, 2, ...; the default
        matches bars listed as GM, QNM, SGM (one algorithm per y-row)
    :param **kwargs: Passed to Mesh3d()
    :return: 3D barchart figure
    """

    if n_row < 1:
        n_row = math.ceil(math.sqrt(len(z_data)))
    # half-width of a bar: each bar spans [centre - thikness, centre + thikness]
    thikness *= 0.5
    ann = []

    fig = go.Figure()

    for iz, z_max in enumerate(z_data):
        x_cnt, y_cnt = iz % n_row, iz // n_row
        x_min, y_min = x_cnt - thikness, y_cnt - thikness
        x_max, y_max = x_cnt + thikness, y_cnt + thikness

        # One box per bar: four corners at z = 0 and the same four at z = z_max
        fig.add_trace(go.Mesh3d(
            x=[x_min, x_min, x_max, x_max, x_min, x_min, x_max, x_max],
            y=[y_min, y_max, y_max, y_min, y_min, y_max, y_max, y_min],
            z=[0, 0, 0, 0, z_max, z_max, z_max, z_max],
            alphahull=0,
            intensity=[0, 0, 0, 0, z_max, z_max, z_max, z_max],
            coloraxis='coloraxis',
            hoverinfo='skip',
            **kwargs))

        # Label anchored at the top centre of the bar
        ann.append(dict(
            showarrow=False,
            x=x_cnt, y=y_cnt, z=z_max,
            text=f'<b>{labels[iz]}</b>',
            font=dict(color='white', size=11),
            bgcolor='rgba(0, 0, 0, 0.3)',
            xanchor='center', yanchor='middle',
            hovertext=f'{z_max} {labels[iz]}'))

    x_ticktext = list(x_ticktext)
    y_ticktext = list(y_ticktext)

    fig.update_layout(
        width=width, height=height,
        title=title, title_x=0.5,
        scene=dict(
            xaxis=dict(title=x_title,
                       tickmode='array',
                       tickvals=list(range(len(x_ticktext))),
                       ticktext=x_ticktext),
            yaxis=dict(title=y_title,
                       tickmode='array',
                       tickvals=list(range(len(y_ticktext))),
                       ticktext=y_ticktext),
            zaxis=dict(title=''),
            annotations=ann),
        coloraxis=dict(
            colorscale=colorscale,
            colorbar=dict(
                title=dict(
                    text=z_title,
                    side='right'),
                xanchor='right', x=1.0,
                xpad=0,
                ticks='inside')),
        legend=dict(
            yanchor='top', y=1.0,
            xanchor='left', x=0.0,
            bgcolor='rgba(0, 0, 0, 0)',
            itemclick=False,
            itemdoubleclick=False),
        showlegend=False)
    return fig

In [21]:
# Global-convergence percentage for every (algorithm, lambda) combination.
# 'isd' codes: 1 = GM, 3 = QNM, 7 = SGM.
converged = df['Gconv'] == 1


def _conv_pct(isd, la):
    """Percentage of the runs with this 'isd' code and 'la' value whose Gconv is 1.

    The share is taken over the runs actually present for the combination
    instead of assuming exactly 10 runs, and is NaN when there are none.
    """
    runs = (df['isd'] == isd) & (df['la'] == la)
    return 100 * converged[runs].mean()


pr1_gm, pr2_gm, pr3_gm = (_conv_pct(1, la) for la in (0.00, 0.01, 0.1))

pr1_qnm, pr2_qnm, pr3_qnm = (_conv_pct(3, la) for la in (0.00, 0.01, 0.1))

pr1_sgm, pr2_sgm, pr3_sgm = (_conv_pct(7, la) for la in (0.00, 0.01, 0.1))

In [22]:
# Bar heights and their labels, listed algorithm by algorithm (GM, QNM, SGM)
# with the three lambda values in increasing order inside each algorithm.
la_isd_conv_prc = [
    pr1_gm, pr2_gm, pr3_gm,
    pr1_qnm, pr2_qnm, pr3_qnm,
    pr1_sgm, pr2_sgm, pr3_sgm,
]
label = [f'{method}-{la}'
         for method in ('GM', 'QNM', 'SGM')
         for la in ('0.00', '0.01', '0.1')]

In [23]:
# 3D bar chart with one bar per algorithm-lambda combination
mesh_options = dict(opacity=0.6, flatshading=True)  # forwarded to Mesh3d
fig = barchart3d(
    labels=label,
    z_data=la_isd_conv_prc,
    title='Convergence percentage for all algorithm-lambda combinations',
    z_title='Percentage',
    colorscale='Viridis',
    **mesh_options)
fig.show()
# save the figure
fig.write_image("GC_for_la.png")

In [24]:
# Convert the 'isd' and 'la' columns to plain strings.
# NOTE: this overwrites both columns of df in place, so the cells above that
# compare them with numbers (e.g. df['la'] == 0.01) must be run before this one.
for col in ('isd', 'la'):
    df[col] = df[col].astype(str)

# Replace the numeric 'isd' codes by the algorithm names
isd_names = {'1': 'GM', '3': 'QNM', '7': 'SGM'}
df['isd'] = df['isd'].replace(isd_names)

# Combined "<algorithm>-<lambda>" label for every run
df["algorithm-lambda"] = df["isd"].str.cat(df["la"], sep="-")
df.head()

Unnamed: 0,num_target,la,isd,niter,tex,tr_acc,te_acc,L*,Gconv,algorithm-lambda
0,1,0.0,GM,75,0.1874,100.0,100.0,5.72e-07,1,GM-0.0
1,1,0.0,QNM,6,0.0186,100.0,100.0,2.91e-50,1,QNM-0.0
2,1,0.0,SGM,125125,68.7754,100.0,100.0,1.41e-05,0,SGM-0.0
3,1,0.01,GM,58,0.4408,100.0,100.0,0.0276,1,GM-0.01
4,1,0.01,QNM,45,0.2759,100.0,100.0,0.0276,1,QNM-0.01


In [25]:
# Scatter plot of the final loss 'L*' for every algorithm-lambda combination,
# with one colour per combination.
palette = ['#440154', '#472d7b', '#3b528b', '#2c728e', '#21918c',
           '#28ae80', '#5ec962', '#addc30', '#fde725']
fig = px.scatter(
    df,
    x='algorithm-lambda',
    y='L*',
    color='algorithm-lambda',
    color_discrete_sequence=palette,
    title='Loss function for the optimal weights as a function of the algorithm and lambda combination',
)
fig.update_layout(
    xaxis=dict(title=dict(text=r'$algorithm-\lambda$'), type='category'),
    yaxis=dict(title=dict(text=r'$L(w*,Xtr,ytr)$')),
)
fig.show()
# save the figure
fig.write_image("L_for_al-la.png")

Setting lambda = 0 gives a very good minimization of the loss function.

# MEH

In [26]:
# # Read the data
# Lf = pd.read_csv('../loss_and_gradient.csv', sep=';')
# Lf.shape

In [27]:
# # see the values of the first column
# Lf['num_target'].unique()

In [28]:
# def get_fig(Lfi, method, legend):
#     # plot the first column of Lf1 and index it from 1 to the length of the column using plotly. Avoid nan values
#     fig = go.Figure()
#     fig.add_trace(go.Scatter(x=np.arange(1, len(Lfi[f'L-{method}-0.00'])+1), y=Lfi[f'L-{method}-0.00'],
#                                 mode='lines+markers',
#                                 line=dict(color="#440154"),
#                                 name=f'{method}-0.00',
#                                 showlegend=legend))
#     fig.add_trace(go.Scatter(x=np.arange(1, len(Lfi[f'L-{method}-0.01'])+1), y=Lfi[f'L-{method}-0.01'],
#                                 mode='lines+markers',
#                                 line=dict(color="#21918c"),
#                                 name=f'{method}-0.01',
#                                 showlegend=legend))
#     fig.add_trace(go.Scatter(x=np.arange(1, len(Lfi[f'L-{method}-0.1'])+1), y=Lfi[f'L-{method}-0.1'],
#                                 mode='lines+markers',
#                                 line=dict(color="#fde725"),
#                                 name=f'{method}-0.1',
#                                 showlegend=legend))
#     return fig

In [29]:
# fig = make_subplots(
#     rows=4, cols=3,
#     subplot_titles=("GM for digit 0", "GM for digit 1", "GM for digit 2", "GM for digit 3",
#                    "GM for digit 4", "GM for digit 5", "GM for digit 6", "GM for digit 7", "GM for digit 8", "GM for digit 9"))

# i = 1; j = 1; legend = True
# for num_target in range(0, 10):
#     # select rows where the column 'num_target' is equal to num_target
#     Lfi = Lf.loc[Lf['num_target'] == num_target]
#     fig2 = get_fig(Lfi, 'GM', legend)
#     fig.add_trace(fig2.data[0], row=i, col=j)
#     fig.add_trace(fig2.data[1], row=i, col=j)
#     fig.add_trace(fig2.data[2], row=i, col=j)
#     fig.update_xaxes(range=[0, 100], row=i, col=j)
#     j = j+1; legend = False
#     if j == 4:
#         i = i+1; j = 1

# fig.update_layout(title_text="Loss function evaluated on w^k using the GM", height=700)
# fig.show()
# # save the figure
# fig.write_image("GM_GC.png")

In [30]:
# fig = make_subplots(
#     rows=4, cols=3,
#     subplot_titles=("QNM for digit 0", "QNM for digit 1", "QNM for digit 2", "QNM for digit 3",
#                    "QNM for digit 4", "QNM for digit 5", "QNM for digit 6", "QNM for digit 7", "QNM for digit 8", "QNM for digit 9"))

# i = 1; j = 1; legend = True
# for num_target in range(0, 10):
#     # select rows where the column 'num_target' is equal to num_target
#     Lfi = Lf.loc[Lf['num_target'] == num_target]
#     fig2 = get_fig(Lfi, 'QNM', legend)
#     fig.add_trace(fig2.data[0], row=i, col=j)
#     fig.add_trace(fig2.data[1], row=i, col=j)
#     fig.add_trace(fig2.data[2], row=i, col=j)
#     fig.update_xaxes(range=[0, 100], row=i, col=j)
#     j = j+1; legend = False
#     if j == 4:
#         i = i+1; j = 1

# fig.update_layout(title_text="Loss function evaluated on w^k using the QNM", height=700)
# fig.show()
# # save the figure
# fig.write_image("QNM_GC.png")

In [31]:
# fig = make_subplots(
#     rows=4, cols=3,
#     subplot_titles=("SGM for digit 0", "SGM for digit 1", "SGM for digit 2", "SGM for digit 3",
#                    "SGM for digit 4", "SGM for digit 5", "SGM for digit 6", "SGM for digit 7", "SGM for digit 8", "SGM for digit 9"))

# i = 1; j = 1; legend = True
# for num_target in range(0, 10):
#     # select rows where the column 'num_target' is equal to num_target
#     Lfi = Lf.loc[Lf['num_target'] == num_target]
#     fig2 = get_fig(Lfi, 'SGM', legend)
#     fig.add_trace(fig2.data[0], row=i, col=j)
#     fig.add_trace(fig2.data[1], row=i, col=j)
#     fig.add_trace(fig2.data[2], row=i, col=j)
#     fig.update_xaxes(range=[0, 100], row=i, col=j)
#     j = j+1; legend = False
#     if j == 4:
#         i = i+1; j = 1

# fig.update_layout(title_text="Loss function evaluated on w^k using the SGM", height=700)
# # save the figure
# fig.write_image("SGM_GC.png")

# Local convergence

## Speed of convergence 

Compare the speed of convergence of the three algorithms in terms of the
execution time and number of iterations.

### GM

In [32]:
# Two side-by-side bar charts for the GM runs: 'niter' (left) and 'tex' (right),
# both plotted against the 'la' value of each run.

fig = make_subplots(
    rows=1, cols=2,
    subplot_titles=("Number of iterations", "Execution time (s)"))


fig.add_trace(go.Bar(x=df_gm['la'], y=df_gm['niter'],
                    name='niter', marker_color='#440154',
                    # reference the axes of the left subplot (the categorical
                    # axis type itself is set in update_layout below)
                    xaxis='x1', yaxis='y1'
                    ), row=1, col=1)
fig.add_trace(go.Bar(x=df_gm['la'], y=df_gm['tex'],
                    name='tex', marker_color='#21918c',
                    # reference the axes of the right subplot (the categorical
                    # axis type itself is set in update_layout below)
                    xaxis='x2', yaxis='y2'
                    ), row=1, col=2)

# Give each subplot its horizontal share of the figure and make both x axes categorical
fig.update_layout(title_text="Number of iterations and execution time for the GM",
                    xaxis=dict(domain=[0, 0.45], title='lambda', type='category'),
                    xaxis2=dict(domain=[0.55, 1], title='lambda', type='category'),
                    height=500)
fig.show()

In [37]:
# Sum all the 'niter' values of the GM runs for each value of 'la'
df_gm.groupby('la')['niter'].sum()

la
0.0     4312
0.01    1366
0.1      371
Name: niter, dtype: int64

In [39]:
# Scatter plot of 'niter' against 'num_target' for the GM runs, coloured by 'la'.
# trendline="ols" adds a fitted OLS trendline; the points themselves are not
# joined by lines.

fig = px.scatter(df_gm, x="num_target", y="niter", color="la", trendline="ols")
fig.update_layout(title_text="Number of iterations for the GM as a function of the target digit")
fig.show()

### QNM

### SGM

### Local convergence in general

In [33]:
# Bubble chart of the final loss 'L*' against the number of iterations (log
# scale on x): marker size follows 'tex', colour follows the algorithm in
# 'isd', and hovering shows the algorithm-lambda label.
bubble_options = dict(
    color="isd",
    size="tex",
    size_max=60,
    hover_name="algorithm-lambda",
    log_x=True,
)
fig = px.scatter(df, x="niter", y="L*", **bubble_options)
fig.show()
# save the figure
fig.write_image("LC-al-la.png")