# FinBERT Example Notebook

This notebooks shows how to train and use the FinBERT pre-trained language model for financial sentiment analysis.

## Modules 

In [1]:
from pathlib import Path
import shutil
import os
import logging
import sys
sys.path.append('..')
!pip install finbert
from textblob import TextBlob
from pprint import pprint
from sklearn.metrics import classification_report

from transformers import AutoModelForSequenceClassification
!pip install finbert

from finbert.finbert import *
import finbert.utils as tools

%load_ext autoreload
%autoreload 2

project_dir = Path.cwd().parent
pd.set_option('max_colwidth', -1)


import wget
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

from plottify import autosize
import math
from argparse import ArgumentParser
from itertools import permutations
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
from tqdm import tqdm
from torch import nn
import torch.nn.functional as F
import numpy as np
import random
from transformers.optimization import AdamW, get_linear_schedule_with_warmup


[31mERROR: Could not find a version that satisfies the requirement finbert (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for finbert[0m[31m
[0m

  from .autonotebook import tqdm as notebook_tqdm


[31mERROR: Could not find a version that satisfies the requirement finbert (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for finbert[0m[31m
[0m

  pd.set_option('max_colwidth', -1)


ModuleNotFoundError: No module named 'wget'

In [3]:
logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
                    datefmt = '%m/%d/%Y %H:%M:%S',
                    level = logging.ERROR)

## Prepare the model

### Setting path variables:
1. `lm_path`: the path for the pre-trained language model (If vanilla Bert is used then no need to set this one).
2. `cl_path`: the path where the classification model is saved.
3. `cl_data_path`: the path of the directory that contains the data files of `train.csv`, `validation.csv`, `test.csv`.
---

In the initialization of `bertmodel`, we can either use the original pre-trained weights from Google by giving `bm = 'bert-base-uncased`, or our further pre-trained language model by `bm = lm_path`


---
All of the configurations with the model is controlled with the `config` variable. 

In [None]:
#lm_path = project_dir/'models'/'language_model'/'finbertTRC2'
#/'pytorch_model.bin'
lm_path = project_dir/'models'/'sentiment'
cl_path = project_dir/'models'/'classifier_model'/'finbert-sentiment'
cl_data_path = project_dir/'data'/'sentiment_data'

###  Configuring training parameters

You can find the explanations of the training parameters in the class docsctrings. 

In [None]:
# Clean the cl_path
try:
    shutil.rmtree(cl_path) 
except:
    pass

bertmodel = AutoModelForSequenceClassification.from_pretrained(lm_path,cache_dir=None, num_labels=3)


config = Config(   data_dir=cl_data_path,
                   bert_model=bertmodel,
                   num_train_epochs=4,
                   model_dir=cl_path,
                   max_seq_length = 48,
                   train_batch_size = 32,
                   learning_rate = 2e-5,
                   output_mode='classification',
                   warm_up_proportion=0.2,
                   local_rank=-1,
                   discriminate=True,
                   gradual_unfreeze=True)

In [None]:
finbert = FinBert(config)
finbert.base_model = 'bert-base-uncased'
finbert.config.discriminate=True
finbert.config.gradual_unfreeze=True

finbert.prepare_model(label_list=['positive','negative','neutral'])
train_data = finbert.get_data('train')
train_dataloader = finbert.get_loader(train_data, 'train')

model = finbert.create_the_model()


# Get the training examples
train_data = finbert.get_data('train')
train_dataloader = finbert.get_loader(train_data, 'train')
test_data = finbert.get_data('test')
test_dataloader = finbert.get_loader(test_data, 'train')



inputs=torch.tensor([]).to("cuda")
mask=torch.tensor([]).to("cuda")
type_ids=torch.tensor([]).to("cuda")
labels=torch.tensor([]).to("cuda")

for step, batch in enumerate(tqdm(train_dataloader, desc='Iteration')):
    batch = tuple(t.to("cuda") for t in batch)

    input_ids_temp, attention_mask, token_type_ids, label_ids_temp, agree_ids = batch
    #label_ids_temp[label_ids_temp==1]=0
    #label_ids_temp[label_ids_temp==2]=1


    labels=torch.cat((labels,label_ids_temp))
    inputs=torch.cat((inputs,input_ids_temp),axis=0)
    mask=torch.cat((mask,attention_mask),axis=0)
    type_ids=torch.cat((type_ids,token_type_ids),axis=0)


    
    
inputs_test=torch.tensor([]).to("cuda")
mask_test=torch.tensor([]).to("cuda")
type_ids_test=torch.tensor([]).to("cuda")

labels_test=torch.tensor([]).to("cuda")

for step, batch in enumerate(tqdm(test_dataloader, desc='Iteration')):
    batch = tuple(t.to("cuda") for t in batch)

    input_ids_temp, attention_mask, token_type_ids, label_ids_temp, agree_ids = batch
    #label_ids_temp[label_ids_temp==1]=0
    #label_ids_temp[label_ids_temp==2]=1


    labels_test=torch.cat((labels_test,label_ids_temp))
    inputs_test=torch.cat((inputs_test,input_ids_temp),axis=0)
    mask_test=torch.cat((mask_test,attention_mask),axis=0)
    type_ids_test=torch.cat((type_ids_test,token_type_ids),axis=0)
# print(labels.shape,inputs.shape)
# print(labels,inputs)
maxlen=torch.max(inputs).to("cpu").int().numpy()
print(maxlen)
print(inputs.int())
inputs=inputs.int()
labels=labels.long()
inputs_test=inputs_test.int()
labels_test=labels_test.long()

data=torch.cat((inputs,labels.reshape(-1,1)),dim=1).to("cpu")
data_test=torch.cat((inputs_test,labels_test.reshape(-1,1)),dim=1).to("cpu")

adds={}
adds["mask"]=mask.T
adds["mask_test"]=mask_test.T
adds["type_ids"]=type_ids.T
adds["type_ids_test"]=type_ids_test.T

data_l=list(zip(data,mask,type_ids))

print(data.shape,data_test.shape)

`finbert` is our main class that encapsulates all the functionality. The list of class labels should be given in the prepare_model method call with label_list parameter.

In [None]:
class Identity(nn.Module):
    def __init__(self):
        super(Identity, self).__init__()
        
    def forward(self, x):
        return x
bertmodel = AutoModelForSequenceClassification.from_pretrained(lm_path,cache_dir=None, num_labels=3)
config = Config(   data_dir=cl_data_path,
                   bert_model=bertmodel,
                   num_train_epochs=4,
                   model_dir=cl_path,
                   max_seq_length = 48,
                   train_batch_size = 32,
                   learning_rate = 2e-5,
                   output_mode='classification',
                   warm_up_proportion=0.2,
                   local_rank=-1,
                   discriminate=True,
                   gradual_unfreeze=True)

finbert = FinBert(config)
finbert.base_model = 'bert-base-uncased'
finbert.config.discriminate=True
finbert.config.gradual_unfreeze=False
finbert.prepare_model(label_list=['positive','negative','neutral'])

finbert.prepare_model(label_list=['positive','negative','neutral'])
train_data = finbert.get_data('train')
train_dataloader = finbert.get_loader(train_data, 'train')

fb=finbert.create_the_model()
for param in fb.bert.embeddings.parameters():
    param.requires_grad = False

for i in range(12):
    for param in fb.bert.encoder.layer[i].parameters():
        param.requires_grad = False
        
for param in fb.bert.pooler.parameters():
    param.requires_grad = False
for param in fb.dropout.parameters():
    param.requires_grad = False
# fb.dropout=Identity()
fb.classifier=Identity()


In [None]:
train_data, valid_data = data.T, data_test.T

print(train_data.shape)
mask=adds["mask"]
ids=adds["type_ids"]
embeds=torch.zeros((train_data.shape[1],768))
labels=torch.zeros((train_data.shape[1],))
for dt, msk, ids in [(train_data, adds["mask"], adds["type_ids"])]:
    model.train(False)
    dl = torch.split(dt, 32, dim=1)
    ml = torch.split(msk, 32, dim=1)
    il = torch.split(ids, 32, dim=1)
    for i_dl, input in enumerate(dl):
        input=input.to("cuda")
        with torch.set_grad_enabled(False):
            labels[i_dl*32:(i_dl+1)*32] = input[-1]
            embeds[i_dl*32:(i_dl+1)*32,:] = fb(input[:-1].T.int(),ml[i_dl].T.int(),il[i_dl].T.int())[0]

mask_test=adds["mask_test"].T
ids_test=adds["type_ids_test"].T         
embeds_test=torch.zeros((valid_data.shape[1],768))
labels_test=torch.zeros((valid_data.shape[1],))
for dt, msk, ids in [(valid_data, adds["mask_test"], adds["type_ids_test"])]:
    model.train(False)
    dl = torch.split(dt, 32, dim=1)
    ml = torch.split(msk, 32, dim=1)
    il = torch.split(ids, 32, dim=1)
    for i_dl, input in enumerate(dl):
        input=input.to("cuda")
        with torch.set_grad_enabled(False):
            labels_test[i_dl*32:(i_dl+1)*32] = input[-1]
            embeds_test[i_dl*32:(i_dl+1)*32,:] = fb(input[:-1].T.int(),ml[i_dl].T.int(),il[i_dl].T.int())[0]

In [None]:
import cvxpy as cp
import warnings
warnings.filterwarnings("ignore")
def one_hot(labels, num_classes=10):
    if torch.is_tensor(labels):
        y = torch.eye(num_classes) 
        return y[labels.long()] 
    else:
        y = np.eye(num_classes) 
        return y[labels.astype("long")] 
def drelu(x):
    return x>=0
def relu(x):
    return np.maximum(0,x)
def samp(Xcvx, Xcvx_test, act="relu", depth=2):
    d=Xcvx.shape[1]
    if depth==2:
        U=np.random.randn(d,num_neurons)
        preact=Xcvx@U
        b=np.random.uniform(np.min(preact),np.max(preact),num_neurons)
        if act == "relu":
            #dmat=drelu(Xcvx@U)
            dmat=drelu(preact-b.reshape(1,-1))
        else:
            dmat=np.sign(preact-b.reshape(1,-1))
        ### eliminate repeating arrangements
        dmat, ind=(np.unique(dmat,axis=1, return_index=True))
        U=U[:,ind]
        b=b[ind]
        if act == "relu":
            dmat_test=drelu(Xcvx_test@U-b.reshape(1,-1))
        else:
            dmat_test=np.sign(Xcvx_test@U-b.reshape(1,-1))
    else:
        mvec=[d]*(depth-1)
        mvec.append(num_neurons)
        A=Xcvx.copy()
        Atest=Xcvx_test.copy()
        for l in range(depth-1):
            U=np.random.randn(mvec[l],mvec[l+1])
            preact=A@U
            #print(np.max(preact),np.min(preact))
            b=np.zeros((mvec[l+1]))
            for i_col in range(mvec[l+1]):
                b[i_col]=np.random.uniform(np.min(preact[:,i_col]),np.max(preact[:,i_col]))
                # print(np.min(preact[:,i_col]),np.max(preact[:,i_col]),b[i_col])
                # plt.hist(preact[:,i_col], bins=preact[:,i_col].shape[0])
                # plt.axvline(b[i_col], color="red")
                # plt.show()
            #b=np.random.uniform(np.min(preact),np.max(preact),mvec[l+1])
            # mean1=np.mean((A@U), axis=0, keepdims=True)
            # std1=np.std((A@U), axis=0, keepdims=True)
            hidden=preact-b.reshape(1,-1)#(A@U-mean1)/std1
            hidden_test=Atest@U-b.reshape(1,-1)#((Atest@U)-mean1)/std1
            hidden[np.isnan(hidden)]=0
            hidden_test[np.isnan(hidden_test)]=0
            if l<depth-2:
                A=relu(hidden) #+ A
                Atest=relu(hidden_test) #+ Atest
                amax=np.max(A)
                A/=amax
                Atest/=amax
                A-=0.5
                Atest-=0.5
                
                # mean1=np.mean(A, axis=0, keepdims=True)
                # std1=np.std(A, axis=0, keepdims=True)
                # A=(A-mean1)/std1
                # Atest=(Atest-mean1)/std1               
            else:
                print("Activation used for sampling: ", act)
                if act == "relu":
                    dmat=drelu(hidden)
                else:
                    dmat=np.sign(hidden)
                ### eliminate repeating arrangements
                dmat, ind=(np.unique(dmat,axis=1, return_index=True))
                m=dmat.shape[1]
                if act == "relu":
                    dmat_test=drelu(hidden_test[:,ind])
                else:
                    dmat_test=np.sign(hidden_test[:,ind])
            #print(hidden)
    return dmat, dmat_test

                   
def CVX_MLP(Xcvx, y, Xcvx_test, ytest, mode="relaxed", act="relu", num_neurons=100, depth=2, beta=1e-4, num_classes=3, seed=0):
    random.seed(a=seed)
    np.random.seed(seed=seed)
    mode="relaxed"
    dmat, dmat_test = samp(Xcvx[:,:-1], Xcvx_test[:,:-1], act=act, depth=depth)
    m=dmat.shape[1]        
    print("Number of unique arrangements: ", m)

    Y=one_hot(y, num_classes)

    if mode=="exact":
        # Optimal CVX
        m=dmat.shape[1]
        Uopt1={}
        Uopt2={}
        for j in range(num_classes):
            Uopt1[j]=cp.Variable((d,m))
            Uopt2[j]=cp.Variable((d,m))

        ## Below we use hinge loss as a performance metric for binary classification
        Yopt1={}
        Yopt2={}
        for j in range(num_classes):
            Yopt1[j]=cp.Parameter((n,))
            Yopt2[j]=cp.Parameter((n,))
        reg=cp.Parameter((1))
        reg=0
        cost=0
        for j in range(num_classes):
            Yopt1[j]=cp.sum(cp.multiply(dmat,(Xcvx*Uopt1[j])),axis=1)
            Yopt2[j]=cp.sum(cp.multiply(dmat,(Xcvx*Uopt2[j])),axis=1)
            reg=(cp.mixed_norm(Uopt1[j].T,2,1)+cp.mixed_norm(Uopt2[j].T,2,1))
            cost+=cp.sum_squares(Y[:,j]-(Yopt1[j]-Yopt2[j]))/n+beta*reg
        constraints=[]
        # for j in range(num_classes):
        #     constraints+=[cp.multiply((2*dmat-np.ones((n,m1))),(X*Uopt1[j]))>=0]
        #     constraints+=[cp.multiply((2*dmat-np.ones((n,m1))),(X*Uopt2[j]))>=0]
        prob=cp.Problem(cp.Minimize(cost),constraints)
        prob.solve(solver=cp.MOSEK,warm_start=True, verbose=True)

        cvx_opt=prob.value
        print("Convex program objective value (eq (8)): ",cvx_opt)
    else:
        # Optimal CVX
        m1=dmat.shape[1]
        Uopt1={}
        for j in range(num_classes):
            Uopt1[j]=cp.Variable((d,m))

        ## Below we use hinge loss as a performance metric for binary classification
        Yopt1={}
        for j in range(num_classes):
            Yopt1[j]=cp.Parameter((n,))
        reg=cp.Parameter((1))
        reg=0
        cost=0
        for j in range(num_classes):
            Yopt1[j]=cp.sum(cp.multiply(dmat,(Xcvx*Uopt1[j])),axis=1)
            reg=(cp.mixed_norm(Uopt1[j].T,2,1))
            cost+=cp.sum_squares(Y[:,j]-Yopt1[j])/n+beta*reg
        constraints=[]
        # for j in range(num_classes):
        #     constraints+=[cp.multiply((2*dmat-np.ones((n,m1))),(X*Uopt1[j]))>=0]
        #     constraints+=[cp.multiply((2*dmat-np.ones((n,m1))),(X*Uopt2[j]))>=0]
        prob=cp.Problem(cp.Minimize(cost),constraints)
        prob.solve(solver=cp.MOSEK,warm_start=True, verbose=False)

        cvx_opt=prob.value
    
    if prob.status != "optimal":
        print("Convex: Status convex: ",prob.status)
    Uopt={}
    Ytest_est=np.zeros((ntest,num_classes))
    for j in range(num_classes):
        Uopt[j]=Uopt1[j].value
        Ytest_est[:,j]=np.sum(dmat_test*(Xcvx_test@Uopt[j]),axis=1)


    labels_est=np.argmax(Ytest_est,axis=1)
    test_acc=np.sum(ytest==labels_est)/ntest
    return test_acc

class TMLP(nn.Module):

    def __init__(self, d, num_neurons=1,  num_outs=2):
        super().__init__()

        self.mlp = nn.Sequential(
            nn.Linear(d, num_neurons),
            nn.ReLU(),
            nn.Linear(num_neurons, num_outs),
        )      
        self.d=d
        self.num_neurons=num_neurons
        self.num_outs=num_outs
    def forward(self, x):
        y_pred=self.mlp(x)
        return y_pred

from torch import optim
import numpy as np
def NCVX_MLP(Xcvx, y, Xcvx_test, ytest, train_epochs=100, learning_rate=1e-3, num_neurons=100, \
                                                    beta=1e-4, batch_size=32, num_classes=3, seed=0, device="cuda"):
    random.seed(a=seed)
    np.random.seed(seed=seed)
    torch.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
 
    model = TMLP(Xcvx.shape[1], num_neurons=num_neurons, num_outs=num_classes).to(device)
    X=torch.tensor(Xcvx).float()
    Xtest=torch.tensor(Xcvx_test).float()
    label=torch.tensor(y).float()
    labeltest=torch.tensor(ytest).float()
    train_data = [(dt, lb) for dt, lb in zip(X,label) ]
    test_data = [(dt, lb) for dt, lb in zip(Xtest,labeltest) ]  
    
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=True)
    
    
    criterion = nn.MSELoss()
    #optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)
    #scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1000, gamma=0.1)
    budget = np.floor(train_epochs*(X.shape[0]//batch_size))
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=budget//4,
                                                       verbose=False,
                                                       factor=0.5,
                                                       eps=1e-12)
    loss_vec=[]
    for epoch in range(train_epochs):
        running_loss = 0.0
        for i, data in enumerate(train_loader, 0):
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, one_hot(labels, num_classes=num_classes).to(device))
   
            for param in model.parameters():
                loss += beta*torch.norm(param)**2/2             
         
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            loss_vec.append(loss.item())
            scheduler.step(loss.item())


    # plt.semilogy(loss_vec,label=model_type)
    # plt.legend()
    # plt.show()
    correct = 0
    total = 0
    model.eval()
    with torch.no_grad():
        for data in test_loader:
            images, labels = data
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return correct/total

def LinearModel(A, y, lamb=0):
    n_col = A.shape[1]
    return np.linalg.lstsq(A.T.dot(A) + lamb * np.identity(n_col), A.T.dot(y),rcond=None)[0]



In [None]:
nvec=[ 200, 400, 600, 800, 1000]#, 1200, 1400, 1600, 1800, 2000]#, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200]
nonconvex=[]
convex=[]
convex_deep=[]
linear=[]
seeds=[0]#,10,20]
for seed in seeds:
    print("seed: ", seed)
    random.seed(a=seed)
    np.random.seed(seed=seed)
    torch.manual_seed(seed)
    torch.backends.cudnn.deterministic = True

    for nt in nvec:
        sind=np.random.choice(embeds.shape[0],nt,replace=False)
        Xcvx=embeds[sind,:].cpu().numpy()
        Xcvx_test=embeds_test.cpu().numpy()
        y=labels[sind].cpu().numpy()
        ytest=labels_test.cpu().numpy()

        Xcvx=np.concatenate((Xcvx,np.ones((Xcvx.shape[0],1))),axis=1)
        Xcvx_test=np.concatenate((Xcvx_test,np.ones((Xcvx_test.shape[0],1))),axis=1)

        n,d=Xcvx.shape
        ntest=Xcvx_test.shape[0]

        print(n,d)

                


        train_epochs = 100
        num_neurons=100
        batch_size=32
        depth=8
        accuracy_list = {}
        accuracy_list['convex'] = []
        accuracy_list['convex_deep'] = []
        accuracy_list['nonconvex'] = []
        accuracy_list['linear'] = []
        test_arr=[]
        hyper_param=[]
        models=[ 'convex_deep', 'convex', 'nonconvex', 'linear']
        for model_type in models:
            print('model_type: ', model_type)
            #create a dictionary to store the accuracies for  'convex' and 'nonconvex'
            for beta in [1e-3]:
                #print('beta: ', beta)
                for i_lr, lr in enumerate([1e-1, 1e-2, 1e-3, 1e-4]):
                    if model_type == 'convex':
                        if i_lr==0:
                            test_acc = CVX_MLP(Xcvx, y, Xcvx_test, ytest, mode="relaxed", num_neurons=num_neurons, \
                                               beta=beta, depth=2, num_classes=3, seed=seed)
                    elif model_type == 'convex_deep':
                        if i_lr==0:
                            test_acc = CVX_MLP(Xcvx, y, Xcvx_test, ytest, mode="relaxed", num_neurons=num_neurons, \
                                               beta=beta, depth=depth, num_classes=3, seed=seed)
                    elif model_type == 'nonconvex':
                        test_acc = NCVX_MLP(Xcvx[:,:-1], y, Xcvx_test[:,:-1], ytest, train_epochs=train_epochs, learning_rate=lr, \
                                            num_neurons=num_neurons, beta=beta, batch_size=batch_size, num_classes=3, seed=seed)
                    else:
                        if i_lr==0:
                            model=LinearModel(Xcvx,one_hot(y,num_classes=3),lamb=beta*Xcvx.shape[0])
                            est_test=Xcvx_test@model
                            test_acc=np.sum(np.argmax(est_test,axis=1)==ytest)/ytest.shape[0]

                    test_arr.append(100 * test_acc)
                    hyper_param.append([beta,lr])
                    if model_type == 'convex':
                        if i_lr==0:
                            accuracy_list['convex'].append(test_acc)
                    elif model_type == 'convex_deep':
                        if i_lr==0:
                            accuracy_list['convex_deep'].append(test_acc)
                    elif model_type == 'nonconvex':
                        accuracy_list['nonconvex'].append(test_acc)
                    else:
                        accuracy_list['linear'].append(test_acc)
                    
                    if (model_type in ["convex","convex_deep","linear"] and i_lr==0) or model_type=="nonconvex":
                        print(f'Accuracy for beta={beta}, LR={lr}: {np.round(test_arr[-1],3)}')

            #print the largest accuracy for each model type
        for model_type in models:
            print(model_type+' max accuracy: ', max(accuracy_list[model_type]))
            print(accuracy_list[model_type])
            if model_type == 'convex':
                convex.append(max(accuracy_list[model_type]))
            elif model_type == 'convex_deep':
                convex_deep.append(max(accuracy_list[model_type]))
            elif model_type == 'nonconvex':
                nonconvex.append(max(accuracy_list[model_type]))
            else:
                linear.append(max(accuracy_list[model_type]))


            

In [None]:
convexr=np.array(convex).reshape(len(seeds),-1)
convex_deepr=np.array(convex_deep).reshape(len(seeds),-1)
nonconvexr=np.array(nonconvex).reshape(len(seeds),-1)
linearr=np.array(linear).reshape(len(seeds),-1)

ns=len(nvec)
nseed=len(seeds)
convexm=np.mean(convexr,axis=0)
convex_deepm=np.mean(convex_deepr,axis=0)
nonconvexm=np.mean(nonconvexr,axis=0)
linearm=np.mean(linearr,axis=0)

convexs=np.std(convexr,axis=0)
convex_deeps=np.std(convex_deepr,axis=0)
nonconvexs=np.std(nonconvexr,axis=0)
linears=np.std(linearr,axis=0)


colors=["blue", "orange", "green", "red"]


plt.plot(nvec, convexm, color=colors[0], label="convex")
upper=convexm+convexs
lower=convexm-convexs
plt.fill_between(nvec, lower, upper, color=colors[0], alpha=.1)
plt.plot(nvec, convex_deepm, color=colors[1], label="convex_deep")
upper=convex_deepm+convex_deeps
lower=convex_deepm-convex_deeps
plt.fill_between(nvec, lower, upper, color=colors[1], alpha=.1)

plt.plot(nvec, nonconvexm, color=colors[2], label="nonconvex")
upper=nonconvexm+nonconvexs
lower=nonconvexm-nonconvexs
plt.fill_between(nvec, lower, upper, color=colors[2], alpha=.1)
    
plt.plot(nvec, linearm, color=colors[3], label="linear")
upper=linearm+linears
lower=linearm-linears
plt.fill_between(nvec, lower, upper, color=colors[3], alpha=.1)

plt.legend()
plt.grid()
import os.path
save=False
if save:
    figpath="Finetuning_cvx_numneurons"+str(num_neurons)+"_depth"+str(depth)+"_nvec"+str(len(nvec))+"_numseed"+str(len(seeds))+"_beta"+str(beta)+".pdf"
    if os.path.isfile(figpath):
        figpath="Finetuning_cvx_numneurons"+str(num_neurons)+"_depth"+str(depth)+"_nvec"+str(len(nvec))+"_numseed"+str(len(seeds))+"_beta"+str(beta)+"v2.pdf"

    plt.savefig(figpath, format='pdf', dpi=300) 
    filepath="Finetuning_cvx_numneurons"+str(num_neurons)+"_depth"+str(depth)+"_nvec"+str(len(nvec))+"_numseed"+str(len(seeds))+"_beta"+str(beta)+".npz"
    if os.path.isfile(filepath):
        filepath="Finetuning_cvx_numneurons"+str(num_neurons)+"_depth"+str(depth)+"_nvec"+str(len(nvec))+"_numseed"+str(len(seeds))+"_beta"+str(beta)+"v2.npz"

    np.savez(filepath, nvec=nvec, convex=convexr, convex_deep=convex_deepr, nonconvex=nonconvexr, linear=linearr, seeds=seeds)

In [None]:
print(convexm)
print(convex_deepm)
print(nonconvexm)

In [None]:
print(convexm)
print(convex_deepm)
print(nonconvexm)




In [None]:
nonconvexr

In [None]:
filepath="Finetuning_cvx_numneurons"+str(num_neurons)+"_nvec"+str(len(nvec))+"_numseed"+str(len(seeds))+"_beta"+str(beta)+".npz"

saveddata=np.load(filepath)
convexr=saveddata["convex"]
convex_deepr=saveddata["convex_deep"]
nonconvexr=saveddata["nonconvex"]

In [None]:
nonconvexr

In [None]:
u,s,v=np.linalg.svd(embeds)
plt.semilogy(s.reshape(-1))