In [None]:
import sys
import torch
import torch.optim as optim
import torchvision
import argparse
import torch.nn as nn
import pandas as pd
import re
import os
import seaborn as sns
from torchvision import transforms
from matplotlib.pyplot import figure
from matplotlib import pyplot as plt
from utils import *

# Reset argv so that parse_args() below sees no command-line flags and every
# option falls back to its default (presumably to keep the notebook kernel's
# own launch arguments away from argparse).
sys.argv = ['']

parser = argparse.ArgumentParser(description='Parameters training')

# (flag, add_argument keyword arguments) for every supported option.
_cli_options = (
    ('--model_architecture', dict(type=str, default="VGG16", help='....')),
    ('--dataset', dict(type=str, default="CIFAR10", help='....')),
    ('--batch_size', dict(type=int, default=8, help='....')),
    ('--device', dict(type=str, default=None, help='....')),
    ('--eval_metric', dict(default="accuracy", help='....')),
)
for _flag, _spec in _cli_options:
    parser.add_argument(_flag, **_spec)

args = parser.parse_args()

# No explicit device requested: use CUDA when available, otherwise the CPU.
if args.device is None:
    args.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
##
# Modify if necessary
args.dataset = "Date_Fruit_7classes"
args.eval_metric = "f1_score"
custom_split = 1
##

# Collect every checkpoint (*.pth) stored for the selected dataset.
# os.listdir returns entries in arbitrary order, so the names are sorted to
# keep the row order of the results table reproducible between runs.
model_directory = f"models/{args.dataset}"
model_paths = [
    os.path.join(model_directory, filename)
    for filename in sorted(os.listdir(model_directory))
    if filename.endswith(".pth")
]

# Results Dataframe

In [None]:
# Only the second and third returned values (test loader, number of classes)
# are kept; the first and last are discarded.
# NOTE(review): get_dataset is not defined in this notebook (presumably it
# comes from `from utils import *`); the meaning of custom_split is set there.
_, test_loader, num_classes, _ = get_dataset(args, custom_split = custom_split)

In [None]:
def _first_match(pattern, text):
    """Return the first substring of `text` matching `pattern`, or None if there is none."""
    found = re.search(pattern, text)
    return found.group() if found else None


# Column name -> regular expression used to read that attribute from the
# checkpoint path. Every column yields None when its pattern is absent, so a
# single unexpected file name no longer aborts the whole cell.
# NOTE(review): the pr_base pattern matches the first "20", "30" or "50"
# anywhere in the path (directory and dataset name included) - confirm the
# naming scheme cannot produce a false hit.
_path_patterns = {
    'pruning_type': r'(random|weight|SenpisFaster)',
    'model_type': r'(HOMOGENEA|CRECIENTE|DECRECIENTE|MAS_MENOS_MAS|MENOS_MAS_MENOS|UNPRUNED)',
    'pr_base': r'(20|30|50)',
    'seed': r'(?<=SEED_)\d+',
}

df = pd.DataFrame({'model_paths': model_paths})
for _column, _pattern in _path_patterns.items():
    # _rx is bound as a default argument so each lambda keeps its own pattern.
    df[_column] = df['model_paths'].apply(lambda path, _rx=_pattern: _first_match(_rx, path))
df['finetuned'] = df['model_paths'].apply(lambda path: 'FT' in path)
# The dataset name is escaped so it is matched literally, not as a regex.
df['dataset'] = df['model_paths'].apply(lambda path: _first_match(re.escape(args.dataset), path))
df

In [None]:
# Evaluate every checkpoint on the test loader and store the chosen metric.
# The values are gathered in a list and assigned as a whole column at the end:
# the previous `df['metric'].iloc[i] = ...` was chained assignment, which warns
# (and writes nothing under pandas Copy-on-Write) and pushed floats into an
# integer column.
criterion = nn.CrossEntropyLoss()
metric_values = []
for model_path in df['model_paths']:
    # SECURITY: torch.load unpickles the file, so only load checkpoints you trust.
    # map_location lets checkpoints saved on a GPU load on a CPU-only machine.
    # NOTE(review): PyTorch >= 2.6 defaults to weights_only=True, which rejects
    # fully pickled model objects; pass weights_only=False there if loading fails.
    model = torch.load(model_path, map_location=args.device)
    # test_epoch is not defined in this notebook; it is unpacked as a
    # (loss, metric) pair and only the metric is used.
    _test_loss, test_acc = test_epoch(model, args.device, test_loader, criterion, args.eval_metric, num_classes)
    if torch.is_tensor(test_acc):
        test_acc = test_acc.item()
    metric_values.append(test_acc)
    print(f"{args.eval_metric} del modelo {model_path}: {test_acc:.3f}")

df['metric'] = metric_values
df['metric_used'] = args.eval_metric

In [None]:
# Create the per-dataset results folder if needed; exist_ok=True makes this a
# single race-free call instead of a separate exists() check.
os.makedirs(f"results/{args.dataset}", exist_ok=True)
# The DataFrame index is written as the first CSV column (pandas default).
df.to_csv(f'results/{args.dataset}/results_{args.dataset}.csv')

# Results Plot

In [None]:
################################################
## Modify if necessary to select method and pruning base
# pr_method: one of weight, SenpisFaster, random
pr_method = "weight"
# pr_distribution: one of HOMOGENEA, CRECIENTE, DECRECIENTE, MENOS_MAS_MENOS, MAS_MENOS_MAS
pr_distribution = "MAS_MENOS_MAS"
# base_percentage: one of 20, 30, 50
base_percentage = 20
################################################

# Display label for each value of the `model_type` column.
model_type_names = dict(
    UNPRUNED='Unpruned',
    CRECIENTE='$PR_2$',
    DECRECIENTE='$PR_3$',
    HOMOGENEA='$PR_1$',
    MAS_MENOS_MAS='$PR_5$',
    MENOS_MAS_MENOS='$PR_4$',
)

# Display label for each value of the `pruning_type` column.
pruning_type_names = dict(
    Unpruned='Unpruned',
    random='Random',
    weight='Weight',
    SenpisFaster='SeNPIS Faster',
)

In [None]:
#seed_type_names = {'23':'1',
#                    '42':'2',
#                    '97':'3',
#                    '112':'4',
#                    '167':'5',
#                    '214':'6',
#                    '256':'7',
#                    '333':'8',
#                    '425':'9',
#                    '512':'10',
#                    'original':'original'
#                   }

# Seed value (as text) -> label shown on the plot. Only the 'random' method
# uses the four extra seeds; 'Unpruned' is always the last entry.
seed_type_names = {'23': '1'}
if pr_method == 'random':
    seed_type_names.update({'42': '2', '97': '3', '112': '4', '167': '5'})
seed_type_names['Unpruned'] = 'Unpruned'

In [None]:
# Reload the results written by the evaluation section. That CSV stores the
# DataFrame index as its first column, so index_col=0 restores it instead of
# adding a spurious "Unnamed: 0" column.
df = pd.read_csv(f'results/{args.dataset}/results_{args.dataset}.csv', index_col=0)

In [None]:
# Number of evaluated checkpoints per pruning distribution.
df["model_type"].value_counts()

### By Pruning Method 
X axis = Pruning Distribution

In [None]:
# Keep the models pruned with the selected method at the selected base
# percentage, plus every unpruned model.
# pr_base is converted to numbers first so the comparison with the integer
# base_percentage works whether the column holds text or floats.
base_matches = pd.to_numeric(df["pr_base"], errors="coerce") == base_percentage
method_rows = ((df["pruning_type"] == pr_method) & base_matches) | (df["model_type"] == "UNPRUNED")
# .copy() makes df_method independent of df, so assigning to its columns later
# neither warns (SettingWithCopyWarning) nor depends on view/copy semantics.
df_method = df[method_rows].copy()

In [None]:
# Checkpoints per pruning distribution after filtering by method and base percentage.
df_method["model_type"].value_counts()

In [None]:


# Replace the raw model_type codes with their display labels.
# Values that are already display labels are left untouched, so re-running
# this cell no longer turns the whole column into NaN. Rebinding through
# .assign() also avoids assigning into a slice of `df`.
current_types = df_method["model_type"]
already_labelled = current_types.isin(list(model_type_names.values()))
df_method = df_method.assign(
    model_type=current_types.map(model_type_names).where(~already_labelled, current_types)
)

In [None]:
# Rows to plot: fine-tuned models plus the unpruned baseline; only the
# category label and the metric value are needed.
rows_to_plot = df_method.finetuned.eq(True) | df_method.model_type.eq('Unpruned')
df_pruned = df_method.loc[rows_to_plot, ['model_type', 'metric']]

In [None]:
plt.rcParams['figure.figsize'] = [10, 5]

# Left-to-right order of the categories on the x axis, shared by both layers.
distribution_order = ["Unpruned", "$PR_1$", "$PR_2$", "$PR_3$", "$PR_4$", "$PR_5$"]
layer_kwargs = dict(x='model_type', y='metric', data=df_pruned, order=distribution_order)

# Box plot with the individual results overlaid as jittered orange points.
ax = sns.boxplot(**layer_kwargs)
ax = sns.stripplot(color="orange", jitter=0.3, size=4, **layer_kwargs)

# Show grid lines for both axes.
ax.yaxis.grid(True)
ax.xaxis.grid(True)

# Axis labels and title.
ax.set(
    xlabel='Pruning Distributions',
    ylabel=f'{args.eval_metric.capitalize()}',
    title=f"{pruning_type_names[pr_method]} / Global PR {base_percentage}%",
)
# Save before plt.show() so the written file contains the drawn figure.
plt.savefig(f'results/{args.dataset}/PR_BOXPLOT_{pruning_type_names[pr_method]}_{base_percentage}.png', dpi=1000)
plt.show()


### By Method/Seed
X axis = Seed

In [None]:
# Fine-tuned models plus the unpruned baseline, ordered by seed.
seed_rows = (df_method.finetuned == True) | (df_method.model_type == 'Unpruned')
df_seeds = df_method.loc[seed_rows, ['seed', 'metric']].sort_values(by='seed')
# Turn the seed into text ("23.0" -> "23"); rows without a seed are labelled
# 'Unpruned'. A column-wise map replaces the row-wise apply: it also works on
# an empty frame, and pd.notna treats None as missing as well as NaN.
df_seeds = df_seeds.assign(
    seed=df_seeds['seed'].map(lambda s: str(int(s)) if pd.notna(s) else 'Unpruned')
)

In [None]:
# Distinct seed labels present in the selection.
df_seeds["seed"].unique().tolist()


In [None]:
# Replace each seed value with its plot label (seeds missing from
# seed_type_names become NaN, as before). Values that are already plot labels
# are kept, so re-running this cell no longer wipes the column.
current_seeds = df_seeds['seed']
already_labelled = current_seeds.isin(list(seed_type_names.values()))
df_seeds = df_seeds.assign(
    seed=current_seeds.map(seed_type_names).where(~already_labelled, current_seeds)
)

In [None]:
plt.rcParams['figure.figsize'] = [10, 5]

# Categories on the x axis: only the 'random' method has five seeds to show.
if pr_method != 'random':
    seed_order = ['Unpruned', '1']
else:
    seed_order = ['Unpruned', '1', '2', '3', '4', '5']

# Box plot with the individual results overlaid as jittered orange points.
ax = sns.boxplot(x='seed', y='metric', data=df_seeds, order=seed_order)
ax = sns.stripplot(x='seed', y='metric', data=df_seeds, color="orange", jitter=0.3, size=4, order=seed_order)

# Show grid lines for both axes.
ax.yaxis.grid(True)
ax.xaxis.grid(True)

# Axis labels and title.
ax.set(
    xlabel='Seed',
    ylabel=f'{args.eval_metric.capitalize()}',
    title=f"{pruning_type_names[pr_method]} / Global PR {base_percentage}%",
)
# Save before plt.show() so the written file contains the drawn figure.
plt.savefig(f'results/{args.dataset}/SEED_BOXPLOT_{pruning_type_names[pr_method]}_{base_percentage}.png', dpi=1000)
plt.show()


### By Pruning Distribution
X axis = Pruning Method

In [None]:
# Keep the models pruned with the selected distribution at the selected base
# percentage, plus every unpruned model.
# pr_base is converted to numbers first so the comparison with the integer
# base_percentage works whether the column holds text or floats.
base_matches = pd.to_numeric(df["pr_base"], errors="coerce") == base_percentage
distribution_rows = ((df["model_type"] == pr_distribution) & base_matches) | (df["model_type"] == "UNPRUNED")
# .copy() makes the selection independent of df, so the column assignment
# below does not raise SettingWithCopyWarning.
df_distribution = df[distribution_rows].copy()
# Rows without a pruning method are labelled 'Unpruned'.
df_distribution["pruning_type"] = df_distribution["pruning_type"].fillna('Unpruned')

In [None]:
# Checkpoints per pruning method in the selection.
df_distribution["pruning_type"].value_counts()

In [None]:
# Replace the raw pruning_type codes with their display labels.
# Values that are already display labels are left untouched, so re-running
# this cell no longer turns the column into NaN. Rebinding through .assign()
# also avoids assigning into a slice of `df`.
current_methods = df_distribution["pruning_type"]
already_labelled = current_methods.isin(list(pruning_type_names.values()))
df_distribution = df_distribution.assign(
    pruning_type=current_methods.map(pruning_type_names).where(~already_labelled, current_methods)
)

In [None]:
# Rows to plot: fine-tuned models plus the unpruned baseline; only the
# pruning-method label and the metric value are needed.
rows_to_plot = df_distribution.finetuned.eq(True) | df_distribution.pruning_type.eq('Unpruned')
df_distr = df_distribution.loc[rows_to_plot, ['pruning_type', 'metric']]

In [None]:
plt.rcParams['figure.figsize'] = [10, 5]

# Left-to-right order of the categories on the x axis, shared by both layers.
method_order = ['Unpruned', 'Random', 'Weight', 'SeNPIS Faster']
layer_kwargs = dict(x='pruning_type', y='metric', data=df_distr, order=method_order)

# Box plot with the individual results overlaid as jittered orange points.
ax = sns.boxplot(**layer_kwargs)
ax = sns.stripplot(color="orange", jitter=0.3, size=4, **layer_kwargs)

# Show grid lines for both axes.
ax.yaxis.grid(True)
ax.xaxis.grid(True)

# Axis labels and title.
ax.set(
    xlabel='Pruning Methods',
    ylabel=f'{args.eval_metric.capitalize()}',
    title=f"{model_type_names[pr_distribution]} / Global PR {base_percentage}%",
)
# Save before plt.show() so the written file contains the drawn figure.
plt.savefig(f'results/{args.dataset}/DISTRIBUTION_BOXPLOT_{pr_distribution}_{base_percentage}.png', dpi=1000)
plt.show()
