In [None]:
import sys
import torch
import torch.optim as optim
import torchvision
import argparse
import torch.nn as nn
import pandas as pd
import re
import os
import seaborn as sns
from torchvision import transforms
from matplotlib.pyplot import figure
from matplotlib import pyplot as plt
from utils import *

# Reset sys.argv so that parse_args() below sees no command-line flags and
# every option takes the default declared here.
sys.argv = ['']

parser = argparse.ArgumentParser(description='Parameters training')

# One (flag, keyword-arguments) entry per option; all share the same help text.
for option_name, option_kwargs in (
    ('--model_architecture', dict(type=str, default="VGG16")),
    ('--dataset', dict(type=str, default="CIFAR10")),
    ('--batch_size', dict(type=int, default=8)),
    ('--device', dict(type=str, default=None)),
    ('--eval_metric', dict(default="accuracy")),
):
    parser.add_argument(option_name, help='....', **option_kwargs)

args = parser.parse_args()

# No device requested: use CUDA when torch reports it available, else the CPU.
if args.device is None:
    args.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
##
# Modify if necessary
args.dataset = "Tomato_Leaves"
args.eval_metric = "f1_score"
# base_percentage is used further down in the results CSV file name.
base_percentage = 50
##


# Checkpoints for the selected dataset are read from models/<dataset>.
model_directory = f"models/{args.dataset}"

# os.listdir returns entries in arbitrary order, so sort the names to keep the
# row order of the results table stable from one run (or machine) to the next.
model_paths = [
    os.path.join(model_directory, filename)
    for filename in sorted(os.listdir(model_directory))
    if filename.endswith(".pth")
]

In [None]:
# get_dataset returns three values; the first one is discarded here.
# NOTE(review): get_dataset presumably comes from the star import of utils, and
# the discarded value is presumably the training loader — confirm in utils.
_, test_loader, num_classes = get_dataset(args)

In [None]:
# Build one row per checkpoint and derive its attributes from the file path.

# The architecture/dataset names are escaped so that any regex metacharacter
# in them is matched literally instead of being interpreted as a pattern.
model_type_pattern = re.compile(
    r'HOMOGENEA|CRECIENTE|DECRECIENTE|MAS_MENOS_MAS|MENOS_MAS_MENOS|'
    + re.escape(f'{args.model_architecture}_{args.dataset}_UNPRUNED')
)
seed_pattern = re.compile(r'(?<=SEED_)\d+')


def _first_match(pattern, text):
    """Return the first match of ``pattern`` in ``text``, or None if there is none."""
    found = pattern.search(text)
    return found.group() if found else None


df = pd.DataFrame({'model_paths': model_paths})
# A path that matches no known label yields None (the same convention already
# used for a missing seed) instead of an AttributeError on `.group()`.
df['model_type'] = df['model_paths'].apply(lambda path: _first_match(model_type_pattern, path))
# A checkpoint is flagged as fine-tuned when 'FT' appears anywhere in its path.
df['finetuned'] = df['model_paths'].apply(lambda path: 'FT' in path)
# Digits that follow 'SEED_' in the path, kept as a string; None when absent.
df['seed'] = df['model_paths'].apply(lambda path: _first_match(seed_pattern, path))
df

In [None]:
# Evaluate every checkpoint on the test loader and store its score in df['metric'].
criterion = nn.CrossEntropyLoss()
metrics = []
for model_path in model_paths:
    # torch.load unpickles the file, which can execute arbitrary code: only load
    # checkpoints from a trusted source. map_location places the tensors on
    # args.device so a checkpoint saved on a GPU still loads on a CPU-only host.
    # NOTE(review): recent PyTorch releases default to weights_only=True, which
    # rejects fully pickled models — pass weights_only=False if that applies here.
    model = torch.load(model_path, map_location=args.device)
    test_loss, test_acc = test_epoch(model, args.device, test_loader, criterion, args.eval_metric, num_classes)
    # Unwrap a single-element tensor into a plain Python number.
    if torch.is_tensor(test_acc):
        test_acc = test_acc.item()
    metrics.append(test_acc)
    print(f"{args.eval_metric} del modelo {model_path}: {test_acc:.3f}")

# Assign the whole column in one step. The previous per-row chained assignment
# (df['metric'].iloc[i] = ...) writes through a temporary Series, which pandas
# warns about and which has no effect under Copy-on-Write; it also wrote floats
# into a column initialised as integers.
df['metric'] = pd.Series(metrics, index=df.index, dtype='float64')

In [None]:
# Write the evaluation table to results/<dataset>/results_<dataset>_pruning_<base_percentage>.csv.
results_dir = f"results/{args.dataset}"
# exist_ok=True creates the directory when it is missing and does nothing when
# it already exists, without a separate check-then-create step that can race.
os.makedirs(results_dir, exist_ok=True)
df.to_csv(f'{results_dir}/results_{args.dataset}_pruning_{base_percentage}.csv')

In [None]:
# Reload the table from the same CSV path the previous cell writes to.
# NOTE(review): presumably this lets the plotting cells run without
# re-evaluating the models — confirm.
df = pd.read_csv(f'results/{args.dataset}/results_{args.dataset}_pruning_{base_percentage}.csv')

In [None]:
# Show how many rows carry each raw model_type label.
df.model_type.value_counts()

In [None]:
# Display label for each raw model_type value extracted from the checkpoint paths.
model_type_names = {f'{args.model_architecture}_{args.dataset}_UNPRUNED':'original',
                    'CRECIENTE':'PR_2',
                    'DECRECIENTE':'PR_3',
                    'HOMOGENEA':'PR_1',
                    'MAS_MENOS_MAS':'PR_5',
                    'MENOS_MAS_MENOS':'PR_4'
                   }
# Values that are not keys of the dict are left unchanged. This keeps the cell
# idempotent: a plain .map(model_type_names) turns every already-renamed label
# into NaN when the cell is executed a second time.
df['model_type'] = df['model_type'].map(lambda name: model_type_names.get(name, name))

In [None]:
# Keep the fine-tuned checkpoints plus the 'original' rows, and only the two
# columns the distribution plot needs.
rows_to_plot = (df['finetuned'] == True) | (df['model_type'] == 'original')
df_pruned = df.loc[rows_to_plot, ['model_type', 'metric']]

In [None]:
plt.rcParams['figure.figsize'] = [10, 5]

# Arguments shared by both layers so they are drawn over the same categories
# in the same left-to-right order.
distribution_order = ["original", "PR_1", "PR_2", "PR_3", "PR_4", "PR_5"]
layer_kwargs = dict(x='model_type', y='metric', data=df_pruned, order=distribution_order)

# Box plot per pruning distribution, with the individual scores overlaid as points.
ax = sns.boxplot(**layer_kwargs)
ax = sns.stripplot(color="orange", jitter=0.3, size=4, **layer_kwargs)

# Show gridlines along both axes.
ax.yaxis.grid(True)
ax.xaxis.grid(True)

# Axis labels (no title is set).
ax.set(xlabel='Pruning Distributions', ylabel=f'{args.eval_metric}')

# Save the figure before showing it.
plt.savefig(f'results/{args.dataset}/PR_BOXPLOT.png', dpi=1000)
plt.show()


In [None]:
def _seed_label(seed):
    """Return the seed as an integer string, or 'original' when the seed is missing."""
    # pd.isna covers both NaN (seed column read back from CSV) and None (seed
    # column straight from the regex extraction). The former `seed == seed`
    # test only detected NaN and raised TypeError on int(None).
    return 'original' if pd.isna(seed) else str(int(seed))


# Same row selection as the pruning-distribution plot, keeping the seed instead
# of the model type; rows are ordered by seed before the labels become strings.
df_seeds = (
    df.loc[(df.finetuned == True) | (df.model_type == 'original'), ['seed', 'metric']]
    .sort_values(by='seed')
    .assign(seed=lambda frame: frame['seed'].map(_seed_label))
)

In [None]:
# List the distinct seed labels present in df_seeds.
df_seeds.seed.unique().tolist()


In [None]:
# Seeds in the order they should be numbered: the first maps to '1', the
# second to '2', and so on. 'original' maps to itself.
seed_values = ['23', '42', '97', '112', '167', '214', '256', '333', '425', '512']
seed_type_names = {
    seed: str(position) for position, seed in enumerate(seed_values, start=1)
}
seed_type_names['original'] = 'original'

In [None]:
# Replace each seed string with its display label from seed_type_names.
# NOTE(review): Series.map turns every value that is not a key of the dict into
# NaN, so seeds missing from seed_type_names are dropped from the plot, and
# running this cell a second time turns the numeric labels into NaN.
df_seeds.seed = df_seeds.seed.map(seed_type_names)

In [None]:
plt.rcParams['figure.figsize'] = [10, 5]

# Arguments shared by both layers so they are drawn over the same categories
# in the same left-to-right order.
seed_order = ['original', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10']
layer_kwargs = dict(x='seed', y='metric', data=df_seeds, order=seed_order)

# Box plot per seed label, with the individual scores overlaid as points.
ax = sns.boxplot(**layer_kwargs)
ax = sns.stripplot(color="orange", jitter=0.3, size=4, **layer_kwargs)

# Show gridlines along both axes.
ax.yaxis.grid(True)
ax.xaxis.grid(True)

# Axis labels (no title is set).
ax.set(xlabel='Seed', ylabel=f'{args.eval_metric}')

# Save the figure before showing it.
plt.savefig(f'results/{args.dataset}/SEED_BOXPLOT.png', dpi=1000)
plt.show()
