In [1]:
!pip install pandas

Collecting pandas
  Downloading pandas-1.5.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.2/12.2 MB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting pytz>=2020.1
  Downloading pytz-2022.7.1-py2.py3-none-any.whl (499 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m499.4/499.4 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Installing collected packages: pytz, pandas
Successfully installed pandas-1.5.3 pytz-2022.7.1
[0m

In [21]:
import os
import csv
import re
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from contextlib import contextmanager
from utils import npy_save

In [3]:
def get_files(method:str, selection):
    path = f'/workspace/accuracy/{selection}/{method}/'
    file_list = os.listdir(path+'csv_files')
    file_list_csv = [file for file in file_list if file.endswith('.csv')]
    seeds = []
    pattern = f'{method}_sparse_output_([0-9]+).csv'
    for file in file_list_csv:
        result = re.search(pattern, file)
        seeds.append(result.group(1))
    return path, file_list_csv, seeds

In [4]:
def data_sorting(file_str):
    data = pd.read_csv(file_str)
    data_sorted = data.sort_values(by="sparsity").drop_duplicates(subset=['sparsity'], keep = 'first')
    return data_sorted

In [5]:
@contextmanager
def savefig(path_str,filename):
    dir_name = os.path.dirname(path_str)
    if not os.path.isdir(dir_name):
        os.makedirs(dir_name)
    yield plt.savefig(path_str+filename)

In [22]:
def save_means(method:str, selection = [1,0]):
    path, file_list, seeds = get_files(method, selection)
    sorted_datas = []
    total_results = []
    for i in range(len(file_list)):
        sorted_datas.append(data_sorting(path+'csv_files/'+file_list[i]))
    results = pd.concat(sorted_datas)
    sparsitys = list(results['sparsity'].drop_duplicates())

    for i in list(sparsitys):
        total_results.append(list(results['accuracy'][results['sparsity'] == i]))

    with npy_save(path + 'mean_result.npy', np.array(total_results)) as npy:
        npy
    with npy_save(path + 'sparsity.npy', np.array(sparsitys)) as npy:
        npy
    return None

In [39]:
def draw_diff_select(method:str, selections:list):
    means = []
    for selection in selections:
        mean = np.load(f'/workspace/accuracy/{selection}/{method}/mean_result.npy')
        sparsity = np.load(f'/workspace/accuracy/{selection}/{method}/sparsity.npy')
        means.append((mean, sparsity))
    for mean in means:
        print(mean)
        plt.plot(mean[1], mean[0])
    plt.savefig(f'/workspace/accuracy/diff_select_graph_{method}.png')

In [26]:
save_means('threshold', [5,9])

In [40]:
draw_diff_select(method='threshold', selections=['[1, 7]', '[5, 9]'])

(array([[0.9765625],
       [0.9765625],
       [0.96875  ],
       [0.9765625],
       [0.9609375],
       [0.96875  ],
       [0.9765625],
       [0.96875  ],
       [0.96875  ],
       [0.953125 ],
       [0.9453125],
       [0.953125 ],
       [0.9609375],
       [0.9765625],
       [0.96875  ],
       [0.96875  ],
       [0.96875  ],
       [0.96875  ],
       [0.953125 ],
       [0.9765625],
       [0.96875  ],
       [0.4765625],
       [0.9765625],
       [0.96875  ],
       [0.9296875],
       [0.3046875],
       [0.9609375],
       [0.921875 ],
       [0.9453125],
       [0.6953125],
       [0.3359375],
       [0.9453125],
       [0.96875  ],
       [0.96875  ],
       [0.8125   ],
       [0.875    ],
       [0.796875 ],
       [0.8515625],
       [0.875    ],
       [0.9609375],
       [0.3515625],
       [0.6484375],
       [0.4921875],
       [0.765625 ],
       [0.9609375],
       [0.6015625],
       [0.484375 ],
       [0.3671875],
       [0.9765625],
       [0.5625   ],

In [6]:
def save_graphs(method:str, selection):
    path, file_list, seeds = get_files(method, selection=selection)
    for i in range(len(file_list)):
        data = data_sorting(path+'csv_files/'+file_list[i])
        sparsity = np.array(data["sparsity"])
        accuracy = np.array(data["accuracy"])
        plt.plot(sparsity,accuracy)
        plt.xlabel("insparsity")
        plt.ylabel("accuracy")
        plt.title(f"{method}_{seeds[i]}")
        with savefig(path+'eachfig/', f"{method}_{seeds[i]}.png") as save:
            save
        plt.clf()

In [7]:
def box_graphs(method:str, selection):
    path, file_list, seeds = get_files(method, selection)
    sorted_datas = []
    total_results = []
    for i in range(len(file_list)):
        sorted_datas.append(data_sorting(path+'csv_files/'+file_list[i]))
    results = pd.concat(sorted_datas)
    sparsitys = list(results['sparsity'].drop_duplicates())

    for i in list(sparsitys):
        total_results.append(list(results['accuracy'][results['sparsity'] == i]))
    plt.boxplot(total_results)
    plt.xticks(range(1,len(sparsitys)+1), sparsitys)
    plt.title(f'boxplot of {method}_sampling{len(file_list)}')
    plt.xlabel("insparsity")
    plt.ylabel('accuracy')
    with savefig(path+'meanfig/', f"{method}.png") as save:
        save
    plt.clf()

In [8]:
def mean_graphs(method:str, selection):
    path, file_list, seeds = get_files(method, selection)
    sorted_datas = []
    total_results = []
    for i in range(len(file_list)):
        sorted_datas.append(data_sorting(path+'csv_files/'+file_list[i]))
    results = pd.concat(sorted_datas)
    sparsitys = list(results['sparsity'].drop_duplicates())

    for i in list(sparsitys):
        total_results.append(np.array(results['accuracy'][results['sparsity'] == i]).mean())
    plt.plot(sparsitys, np.array(total_results))
    plt.title(f'meanplot of {method}_sampling{len(file_list)}')
    plt.xlabel("insparsity")
    plt.ylabel('accuracy')
    with savefig(path+'meanfig/', f"{method}_mean.png") as save:
        save
    plt.clf()

In [9]:
def log_graphs(method:str, selection):
    path, file_list, seeds = get_files(method, selection)
    sorted_datas = []
    total_results = []
    for i in range(len(file_list)):
        sorted_datas.append(data_sorting(path+'csv_files/'+file_list[i]))
    results = pd.concat(sorted_datas)
    sparsitys = list(results['sparsity'].drop_duplicates())

    for i in list(sparsitys):
        total_results.append(np.array(results['accuracy'][results['sparsity'] == i]).mean())
    plt.plot(sparsitys, np.array(total_results))
    plt.xscale('log')
    plt.yscale('log')
    plt.title(f'logx, logy plot of {method}_sampling{len(file_list)}')
    plt.xlabel("insparsity")
    plt.ylabel('accuracy')
    with savefig(path+'meanfig/', f"{method}_log.png") as save:
        save
    plt.clf()

In [10]:
def logx_graphs(method:str, selection):
    path, file_list, seeds = get_files(method, selection)
    sorted_datas = []
    total_results = []
    for i in range(len(file_list)):
        sorted_datas.append(data_sorting(path+'csv_files/'+file_list[i]))
    results = pd.concat(sorted_datas)
    sparsitys = list(results['sparsity'].drop_duplicates())

    for i in list(sparsitys):
        total_results.append(np.array(results['accuracy'][results['sparsity'] == i]).mean())
    plt.plot(sparsitys, np.array(total_results))
    plt.xscale('log')
    plt.title(f'logx plot of {method}_sampling{len(file_list)}')
    plt.xlabel("insparsity")
    plt.ylabel('accuracy')
    with savefig(path+'meanfig/', f"{method}_logx.png") as save:
        save
    plt.clf()

In [11]:
def logy_graphs(method:str, selection):
    path, file_list, seeds = get_files(method, selection)
    sorted_datas = []
    total_results = []
    for i in range(len(file_list)):
        sorted_datas.append(data_sorting(path+'csv_files/'+file_list[i]))
    results = pd.concat(sorted_datas)
    sparsitys = list(results['sparsity'].drop_duplicates())

    for i in list(sparsitys):
        total_results.append(np.array(results['accuracy'][results['sparsity'] == i]).mean())
    plt.plot(sparsitys, np.array(total_results))
    plt.yscale('log')
    plt.title(f'logy plot of {method}_sampling{len(file_list)}')
    plt.xlabel("insparsity")
    plt.ylabel('accuracy')
    with savefig(path+'meanfig/', f"{method}_logy.png") as save:
        save
    plt.clf()

In [12]:
method_list=['block', 'random', 'origin', 'diagonal']
def draw_all_graphs(graph_fun, selection=[1,0], method_list=method_list):
    for method_str in method_list:
        graph_fun(method_str, selection)
    return None

In [15]:
draw_all_graphs(box_graphs, selection=[1,7], method_list=["diagonal"])

In [16]:
draw_all_graphs(save_graphs, selection=[1,7], method_list=["diagonal"])

In [17]:
draw_all_graphs(mean_graphs, selection=[1,7], method_list=["diagonal"])

In [18]:
draw_all_graphs(log_graphs, selection=[1,7], method_list=["diagonal"])

In [19]:
draw_all_graphs(logx_graphs, selection=[1,7], method_list=["diagonal"])

In [20]:
draw_all_graphs(logy_graphs, selection=[1,7], method_list=["diagonal"])