# MatrixViewer

# Setup Environment

In [None]:
from timeit import default_timer as timer
import numpy as np
import holoviews as hv
import datashader as ds
import pandas as pd
import holoviews.operation.datashader as hd
from holoviews.operation.datashader import aggregate, shade, datashade, dynspread, stack
from datashader import transfer_functions as tf
from holoviews.operation import decimate
from IPython.core.display import display, HTML
from datashader.colors import Sets1to3 # default datashade() and shade() color cycle
import warnings
# Suppress FutureWarning messages for the rest of the session.
warnings.simplefilter(action='ignore', category=FutureWarning)
# Inject CSS so the notebook container uses the full browser window.
# The height rule needs '!important' (with the '!'); without it the
# declaration is invalid CSS and is ignored.
display(HTML("<style>.container { width:100% !important; height:100% !important}</style>"))
# Select the bokeh plotting backend for HoloViews.
hv.extension('bokeh')
hv.notebook_extension('bokeh')  # NOTE(review): looks redundant with hv.extension above - confirm before removing
# Class-level defaults for the decimate / dynspread operations.
decimate.max_samples=1000
dynspread.max_px=20
dynspread.threshold=0.5
# Plot size in pixels; the height is derived from the width (ratio 1.2 : 1).
plot_width  = int(1200)
plot_height = int(plot_width//1.2)




In [None]:
# Print the HoloViews help for the datashade operation.
hv.help(datashade)

# Read Data and dump pickle files

In [None]:
import sys
import re
from collections import OrderedDict
import pickle
import os

# Parse sparse matrices written as text blocks of the form
#
#     G = [
#       <row> <col> <value>
#       ...
#     ]
#
# Each completed matrix is stored as a DataFrame (columns 'row', 'col', 'val')
# in `data_frames` and also pickled next to the input file for faster reloads.

n_max_num_matrices_per_file = 2  # Stop reading a file after this many matrices
n_max_lines = sys.maxsize        # Upper bound on lines read (counted across all files)

start = timer()
file_list = ["MATRICES/matrix.txt"] # List of matrices to read in
n_lines = 0
regex_1 = r" *G *= *\["  # Start of matrix read
# Matrix entries: int int float.
# The '-' is escaped so the class no longer contains the accidental range
# '+'..'.' (which also admitted ','), and 'E' is accepted so that a value such
# as 1.5E-3 is not silently truncated to 1.5.
regex_2 = r" *([0-9]+) *([0-9]+) *([0-9+\-.eE]+)"
regex_3 = r" *\]" # End of matrix read

# Compile once; the patterns are applied to every input line.
start_re = re.compile(regex_1)
entry_re = re.compile(regex_2)
end_re = re.compile(regex_3)

# Running statistics over every entry read (all matrices, all files).
max_row_id = -1
max_col_id = -1
min_val = sys.maxsize
max_val = -sys.maxsize
n_lower = 0
n_upper = 0
n_zero = 0
n_diag = 0
data_frames = OrderedDict()
for file in file_list:
    # Base name is everything before the FIRST '.' in the path.
    orig_file_name = file.split('.')[0]
    file_name = orig_file_name.replace("/", "-")
    data_name = ""
    mat_num = 0
    read_matrix = False
    # A distinct handle name keeps `file` bound to the path string.
    with open(file, 'r') as handle:
        for line in handle:
            n_lines += 1
            if n_lines > n_max_lines:
                break
            if start_re.match(line):
                # Opening "G = [": start collecting a new matrix.
                read_matrix = True
                data_name = file_name + "-G" + str(mat_num)
                print(data_name)
                d_s = {'row': [], 'col': [], 'val': [] }
                continue
            if read_matrix and end_re.match(line):
                # Closing "]": freeze the collected entries and pickle them.
                print("End of " + data_name)
                data_frames[data_name] = pd.DataFrame(data=d_s)
                pickle_file_name = orig_file_name + "-G" + str(mat_num) + ".p"
                with open(pickle_file_name, 'wb') as f:
                    pickle.dump(data_frames[data_name], f, protocol=4)
                print("created pickle file " + pickle_file_name)
                mat_num += 1
                if mat_num == n_max_num_matrices_per_file:
                    break
                read_matrix = False
                continue
            if read_matrix:
                entries = entry_re.findall(line)
                if not entries:
                    # Blank or malformed line inside a matrix: skip it rather
                    # than re-appending the previous entry's values.
                    continue
                # If a line holds several triples, the last one is used
                # (same as the previous behaviour).
                row_text, col_text, val_text = entries[-1]
                row = int(row_text)
                col = int(col_text)
                val = float(val_text)
                d_s['row'].append(row)
                d_s['col'].append(col)
                d_s['val'].append(val)
                max_row_id = max(max_row_id, row)
                max_col_id = max(max_col_id, col)
                min_val = min(min_val, val)
                max_val = max(max_val, val)
                # Classify the entry by its position relative to the diagonal.
                if abs(val) > 0.0:
                    if row > col:
                        n_lower += 1
                    elif col > row:
                        n_upper += 1
                    else:
                        n_diag += 1
                else:
                    n_zero += 1
end = timer()
print("Data input complete: " + str(n_lines) + " lines. " + str(end - start) + " s")
print("diag, lower, upper, zero: " + ", ".join([str(n_diag), str(n_lower), str(n_upper), str(n_zero)]))
import io
print (io.DEFAULT_BUFFER_SIZE)


# Read pickle files (For fast data load)

In [None]:
import os
import re
import pickle
from collections import OrderedDict

# Reload every pickled matrix DataFrame ("*.p") found in the listed
# directories into `data_frames`, keyed by "<directory>-<file base name>".
start = timer()
file_directories = ["MATRICES"] # List of directories containing pickled matrix files
data_frames = OrderedDict()
for directory in file_directories:
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # insertion order of `data_frames` reproducible between runs.
    for file in sorted(os.listdir(directory)):
        if not file.endswith(".p"):
            continue
        # Base name is everything before the FIRST '.' in the file name.
        file_name = directory + "-" + file.split('.')[0]
        # NOTE: unpickling can execute arbitrary code - only load files that
        # come from a trusted source.
        with open(os.path.join(directory, file), 'rb') as f:
            data_frames[file_name] = pickle.load(f, encoding='bytes')
        print("loaded " + file_name)
end = timer()
print(str(end - start) + " s")

# Matrix plots 

In [None]:
%%opts RGB [width=plot_width, height=plot_height, invert_xaxis=False, invert_yaxis=True, xaxis='top'] {+axiswise}
# Small labelled Points elements, one per sign category, that are overlaid on
# the final plot below.
color_key = [('positive', '#247ffe'), ('negative', '#e65036')]
colors = hv.NdOverlay({k: hv.Points([0,0], label=str(k)).opts(style=dict(color=v)) for k, v in color_key})

# For every matrix, split the entries by the sign of 'val' and wrap the two
# scatters in an overlay keyed by 'sign'.  Exact zeros land in neither group.
scatter_dict = {}
for data_name, df in data_frames.items():
    values = df['val']
    by_sign = {
        'negative': hv.Scatter(df.loc[values < 0.0], kdims=['col', 'row']),
        'positive': hv.Scatter(df.loc[values > 0.0], kdims=['col', 'row']),
    }
    scatter_dict[data_name] = hv.NdOverlay(by_sign, kdims='sign')

# One HoloMap frame per matrix; rasterise with a per-sign categorical count,
# then spread the resulting pixels.
matrices = hv.HoloMap(scatter_dict, kdims=['data_name'])
shaded = datashade(matrices, aggregator=ds.count_cat('sign'))
hmap = dynspread(shaded, threshold=0.75, how='over') * colors
hmap

# Matrix Aggregate Data

In [None]:
%%opts QuadMesh [tools=['hover']] (alpha=0 hover_alpha=0.2)
%%opts RGB [width=plot_width, height=plot_height, invert_xaxis=False, invert_yaxis=True, xaxis='top'] {+axiswise}
from holoviews.streams import RangeXY
import colorcet as cc
# Reversed colorcet diverging blue-white-red map used to colour the values.
ccmap = list(reversed(cc.b_diverging_bwr_55_98_c37))

# Set aggregate type: the datashader reduction applied to 'val' within each
# hover window.
aggregate_type = "mean"
aggregators = {
    "mean": ds.mean,      # Display mean value of data within window
    "min": ds.min,        # Display min value of data within window
    "max": ds.max,        # Display max value of data within window
    "variance": ds.var,   # Display variance of data within window
    "count": ds.count,    # Display number of data elements within window
    "sum": ds.sum,        # Display sum of data within window
}
if aggregate_type not in aggregators:
    # Fail here with a clear message instead of a NameError (or a stale
    # `aggr` left over from an earlier run) further down.
    raise ValueError("Unknown aggregate_type '" + str(aggregate_type) + "'; expected one of: "
                     + ", ".join(aggregators))
aggr = aggregators[aggregate_type]('val')


data_id = 0          # Index into data_names of the matrix to display
value_filter = 0.0   # Only entries with |val| strictly above this are plotted
# List the available matrices together with the data_id that selects each.
data_names = list(data_frames)
for idx, data_name in enumerate(data_names):
    print("data_id: " + str(idx) + ", data_name: " + data_name)

df = data_frames[data_names[data_id]]
points = hv.Points(data=df.loc[(abs(df['val']) > value_filter)], kdims=['col','row'],vdims =['val'])
# Datashaded image of the selected matrix, coloured by the mean of 'val'.
pts = dynspread(datashade(points, width=plot_width, height=plot_height, 
                         aggregator=ds.mean('val'),normalization='eq_hist',
                         cmap = ccmap), threshold=0.5, how='over')
# Overlay a 15x15 QuadMesh built from the chosen aggregate; it is re-aggregated
# via the RangeXY stream.
(pts * hv.util.Dynamic(hd.aggregate(points, width=15, height=15, streams=[RangeXY],aggregator=aggr),
                               operation=hv.QuadMesh).relabel("Dynamic hover"))
 