In [None]:
# If you don't have matplotlib, numpy or pandas installed on your machine, uncomment these lines to pip install them:
# %pip install matplotlib
# %pip install numpy
# %pip install pandas

# and of course install quibbler!
%pip install pyquibbler

In [None]:
from pyquibbler import iquib, initialize_quibbler, q, quiby
initialize_quibbler()
import matplotlib.pyplot as plt
from matplotlib.widgets import Slider
import matplotlib.patches as patches
import numpy as np
import pandas as pd
%matplotlib tk

In [None]:
# Precomputed inputs, all read from the local data/ directory.
# NumPy arrays first ...
embedding_matrix = np.load('data/embedding_matrix.npy')
TSNE_2d_result = np.load('data/TSNE_2d_result.npy')
# ... then the pickled objects.
# NOTE(review): read_pickle unpickles arbitrary objects; only point it at trusted files.
df = pd.read_pickle('data/ICD_df.pkl')
chapters_abb = pd.read_pickle('data/chapters_abb.pkl')

In [None]:
@quiby
def filter_by_frequency(index_and_freq, frequency_threshold):
    """Keep the first-column values of the rows that are frequent enough.

    Parameters
    ----------
    index_and_freq : 2-D array
        Column 0 holds the value to return, column 1 holds its frequency.
    frequency_threshold : number
        Rows whose frequency is greater than or equal to this are kept.

    Returns
    -------
    1-D array with column 0 of the surviving rows, in their original order.
    """
    frequent_enough = index_and_freq[:, 1] >= frequency_threshold
    surviving_rows = index_and_freq[frequent_enough]
    return surviving_rows[:, 0]

@quiby
def get_indecies_from_array(array, indecies):
    """Index ``array`` with ``indecies`` and return the selected entries.

    This is plain ``array[indecies]`` wrapped in a function.
    """
    selected = array[indecies]
    return selected

@quiby
def points_in_ellipse_indecies(center, width, height, angle, points):
    """Return the row indices of ``points`` that lie inside (or on) an ellipse.

    The ellipse is described with the same conventions as
    ``matplotlib.patches.Ellipse`` so the selection matches a patch drawn
    from the same values.

    Parameters
    ----------
    center : array-like of shape (2,)
        x, y coordinates of the ellipse center.
    width, height : float
        Total lengths (diameters) of the ellipse axes before rotation.
    angle : float
        Anti-clockwise rotation of the ellipse, in degrees.
    points : array-like of shape (n, 2)
        Points to test, one per row.

    Returns
    -------
    numpy.ndarray
        Integer indices of the rows of ``points`` that fall inside the ellipse.
    """
    offsets = np.asarray(points, dtype=float) - np.asarray(center, dtype=float)
    # np.cos / np.sin expect radians, whereas the angle is given in degrees
    theta = np.deg2rad(angle)
    cos_t, sin_t = np.cos(theta), np.sin(theta)
    # right-multiplying row vectors by this matrix rotates them by -theta,
    # i.e. expresses the points in the ellipse's own (unrotated) frame
    to_ellipse_frame = np.array([[cos_t, -sin_t], [sin_t, cos_t]])
    local = np.dot(offsets, to_ellipse_frame)
    # width and height are full diameters; the ellipse equation needs semi-axes
    semi_x, semi_y = width / 2, height / 2
    # a zero-length axis produces inf/nan, which compare as "outside";
    # only the floating point warnings are silenced here
    with np.errstate(divide='ignore', invalid='ignore'):
        normalized = (local[:, 0] / semi_x) ** 2 + (local[:, 1] / semi_y) ** 2
    # return the indices of points inside the ellipse
    return np.where(normalized <= 1)[0]

In [None]:
# ---- quantities driving the scatter plot ----
# minimal frequency a row needs in order to be displayed (user-adjustable quib)
frequency_threshold = iquib(100)
# column 0: index of df (via reset_index), column 1: frequency
index_and_freq = df['frequency'].reset_index().values
# integer code per distinct ICD9_chapter value, stored as a new df column
df['factorized_ICD9_chapter'] = pd.factorize(df['ICD9_chapter'])[0]
# column 0: chapter code, column 1: frequency
chapter_and_freq = df[['factorized_ICD9_chapter', 'frequency']].values
indexes_to_plot = filter_by_frequency(index_and_freq, frequency_threshold)
chapter_to_plot = filter_by_frequency(chapter_and_freq, frequency_threshold)
rows_to_plot = get_indecies_from_array(TSNE_2d_result, indexes_to_plot)

# ---- scatter plot of the 2-D TSNE result, colored by chapter code ----
fig = plt.figure(figsize=(20,10))
ax_scatter = fig.add_axes([0, 0, 0.5, 1])
ax_scatter.scatter(rows_to_plot[:,0], rows_to_plot[:,1], s=3, c=chapter_to_plot)

# ---- ellipse parameters as quibs ----
center = iquib(np.array([35., -28.]))
width = iquib(15.)
height = iquib(10.)
angle = iquib(0.)

# draw the ellipse outline and mark its center
ax_scatter.add_patch(patches.Ellipse(center, width, height, angle=angle, fill=False, color='r', linewidth=2))
ax_scatter.plot(center[0], center[1], '+', c='r', markersize=10, markeredgewidth=2)

# ---- sliders for the ellipse parameters ----
# the axis limits are kept equal to each slider's (valmin, valmax) range
ax_width = fig.add_axes([0.6, 0.05, 0.35, 0.05])
ax_width.set_xlim(0, 20)
slider_width = Slider(ax_width, 'Width', 0, 20, valinit=width, valstep=0.1)
ax_height = fig.add_axes([0.6, 0.1, 0.35, 0.05])
ax_height.set_xlim(0, 20)
slider_height = Slider(ax_height, 'Height', 0, 20, valinit=height, valstep=0.1)
ax_angle = fig.add_axes([0.6, 0.15, 0.35, 0.05])
ax_angle.set_xlim(0, 360)
slider_angle = Slider(ax_angle, 'Angle', 0, 360, valinit=angle, valstep=1)

# ---- slider for the minimum frequency of an ICD9 code in the dataset ----
ax_freq = fig.add_axes([0.6, 0.20, 0.35, 0.05])
ax_freq.set_xlim(1,5000)
# bound to a name like the other sliders so a reference to the widget is kept
slider_freq = Slider(ax_freq, valmin=1, valmax=5000, valinit=frequency_threshold, label='Min Frequency')

# ---- bar chart: share of each chapter among the points inside the ellipse ----
cluster_threshold = iquib(0.5)
ax_bars = fig.add_axes([0.5, 0.35, 0.5, 0.65])
chapter_labels = list(chapters_abb.values())
n_chapters = len(chapter_labels)
# NOTE(review): bar i shows the count of chapter code i under the i-th label of
# chapters_abb; this assumes both orderings agree - confirm against the data.
in_ellipse = points_in_ellipse_indecies(center, width, height, angle, rows_to_plot)
# computed once and reused for both the numerator and the normalization
chapter_counts = q(np.bincount, chapter_to_plot[in_ellipse], minlength=n_chapters)
# when the ellipse is empty the total is 0; clamp the denominator to 1 so the
# bars are all zero instead of NaN
chapter_fractions = chapter_counts / q(np.maximum, np.sum(chapter_counts), 1)
ax_bars.bar(chapter_labels, chapter_fractions)
ax_bars.set_xticks(chapter_labels)
ax_bars.set_xticklabels(chapter_labels, rotation=90)
# horizontal threshold line plus a marker at its left end
ax_bars.hlines(y=cluster_threshold, xmin=-1, xmax=n_chapters, linestyles='--', color='r')
ax_bars.plot([-1], cluster_threshold, marker=">", markersize=10)

plt.show()