In [1]:
from pathlib import Path
import json
import re

import torch
from torch import Tensor
import torch.optim as optim
from torch.utils.data import Dataset
from torch import nn
from torch import functional as F

import torchvision
import torchvision.transforms as transforms
import torchvision.models as models

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from matplotlib import image as mpl_image
from typing import Type, Any, Callable, Union, List, Optional

from hmmlearn import hmm


In [None]:
from centrilyze import CentrioleImageFiles, ImageDataset, CentrioleImageModel, HMM, constants, image_transform, target_transform, annotate

In [2]:
batch_size = 4

classes = {"Not_Oriented": 0, "Oriented": 1, "Precieved_Not_Oriented": 2, "Precieved_Oriented": 3, "Slanted": 4, "Unidentified": 5}

In [3]:
test_folder = Path("/nic/data/high_low/train")
model_file = Path("/nic/models/model_resnet_18_high_low_affine_149.pyt")
annotations_file = Path("/nic/annotations.json")
sequences_file = Path("/nic/sequences.npy")
emission_matrix_path = Path("/nic/emission_matrix.npy")
emission_matrix_path_three_classes = Path("/nic/emission_matrix_three_classes.npy")

n_iter=1000

In [4]:
centriole_image_files = CentrioleImageFiles.from_unannotated_images(test_folder)

NameError: name 'CentrioleImageFiles' is not defined

In [9]:
testset = ImageDataset.from_centriole_image_files(
    centriole_image_files, 
    image_transform, 
    target_transform,
)

In [None]:
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, 
                                         drop_last=True
                                        )

# Define the model

In [10]:
model = CentrioleImageModel()

# Load the model

In [13]:
model = CentrioleImageModel()


In [14]:
model.load_state_dict(torch.load(str(model_file), map_location=torch.device("cpu")))


<All keys matched successfully>

# Annotate the data with the model

In [15]:
annotations = annotate(image_model, dataloader)

# Output the confusion matrix

In [16]:
confusion_matrix_test_table = get_confusion_matrix(annotations)

In [17]:
confusion_matrix_test_table

Unnamed: 0,Not_Oriented,Oriented,Precieved_Not_Oriented,Precieved_Oriented,Slanted,Unidentified
Not_Oriented,0.0,0.0,0.0,0.0,0.0,0.0
Oriented,0.0,0.0,0.0,0.0,0.0,0.0
Precieved_Not_Oriented,0.0,0.0,0.0,0.0,0.0,0.0
Precieved_Oriented,0.0,0.0,0.0,0.0,0.0,0.0
Slanted,0.0,0.0,0.0,0.0,0.0,0.0
Unidentified,2460.0,4360.0,5657.0,6944.0,5515.0,180.0


# Save to json

In [3]:
# annotations

In [19]:
new_annotations = {}

for path, annotation in annotations.items():
    regex = "_particle\[([0-9]+)\]_frame\[([0-9]+)\]"
    capture = re.findall(regex, path)
    particle = int(capture[0][0])
    frame = int(capture[0][1])
    try:
        new_annotations[particle][frame] = annotation
    except:
        new_annotations[particle] = {}
        new_annotations[particle][frame] = annotation

In [20]:
sequences = {}
ordered_annotations = {}

k = 0
sequence = []
for particle in new_annotations:
    ordered_annotations[particle] = []
    
    for j in range(20):
        try:
            next_annotation = new_annotations[particle][j]
            sequence.append(next_annotation)
        except:
            sequence.append(6)
    sequences[k] = sequence
    k = k + 1
    sequence = []
    

In [2]:
# sequences

In [22]:
full_sequences = [sequence for sequence in sequences.values() if len(sequence) == 20]

In [23]:
sequences_array = np.array(list(full_sequences))
sequences_array.shape

(269, 20)

In [24]:
lengths = [sequences_array.shape[1]]*sequences_array.shape[0]

In [25]:
np.save(str(sequences_file), sequences_array)

In [26]:
naive_transition_matrix = np.zeros((7,7))
for sequence in sequences.values():
    prev_state = 6
    for state in sequence:
        naive_transition_matrix[prev_state, state] += 1
        prev_state = state


In [27]:

classes = {
    "Not_Oriented": 0, 
    "Oriented": 1, 
    "Precieved_Not_Oriented": 2, 
    "Precieved_Oriented": 3, 
    "Slanted": 4, 
    "Unidentified": 5, 
    "No_sample": 6,
}
naive_transition_matrix = naive_transition_matrix / np.sum(naive_transition_matrix, axis=1).reshape((-1,1))
print(np.sum(naive_transition_matrix, axis=1))

naive_transition_table = pd.DataFrame(
data=naive_transition_matrix,
index=list(classes.keys()),
columns=list(classes.keys()))
 

[1. 1. 1. 1. 1. 1. 1.]


## Output the naive transition matrix

In [28]:
naive_transition_table

Unnamed: 0,Not_Oriented,Oriented,Precieved_Not_Oriented,Precieved_Oriented,Slanted,Unidentified,No_sample
Not_Oriented,0.329114,0.016878,0.50211,0.025316,0.122363,0.004219,0.0
Oriented,0.00274,0.320548,0.076712,0.427397,0.161644,0.010959,0.0
Precieved_Not_Oriented,0.09375,0.02178,0.390152,0.144886,0.317235,0.027462,0.004735
Precieved_Oriented,0.009181,0.099576,0.09322,0.42726,0.363701,0.006356,0.000706
Slanted,0.011936,0.031136,0.169694,0.265698,0.499222,0.022314,0.0
Unidentified,0.010204,0.020408,0.306122,0.204082,0.295918,0.163265,0.0
No_sample,0.092527,0.096085,0.185053,0.224199,0.33452,0.02847,0.039146


# Hidden Markov model

## Load the emission matrix

In [29]:
emission_matrix_np = np.load(emission_matrix_path)

In [30]:
emission_matrix_np

array([[6.43678161e-01, 0.00000000e+00, 3.39901478e-01, 1.64203612e-03,
        1.31362890e-02, 1.64203612e-03, 0.00000000e+00],
       [0.00000000e+00, 8.45682451e-01, 5.57103064e-04, 1.50417827e-01,
        2.22841226e-03, 1.11420613e-03, 0.00000000e+00],
       [2.91530945e-01, 0.00000000e+00, 4.62540717e-01, 5.70032573e-03,
        2.39413681e-01, 8.14332248e-04, 0.00000000e+00],
       [0.00000000e+00, 4.34234234e-01, 0.00000000e+00, 5.19819820e-01,
        4.59459459e-02, 0.00000000e+00, 0.00000000e+00],
       [1.41998908e-02, 8.19224468e-03, 1.60567996e-01, 2.43036592e-01,
        5.74003277e-01, 0.00000000e+00, 0.00000000e+00],
       [3.54430380e-02, 2.78481013e-02, 1.82278481e-01, 2.86075949e-01,
        3.36708861e-01, 1.31645570e-01, 0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 1.00000000e+00]])

In [31]:
emission_matrix_np.shape

(7, 7)

## Define the model

In [32]:
model = hmm.MultinomialHMM(n_components=7, n_iter=n_iter, params="st", init_params="st")

In [33]:
model.emissionprob_ = emission_matrix_np

In [34]:
sequences_array.shape

(269, 20)

## Fit the model

In [35]:
model.fit(sequences_array.reshape(-1, 1), lengths)

MultinomialHMM(init_params='st', n_components=7, n_iter=1000, params='st',
               random_state=RandomState(MT19937) at 0x21AED1BE048)

## Format the model output

In [36]:
classes = {
    "Not_Oriented": 0, 
    "Oriented": 1, 
    "Precieved_Not_Oriented": 2, 
    "Precieved_Oriented": 3, 
    "Slanted": 4, 
    "Unidentified": 5, 
    "No_sample": 6,
}
transition_table = pd.DataFrame(
data=model.transmat_,
index=list(classes.keys()),
columns=list(classes.keys()))


## Output the transition matrix

In [37]:
transition_table

Unnamed: 0,Not_Oriented,Oriented,Precieved_Not_Oriented,Precieved_Oriented,Slanted,Unidentified,No_sample
Not_Oriented,0.8489245,5.457615e-103,0.1510755,4.007009e-68,3.538721e-11,5.266096e-17,7.706415e-242
Oriented,6.44554e-36,0.8325385,0.02241513,0.1450288,1.421102e-11,1.754941e-05,0.0
Precieved_Not_Oriented,1.3927e-13,3.157496e-17,0.8845026,0.001289653,0.01352891,0.09155469,0.009124108
Precieved_Oriented,4.836323e-13,3.870289e-18,0.003582112,0.8451044,0.1326371,0.0173587,0.001317704
Slanted,1.064527e-37,8.389936999999999e-26,2.27083e-12,0.02874218,0.9218155,0.04944232,2.640986e-50
Unidentified,2.730901e-22,6.35758e-27,0.04916587,0.04498121,0.1901893,0.7156636,2.166236e-47
No_sample,8.838939e-18,0.0,0.08333333,0.0,2.232072e-74,2.005765e-65,0.9166667


# Hidden Markov model: Three Classes

In [1]:
# annotations

In [39]:
annotation_mapping = {
    0: 0, 
    1: 1, 
    2: 0, 
    3: 1, 
    4: 2, 
    5: 3, 
    6: 4,
}

In [40]:
new_annotations = {}

for path, annotation in annotations.items():
    regex = "_particle\[([0-9]+)\]_frame\[([0-9]+)\]"
    capture = re.findall(regex, path)
    particle = int(capture[0][0])
    frame = int(capture[0][1])
    try:
        new_annotations[particle][frame] = annotation_mapping[annotation]
    except:
        new_annotations[particle] = {}
        new_annotations[particle][frame] = annotation_mapping[annotation]

In [41]:
sequences = {}
ordered_annotations = {}

k = 0
sequence = []
for particle in new_annotations:
    ordered_annotations[particle] = []
    
    for j in range(20):
        try:
            next_annotation = new_annotations[particle][j]
            sequence.append(next_annotation)
        except:
            sequence.append(4)
    sequences[k] = sequence
    k = k + 1
    sequence = []
    

In [42]:
full_sequences = [sequence for sequence in sequences.values() if len(sequence) == 20]

In [5]:
# full_sequences

In [44]:
sequences_array = np.array(list(full_sequences))
sequences_array.shape

(269, 20)

In [45]:
lengths = [sequences_array.shape[1]]*sequences_array.shape[0]

In [46]:
np.save(str(sequences_file), sequences_array)

In [47]:
naive_transition_matrix = np.zeros((5,5))
for sequence in sequences.values():
    prev_state = 4
    for state in sequence:
        naive_transition_matrix[prev_state, state] += 1
        prev_state = state


In [None]:
naive_transition_matrix

In [48]:

classes = {
    "Not_Oriented": 0, 
    "Oriented": 1, 
    "Slanted": 2, 
    "Unidentified": 3, 
    "No_sample": 4,
}
naive_transition_matrix = naive_transition_matrix / np.sum(naive_transition_matrix, axis=1).reshape((-1,1))
print(np.sum(naive_transition_matrix, axis=1))

naive_transition_table = pd.DataFrame(
data=naive_transition_matrix,
index=list(classes.keys()),
columns=list(classes.keys()))
 

[1. 1. 1. 1. 1.]


## Load the emission matrix

In [49]:
emission_matrix_np = np.load(emission_matrix_path_three_classes)

In [50]:
emission_matrix_np

array([[8.52476864e-01, 2.72182907e-03, 1.43168209e-01, 1.63309744e-03,
        0.00000000e+00],
       [1.03270224e-03, 9.76936317e-01, 2.13425129e-02, 6.88468158e-04,
        0.00000000e+00],
       [1.97160022e-01, 2.29382851e-01, 5.72910978e-01, 5.46149645e-04,
        0.00000000e+00],
       [2.91139241e-01, 3.18987342e-01, 2.48101266e-01, 1.41772152e-01,
        0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        1.00000000e+00]])

## Define the model

In [51]:
model = hmm.MultinomialHMM(n_components=5, n_iter=n_iter, params="st", init_params="st")

In [52]:
model.emissionprob_ = emission_matrix_np

## Fit the model

In [53]:
model.fit(sequences_array.reshape(-1, 1), lengths)

MultinomialHMM(init_params='st', n_components=5, n_iter=1000, params='st',
               random_state=RandomState(MT19937) at 0x21AED1BE048)

## Format the model output

In [58]:
classes = {
    "Not_Oriented": 0, 
    "Oriented": 1, 
    "Slanted": 2, 
    "Unidentified": 3, 
    "No_sample": 4,
}
transition_table = pd.DataFrame(
data=model.transmat_,
index=list(classes.keys()),
columns=list(classes.keys()))


## Output the transition matrix

In [59]:
pd.option_context('precision', 3)

<pandas._config.config.option_context at 0x21aff63a288>

In [60]:
transition_table

Unnamed: 0,Not_Oriented,Oriented,Slanted,Unidentified,No_sample
Not_Oriented,0.9197867,0.003656876,0.02941763,0.04052531,0.006613449
Oriented,0.007301879,0.8521219,0.13724,0.002251444,0.001084738
Slanted,3.565343e-09,0.04194937,0.9037174,0.05433323,1.146448e-56
Unidentified,0.04394421,0.02907449,0.2629513,0.66403,2.098e-49
No_sample,0.08333333,1.204606e-243,6.187378e-72,1.0582300000000001e-54,0.9166667
