In [27]:
from extract import get_data_from_directory
from preprocessing import preprocess, train_regressor_for_native_vision, clean_cylinder_nones
from extract_rules import TS_FNN

import pandas as pd

In [3]:
# Materialise everything yielded by get_data_from_directory for the relative
# './decoded' directory into a list, so it can be iterated more than once.
data = list(get_data_from_directory('./decoded'))

In [4]:
# Rebinds `data` to the preprocessed result, so the raw list is no longer
# reachable under this name afterwards.
# NOTE(review): re-running this cell alone passes already-preprocessed data
# back into preprocess — confirm that preprocess tolerates that.
data = preprocess(data)

In [5]:
# Tabular view of the preprocessed records; the model code in later cells
# keeps reading from `data`, not from this frame.
dataframe = pd.DataFrame(data)

In [164]:
from sklearn.neural_network import MLPRegressor
# from sklearn import svm

# Hand-written (native, sphere, cylinder, corrected) rows that are placed in
# front of the measured records in the training set.
# NOTE(review): the rows (0.4, 0., -1., 1.) and (0.5, 0., -1, 1.) give two
# different targets for identical inputs — confirm that both are intended.
_SYNTHETIC_VISIONS = (
    (0., 0., 0., 0.),
    (1, 0., 0., 1.),
    (0.9, -0.5, 0., 1.),
    (0.5, -1, 0., 1.),
    (0.15, -1.5, 0., 1.),
    (0.1, -2, 0., 1.),
    (0.07, -3, 0., 1.),
    (0.06, -4, 0., 1.),
    (0.05, -5, 0., 1.),
    (0.04, -6, 0., 1.),
    (0.4, 0., -1., 1.),
    (0.9, 0., -0.5, 1.),
    (0.5, 0., -1, 1.),
    (0.15, 0., -1.5, 1.),
    (0.1, 0., -2, 1.),
    (0.07, 0., -3, 1.),
    (0.06, 0., -4, 1.),
    (0.05, 0., -5, 1.),
    (0.04, 0., -6, 1.),
    (0.7, -0.5, -0.5, 1.),
    (0.4, -1, -0.5, 1.),
    (0.1, -1.5, -1., 1.),
    (0.07, -2, -1., 1.),
    (0.04, -3, -2., 1.),
    (0.05, -4, -2., 1.),
    (0.04, -5, -3., 1.),
    (0.02, -6, -4., 1.),
)

# Value that is mapped to 0 instead of being parsed with float().
_ZERO_SIGHT_MARKER = '1/~'


# NOTE: this definition shadows the function of the same name imported from
# `preprocessing` in the first cell of the notebook.
def train_regressor_for_native_vision(data, prior_repeats=11, hidden_layer_sizes=(5, 5),
                                      random_state=1, max_iter=500):
    """Fit an MLP that predicts the native-vision value of one eye.

    Training rows are 4-tuples ``(native, sphere, cylinder, corrected)``. The
    model learns ``native`` from ``(sphere, cylinder, corrected)``. The rows
    are, in order: the synthetic table repeated ``prior_repeats`` times, the
    right-eye tuple of every record, then the left-eye tuple of every record.
    Tuples containing ``None`` are dropped; the string ``'1/~'`` becomes 0.

    Args:
        data: iterable of record dicts with the keys ``right_native``,
            ``correction_right_sphere``, ``correction_right_cylinder``,
            ``corrected_right`` and the matching ``left`` keys. Every record
            is first passed through ``clean_cylinder_nones``.
        prior_repeats: how many copies of the synthetic table are used. The
            default of 11 reproduces the original ``visions += 10*visions``.
        hidden_layer_sizes: forwarded to ``MLPRegressor``.
        random_state: forwarded to ``MLPRegressor``.
        max_iter: forwarded to ``MLPRegressor``.

    Returns:
        A callable taking one ``(sphere, cylinder, corrected)`` sequence and
        returning the prediction rounded to 3 decimals and clamped to [0, 1].
    """
    data = [clean_cylinder_nones(d) for d in data]

    visions = list(_SYNTHETIC_VISIONS) * prior_repeats
    visions += [(d['right_native'], d['correction_right_sphere'], d['correction_right_cylinder'], d['corrected_right']) for d in data]
    visions += [(d['left_native'], d['correction_left_sphere'], d['correction_left_cylinder'], d['corrected_left']) for d in data]

    # Identity comparison: a value is only "missing" when it really is None.
    samples = [
        [float(value) if value != _ZERO_SIGHT_MARKER else 0 for value in v]
        for v in visions
        if all(value is not None for value in v)
    ]
    X = [s[1:] for s in samples]
    y = [s[0] for s in samples]
    regressor = MLPRegressor(hidden_layer_sizes, random_state=random_state, max_iter=max_iter).fit(X, y)

    def predict_native(x):
        """Predict for one (sphere, cylinder, corrected) sequence, clamped to [0, 1]."""
        prediction = round(regressor.predict([x])[0], 3)
        return min(max(0, prediction), 1)

    return predict_native

In [165]:
# Despite the name, `regressor` is the prediction callable returned by
# train_regressor_for_native_vision, not the fitted MLPRegressor object.
regressor = train_regressor_for_native_vision(data)

In [173]:
# The argument order is (sphere, cylinder, corrected), i.e. the training
# tuples without their first (native) element.
regressor((-4, 0, 1))

0.062

In [None]:
# Show the first 20 rows of the frame as a quick look at the records.
dataframe[:20]

Unnamed: 0,sex,main_diag,age,right_native,correction_right_sphere,correction_right_cylinder,corrected_right,left_native,correction_left_sphere,correction_left_cylinder,corrected_left
0,f,H52.4,56,,0.4,0.0,1.0,,0.5,0.0,1.0
1,f,H52.1,13,,-0.75,-0.5,1.0,,,,
2,f,H52.1,12,0.4,-0.75,0.0,1.0,0.4,-0.75,0.0,1.0
3,f,H35.0,68,1.0,,,,0.7,0.5,0.0,1.0
4,f,H35.0,71,,0.75,0.0,0.9,,1.25,0.0,0.9
5,m,H52.1,10,,-1.25,0.0,1.0,,-1.0,0.0,1.0
6,f,H10.4,75,0.6,,-0.75,0.8,0.0,,,
7,f,H26.8,91,1/~,,,,0.0,,,
8,f,H10.4,74,0.7,0.5,0.0,0.9,0.6,0.5,0.0,0.9
9,m,H52.1,39,0.4,-0.75,-0.5,1.0,0.4,-1.5,-1.0,1.0


In [None]:
import numpy as np

def sight_to_num(sight_str):
    """Convert a sight value to a number, falling back to 0.

    Args:
        sight_str: value to convert; anything accepted by ``float()``.

    Returns:
        ``float(sight_str)`` when the conversion succeeds, otherwise ``0``
        (for example for ``None`` or a non-numeric string such as ``'1/~'``).
    """
    try:
        return float(sight_str)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures are treated as "no usable value"; a bare
        # `except` would also swallow KeyboardInterrupt and SystemExit.
        return 0

def diopters_to_num(diopters):
    """Convert a diopter value to a float, treating ``None`` as 0.

    Args:
        diopters: ``None`` or anything accepted by ``float()``.

    Returns:
        ``0`` when ``diopters`` is ``None``, otherwise ``float(diopters)``.

    Raises:
        ValueError, TypeError: propagated from ``float()`` for values that
            are neither ``None`` nor convertible.
    """
    # Identity test: `== None` would call a custom __eq__ and can misfire.
    if diopters is None:
        return 0
    return float(diopters)

def get_sight_from_h_state(h_state_dict):
    """Return the ``(left, right)`` sight numbers of one record.

    For each eye the corrected value is used when it is not ``None``;
    otherwise the native value is used. The chosen value is converted with
    ``sight_to_num``.

    Args:
        h_state_dict: mapping with the keys ``corrected_left``,
            ``left_native``, ``corrected_right`` and ``right_native``.

    Returns:
        Tuple ``(left, right)`` of converted sight values.
    """
    def pick(corrected_key, native_key):
        # The native key is only read when the corrected value is missing.
        corrected = h_state_dict[corrected_key]
        chosen = corrected if corrected is not None else h_state_dict[native_key]
        return sight_to_num(chosen)

    left = pick('corrected_left', 'left_native')
    right = pick('corrected_right', 'right_native')
    return left, right

def vectorize(data):
    """Turn every record into two rows, left eye first and then right eye.

    Each row is ``[age, strength, sight]``. ``strength`` is the larger of the
    absolute sphere and absolute cylinder correction of that eye. ``sight``
    comes from ``get_sight_from_h_state``.

    Args:
        data: iterable of record dicts with ``age`` and the per-eye
            correction and sight keys.

    Returns:
        ``np.array`` built from the collected rows.
    """
    rows = []
    for record in data:
        years = int(record['age'])
        sphere_l = diopters_to_num(record['correction_left_sphere'])
        sphere_r = diopters_to_num(record['correction_right_sphere'])
        cylinder_l = diopters_to_num(record['correction_left_cylinder'])
        cylinder_r = diopters_to_num(record['correction_right_cylinder'])
        sight_l, sight_r = get_sight_from_h_state(record)

        strength_l = max(abs(sphere_l), abs(cylinder_l))
        strength_r = max(abs(sphere_r), abs(cylinder_r))
        rows.extend((
            [years, strength_l, sight_l],
            [years, strength_r, sight_r],
        ))
    return np.array(rows)

def normalize(data):
    """Scale each row: first column / 100, second column / 10, third unchanged.

    Args:
        data: iterable of indexable rows with at least three elements.

    Returns:
        A new list of three-element lists.
    """
    scaled = []
    for row in data:
        scaled.append([row[0] / 100, row[1] / 10, row[2]])
    return scaled

In [None]:
# from sklearn.cluster import KMeans
# from sklearn.metrics import silhouette_score
# import matplotlib.pyplot as plt

# range_n_clusters = [2, 3, 4, 5, 6, 7, 8, 9, 10]
# silhouette_scores = []
# X = normalize(vectorize(data))

# for n_clusters in range_n_clusters:
#     # Create KMeans instance
#     kmeans = KMeans(n_clusters=n_clusters, random_state=0, n_init='auto')
#     cluster_labels = kmeans.fit_predict(X)

#     # Calculate silhouette score
#     silhouette_avg = silhouette_score(X, cluster_labels)
#     silhouette_scores.append(silhouette_avg)

# # Plotting the silhouette scores
# plt.plot(range_n_clusters, silhouette_scores, marker='o')
# plt.xlabel('Number of clusters')
# plt.ylabel('Silhouette score')
# plt.title('Silhouette scores for different numbers of clusters')
# plt.xticks(range_n_clusters)
# plt.show()

In [None]:
# kmeans = KMeans(n_clusters=10, random_state=0, n_init='auto')
# kmeans.fit(X)
# kmeans.cluster_centers_
# # kmeans.predict([[0.22, 0.125, 1]])

Extracting rules

In [None]:
# Rows are [age / 100, correction strength / 10, sight], two per record.
vectorized_data = normalize(vectorize(data))
# Inputs are the first two columns; the target is the third column.
X = np.array([row[:2] for row in vectorized_data])
Y = np.array([row[2] for row in vectorized_data])

In [None]:
# Build the TS_FNN model imported from `extract_rules`.
# NOTE(review): [3, 3] presumably gives three fuzzy sets to each of the two
# inputs, and the index pairs presumably pick one set per input for each of
# the six rules — confirm against the TS_FNN constructor.
ts_fnn = TS_FNN([3, 3], [[0, 1], [1, 0], [1, 1], [1, 2], [2, 2], [2, 1]])
# Assign a new 4-tuple: the first element is replaced by hand-picked values,
# the other three elements are kept as the constructor left them.
# NOTE(review): the two rows presumably are starting centres for the
# age / 100 and strength / 10 inputs — confirm.
ts_fnn.parameters = ([[0.01, 0.30, 0.70], [0., 0.2, 0.6]],
                     ts_fnn.parameters[1],
                     ts_fnn.parameters[2],
                     ts_fnn.parameters[3])
# Fit on the two-column inputs X and the sight targets Y built in the previous cell.
ts_fnn.fit(X, Y)

Iteration 1 Epoch 0 log likelihood -1062.31581008674
Iteration 2 Epoch 0 log likelihood -1672.3199428958499
Iteration 3 Epoch 0 log likelihood -1620.9140026158846
Iteration 4 Epoch 0 log likelihood -1221.5956300742841
Iteration 5 Epoch 0 log likelihood -758.1517542727731
Iteration 6 Epoch 0 log likelihood -852.3556797178518
Iteration 7 Epoch 0 log likelihood -1175.8892467117882
Iteration 8 Epoch 0 log likelihood -940.0517405325245
Iteration 9 Epoch 0 log likelihood -669.058914282461
Iteration 10 Epoch 0 log likelihood -919.4022654196062
Iteration 11 Epoch 0 log likelihood -694.3461641155653
Iteration 12 Epoch 0 log likelihood -411.48342373887795
Iteration 13 Epoch 0 log likelihood -758.5973241470671
Iteration 14 Epoch 0 log likelihood -322.1890010783304
Iteration 15 Epoch 0 log likelihood -365.79535629667555
Iteration 16 Epoch 0 log likelihood -556.7190825302459
Iteration 17 Epoch 0 log likelihood -393.43110442250764
Iteration 18 Epoch 0 log likelihood -276.95880996738185
Iteration 19 

In [None]:
# Inspect the parameters after fitting; the first element is the one that was
# set by hand before the fit() call.
ts_fnn.parameters

([[array(0.38206485), array(0.15603732), array(1.14660242)],
  [array(-0.10186129), array(0.17201042), array(0.97525409)]],
 [array([0.32184362, 0.47900543, 0.95919774]),
  array([0.91372548, 1.05166494, 0.34533523])],
 array([1.34539053, 0.65109655, 0.55796106, 0.18582583, 0.12359252,
        0.08747642]),
 [array([0.69079616, 0.71392254]),
  array([0.31045413, 0.68527253]),
  array([0.26309087, 0.62463879]),
  array([-0.18671678,  0.32097147]),
  array([-0.27881538,  0.26822033]),
  array([-0.560979  ,  0.57458061])])

In [None]:
# Inputs use the same scaling as the training columns (age / 100, strength / 10).
# NOTE(review): so [0.57, 0] presumably means age 57 with no correction — confirm.
ts_fnn.predict([0.57, 0])

0.7538665601299405