In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
from sklearn import preprocessing
from inverse_modelling_tfo.data import generate_data_loaders, equidistance_detector_normalization, constant_detector_count_normalization
from inverse_modelling_tfo.data.intensity_interpolation import interpolate_exp, get_interpolate_fit_params, exp_piecewise_affine
from inverse_modelling_tfo.data.interpolation_function_zoo import *
from inverse_modelling_tfo.features import LongToWideIntensityTransformation, ToFittingParameterTransformation
from inverse_modelling_tfo.models.custom_models import SplitChannelCNN, PerceptronReLU, PerceptronBN, PerceptronDO, PerceptronBD
from inverse_modelling_tfo.features.build_features import FetalACFeatureBuilder, RowCombinationFeatureBuilder, TwoColumnOperationFeatureBuilder, FetalACbyDCFeatureBuilder
from sklearn.feature_selection import mutual_info_regression
# Restrict CUDA to a single device for this kernel (device index "3").
os.environ["CUDA_VISIBLE_DEVICES"] = "3"

In [2]:
# Pickled intensity dataset. The commented-out path is an earlier dataset kept for reference.
# DATA_PATH = r'/home/rraiyan/personal_projects/tfo_inverse_modelling/data/intensity/s_based_intensity_low_conc3.pkl'
DATA_PATH = r"/home/rraiyan/personal_projects/tfo_inverse_modelling/inverse_modelling_tfo/tools/s_based_intensity_low_conc5.pkl"

data = pd.read_pickle(DATA_PATH)

# Apply the equidistance detector normalization. The return value is ignored, so the
# helper presumably modifies `data` in place -- TODO confirm against its implementation.
# NOTE(review): an earlier comment here spoke of dropping thickness values that lack
# nearby concentration points; no such filtering is visible in this cell.
equidistance_detector_normalization(data)

# Uterus Thickness is excluded for now.
data = data.drop(columns=["Uterus Thickness"])

# Interpolate the intensity to remove noise (a previous run used weights=(1, 0.8)),
# then let the interpolated values take over the 'Intensity' column.
data = interpolate_exp(
    data,
    weights=(1, 0.6),
    interpolation_function=exp_piecewise_affine,
    break_indices=[4, 12, 20],
)
data = data.assign(Intensity=data["Interpolated Intensity"]).drop(columns="Interpolated Intensity")

# Run the long-to-wide intensity transformation and keep the label / feature
# column names it reports for the cells below.
data_transformer = LongToWideIntensityTransformation()
data = data_transformer.transform(data)
labels = data_transformer.get_label_names()
intensity_columns = data_transformer.get_feature_names()

# Cleanup: discard rows that contain any missing value.
data = data.dropna()

In [3]:
# ROUNDING = 3
# fetal_conc_group_mapping = {
#     0.110 : 0,
#     0.115 : 0, 
#     0.100 : 0,
#     0.125 : 1,
#     0.130 : 1,
#     0.145 : 1,
#     0.155 : 2,
#     0.160 : 2,
#     0.170 : 2
# }

In [4]:
# Number of decimal places the mapping keys below are written with.
ROUNDING = 2

# Fetal Hb concentration values, three per group; a tuple's position is its group id.
_fetal_conc_groups = (
    (10.45, 11.00, 11.40),
    (11.55, 12.00, 12.35),
    (12.60, 13.00, 13.30),
    (13.65, 14.00, 14.25),
    (14.70, 15.00, 15.75),
)

# Lookup table: concentration value -> group id.
fetal_conc_group_mapping = {
    concentration: group_id
    for group_id, concentrations in enumerate(_fetal_conc_groups)
    for concentration in concentrations
}

In [5]:
# Show the label column names reported by the long-to-wide transformer.
print(labels)

['Maternal Wall Thickness', 'Maternal Hb Concentration', 'Maternal Saturation', 'Fetal Hb Concentration', 'Fetal Saturation']


In [6]:
# Assign each row a fetal-concentration group id.
# The mapping keys are written with 2 decimal places, so the column is rounded to
# ROUNDING decimals first to get an exact key match. Series.map leaves NaN for any
# value that has no key in the mapping.
data['FconcCenters'] = data['Fetal Hb Concentration'].round(ROUNDING).map(fetal_conc_group_mapping)    

# Build Features
# (Earlier experiment, kept for reference: AC features followed by a column-wise ratio.)
# feature_builder1 = FetalACFeatureBuilder('FconcCenters', 'perm', '-')
# data1 = feature_builder1.build_feature(data)
# x_columns1 = feature_builder1.get_feature_names()
# # data1[x_columns1] = np.log10(np.abs(data1[x_columns1]))
# feature_builder2 =  TwoColumnOperationFeatureBuilder(x_columns1[:len(x_columns1)//2], x_columns1[len(x_columns1)//2:], "/", False)
# data1 = feature_builder2.build_feature(data1)

# data["Intensity"] = np.log10(data["Intensity"])
# Step 1: AC-by-DC features built from the intensity columns, using 'FconcCenters',
# 'perm' and "max" as configured here (exact semantics live in the builder class).
feature_builder3 = FetalACbyDCFeatureBuilder('FconcCenters', 'perm', intensity_columns, labels, "max")
x_columns3 = feature_builder3.get_feature_names()
# feature_builder3 = FetalACFeatureBuilder('FconcCenters', 'perm', '-')

# Step 2: "*" applied with the same column list on both sides; going by the resulting
# column names (X_*_X), each step-1 feature is multiplied by itself.
feature_builder4 = TwoColumnOperationFeatureBuilder(x_columns3, x_columns3, "*", False, x_columns3, labels)
x_columns4 = feature_builder4.get_feature_names()

# Step 3: "/" with the second half of the step-2 columns as the first operand and the
# first half as the second; the resulting names read (WV2*WV2)/(WV1*WV1) per detector index.
feature_builder5 = TwoColumnOperationFeatureBuilder(x_columns4[len(x_columns4)//2:], x_columns4[:len(x_columns4)//2], "/", False, x_columns4, labels)

# Apply the three builders in order: 3 -> 4 -> 5.
data1 = feature_builder5(feature_builder4(feature_builder3(data)))

# build spatial intensity combos
# (Disabled alternative feature set, kept for reference.)
# fixed_columns = ['Maternal Wall Thickness', "Maternal Hb Concentration", "Maternal Saturation", "Fetal Saturation", "FconcCenters"]
# data2, x_columns2, labels = create_spatial_intensity(data)
# data2, x_columns2, labels = create_row_combos(data2, x_columns2, fixed_columns, ["Fetal Hb Concentration"], combo_count=2, perm_or_comb="perm")
# data = pd.merge(data1, data2, how='inner')

# From here on `data` refers to the engineered feature table, not the wide intensity table.
data = data1

In [7]:
# Preview the first rows of the engineered feature table.
data.head()

Unnamed: 0,Maternal Wall Thickness,Maternal Hb Concentration,Maternal Saturation,Fetal Saturation,FconcCenters,Fetal Hb Concentration 0,Fetal Hb Concentration 1,MAX_ACbyDC_WV2_0_*_MAX_ACbyDC_WV2_0_/_MAX_ACbyDC_WV1_0_*_MAX_ACbyDC_WV1_0,MAX_ACbyDC_WV2_1_*_MAX_ACbyDC_WV2_1_/_MAX_ACbyDC_WV1_1_*_MAX_ACbyDC_WV1_1,MAX_ACbyDC_WV2_2_*_MAX_ACbyDC_WV2_2_/_MAX_ACbyDC_WV1_2_*_MAX_ACbyDC_WV1_2,...,MAX_ACbyDC_WV2_10_*_MAX_ACbyDC_WV2_10_/_MAX_ACbyDC_WV1_10_*_MAX_ACbyDC_WV1_10,MAX_ACbyDC_WV2_11_*_MAX_ACbyDC_WV2_11_/_MAX_ACbyDC_WV1_11_*_MAX_ACbyDC_WV1_11,MAX_ACbyDC_WV2_12_*_MAX_ACbyDC_WV2_12_/_MAX_ACbyDC_WV1_12_*_MAX_ACbyDC_WV1_12,MAX_ACbyDC_WV2_13_*_MAX_ACbyDC_WV2_13_/_MAX_ACbyDC_WV1_13_*_MAX_ACbyDC_WV1_13,MAX_ACbyDC_WV2_14_*_MAX_ACbyDC_WV2_14_/_MAX_ACbyDC_WV1_14_*_MAX_ACbyDC_WV1_14,MAX_ACbyDC_WV2_15_*_MAX_ACbyDC_WV2_15_/_MAX_ACbyDC_WV1_15_*_MAX_ACbyDC_WV1_15,MAX_ACbyDC_WV2_16_*_MAX_ACbyDC_WV2_16_/_MAX_ACbyDC_WV1_16_*_MAX_ACbyDC_WV1_16,MAX_ACbyDC_WV2_17_*_MAX_ACbyDC_WV2_17_/_MAX_ACbyDC_WV1_17_*_MAX_ACbyDC_WV1_17,MAX_ACbyDC_WV2_18_*_MAX_ACbyDC_WV2_18_/_MAX_ACbyDC_WV1_18_*_MAX_ACbyDC_WV1_18,MAX_ACbyDC_WV2_19_*_MAX_ACbyDC_WV2_19_/_MAX_ACbyDC_WV1_19_*_MAX_ACbyDC_WV1_19
0,6.0,10.45,0.9,0.2,1.0,11.55,12.0,0.001378,0.002712,0.007628,...,0.69416,0.724642,0.378328,0.431282,0.504064,0.567882,0.655105,0.731212,0.834785,0.924817
1,6.0,10.45,0.9,0.2,1.0,12.0,11.55,0.001378,0.002712,0.007628,...,0.69416,0.724642,0.378328,0.431282,0.504064,0.567882,0.655105,0.731212,0.834785,0.924817
2,6.0,10.45,0.9,0.2,2.0,12.6,13.0,0.001425,0.002792,0.007754,...,0.691554,0.721471,0.38321,0.437253,0.511271,0.575921,0.663906,0.740327,0.843819,0.933317
3,6.0,10.45,0.9,0.2,2.0,13.0,12.6,0.001425,0.002792,0.007754,...,0.691554,0.721471,0.38321,0.437253,0.511271,0.575921,0.663906,0.740327,0.843819,0.933317
4,6.0,10.45,0.9,0.2,3.0,13.65,14.0,0.001469,0.002867,0.007876,...,0.690934,0.720391,0.387444,0.442621,0.517936,0.583474,0.672296,0.749105,0.852629,0.941709


In [8]:
# Input columns: the feature names reported by the last builder in the chain.
x_columns = feature_builder5.get_feature_names()
# Alternative input sets from earlier experiments:
# x_columns = feature_builder4.get_feature_names()
# x_columns = feature_builder2.get_feature_names()
# x_columns = x_columns2
# y_columns = feature_builder1.get_label_names()
# Target columns: the label names reported by feature_builder3, without 'FconcCenters'.
y_columns = feature_builder3.get_label_names()
# NOTE(review): if get_label_names() returns the builder's own list rather than a copy,
# this remove() also edits the builder's state and raises ValueError when the cell is
# run a second time -- confirm against the builder implementation.
y_columns.remove('FconcCenters')


In [9]:
# Replace the two fetal Hb concentration labels with their difference, "ConcDiff".
data["ConcDiff"] = data["Fetal Hb Concentration 1"] - data["Fetal Hb Concentration 0"]

# Rebuild the label list in a single step instead of append()/remove(), so that
# re-running this cell neither raises ValueError (labels already removed) nor leaves a
# duplicate "ConcDiff" entry. Slice assignment keeps the same list object, exactly as
# the in-place append()/remove() calls did.
_superseded_labels = ("Fetal Hb Concentration 0", "Fetal Hb Concentration 1", "ConcDiff")
y_columns[:] = [name for name in y_columns if name not in _superseded_labels] + ["ConcDiff"]

print(y_columns)

['Maternal Wall Thickness', 'Maternal Hb Concentration', 'Maternal Saturation', 'Fetal Saturation', 'ConcDiff']


In [10]:
# # Clean data before calculating mutual info
# data.dropna(inplace=True)

# for target in y_columns:
#     mutual_info = mutual_info_regression(data[x_columns], data[target])
#     mutual_info = pd.Series(mutual_info)
#     mutual_info.index = pd.Index(x_columns)
#     print("Target = ", target)
#     print(mutual_info.sort_values(ascending=False)[:10])

In [11]:
# Rank the features by estimated mutual information with one target column.
# The target is the second-to-last entry of y_columns, which is 'Fetal Saturation'
# for the y_columns list printed above.
mi_target = y_columns[-2]

# mutual_info_regression is a randomized estimator; fixing random_state makes the
# ranking reproducible across kernel restarts.
mutual_info = pd.Series(
    mutual_info_regression(data[x_columns], data[mi_target], random_state=0),
    index=pd.Index(x_columns),
)

# Show the 20 highest-scoring features.
print(mutual_info.sort_values(ascending=False).head(20))

MAX_ACbyDC_WV2_19_*_MAX_ACbyDC_WV2_19_/_MAX_ACbyDC_WV1_19_*_MAX_ACbyDC_WV1_19    0.871650
MAX_ACbyDC_WV2_18_*_MAX_ACbyDC_WV2_18_/_MAX_ACbyDC_WV1_18_*_MAX_ACbyDC_WV1_18    0.854422
MAX_ACbyDC_WV2_17_*_MAX_ACbyDC_WV2_17_/_MAX_ACbyDC_WV1_17_*_MAX_ACbyDC_WV1_17    0.848537
MAX_ACbyDC_WV2_16_*_MAX_ACbyDC_WV2_16_/_MAX_ACbyDC_WV1_16_*_MAX_ACbyDC_WV1_16    0.834175
MAX_ACbyDC_WV2_15_*_MAX_ACbyDC_WV2_15_/_MAX_ACbyDC_WV1_15_*_MAX_ACbyDC_WV1_15    0.805810
MAX_ACbyDC_WV2_14_*_MAX_ACbyDC_WV2_14_/_MAX_ACbyDC_WV1_14_*_MAX_ACbyDC_WV1_14    0.771645
MAX_ACbyDC_WV2_3_*_MAX_ACbyDC_WV2_3_/_MAX_ACbyDC_WV1_3_*_MAX_ACbyDC_WV1_3        0.679669
MAX_ACbyDC_WV2_13_*_MAX_ACbyDC_WV2_13_/_MAX_ACbyDC_WV1_13_*_MAX_ACbyDC_WV1_13    0.676952
MAX_ACbyDC_WV2_2_*_MAX_ACbyDC_WV2_2_/_MAX_ACbyDC_WV1_2_*_MAX_ACbyDC_WV1_2        0.652549
MAX_ACbyDC_WV2_11_*_MAX_ACbyDC_WV2_11_/_MAX_ACbyDC_WV1_11_*_MAX_ACbyDC_WV1_11    0.583591
MAX_ACbyDC_WV2_12_*_MAX_ACbyDC_WV2_12_/_MAX_ACbyDC_WV1_12_*_MAX_ACbyDC_WV1_12    0.583182
MAX_ACbyDC