In [1]:
from distutils.log import Log
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
from sklearn import preprocessing
from inverse_modelling_tfo.data import (
    generate_data_loaders,
    equidistance_detector_normalization,
    constant_detector_count_normalization,
)
from inverse_modelling_tfo.data.intensity_interpolation import (
    interpolate_exp,
    get_interpolate_fit_params,
    exp_piecewise_affine,
)
from inverse_modelling_tfo.data.interpolation_function_zoo import *
from inverse_modelling_tfo.features import LongToWideIntensityTransformation, ToFittingParameterTransformation
from inverse_modelling_tfo.models.custom_models import (
    SplitChannelCNN,
    PerceptronReLU,
    PerceptronBN,
    PerceptronDO,
    PerceptronBD,
)
from inverse_modelling_tfo.features.build_features import (
    FetalACFeatureBuilder,
    RowCombinationFeatureBuilder,
    TwoColumnOperationFeatureBuilder,
    FetalACbyDCFeatureBuilder,
    ConcatenateFeatureBuilder,
    LogTransformFeatureBuilder
)
from inverse_modelling_tfo.features.data_transformations import (
    LongToWideIntensityTransformation,
    ToFittingParameterTransformation,
)
from sklearn.feature_selection import mutual_info_regression

# Set my GPU: restrict CUDA to device index 3 for this kernel.
# NOTE(review): the index is machine-specific — confirm it exists on the host running this notebook.
os.environ["CUDA_VISIBLE_DEVICES"] = "3"

In [2]:
# Pickled intensity table used throughout this notebook.
DATA_PATH = r"/home/rraiyan/personal_projects/tfo_inverse_modelling/inverse_modelling_tfo/tools/s_based_intensity_low_conc5.pkl"

data = pd.read_pickle(DATA_PATH)
# Called for its effect on `data`; the return value is discarded.
# NOTE(review): presumably normalises the detector intensities in place — confirm.
equidistance_detector_normalization(data)

# Uterus thickness is left out for now.
data = data.drop(columns="Uterus Thickness")

# Interpolate the intensity to remove noise, then let the interpolated values
# take the place of the raw "Intensity" column.
data = interpolate_exp(
    data,
    weights=(1, 0.6),
    interpolation_function=exp_piecewise_affine,
    break_indices=[4, 12, 20],
)
data = data.assign(Intensity=data["Interpolated Intensity"]).drop(columns="Interpolated Intensity")

data_transformer = LongToWideIntensityTransformation()
fitting_param_transformer = ToFittingParameterTransformation()

# Both transformers consume the same long-format table, so the fitting
# parameters must be extracted before `data` is rebound to its wide form.
fitting_params = fitting_param_transformer.transform(data)
data = data_transformer.transform(data)
labels = data_transformer.get_label_names()
intensity_columns = data_transformer.get_feature_names()

# Cleanup: discard wide-format rows that contain missing values.
data.dropna(inplace=True)

In [3]:
# ROUNDING = 3
# fetal_conc_group_mapping = {
#     0.110 : 0,
#     0.115 : 0, 
#     0.100 : 0,
#     0.125 : 1,
#     0.130 : 1,
#     0.145 : 1,
#     0.155 : 2,
#     0.160 : 2,
#     0.170 : 2
# }

In [4]:
# Number of decimals kept when rounding concentrations for the group lookup.
ROUNDING = 2

# Fetal Hb concentration values, three per group; a tuple's position in this
# sequence is the group index its values map to.
_FCONC_VALUES_BY_GROUP = (
    (10.45, 11.00, 11.40),
    (11.55, 12.00, 12.35),
    (12.60, 13.00, 13.30),
    (13.65, 14.00, 14.25),
    (14.70, 15.00, 15.75),
)
fetal_conc_group_mapping = {
    conc: group
    for group, concs in enumerate(_FCONC_VALUES_BY_GROUP)
    for conc in concs
}

In [5]:
# Show the label column names reported by the long-to-wide transformer.
print(labels)

['Maternal Wall Thickness', 'Maternal Hb Concentration', 'Maternal Saturation', 'Fetal Hb Concentration', 'Fetal Saturation']


In [6]:
# The mapping keys carry ROUNDING decimals, so round the concentrations (to the
# nearest value, not upwards) before the lookup to get exact key matches.
# Concentrations that have no key in the mapping come out as NaN.
fconc_rounded = data['Fetal Hb Concentration'].round(ROUNDING)
data['FconcCenters'] = fconc_rounded.map(fetal_conc_group_mapping)

# Column assignment aligns on the index.
# NOTE(review): this relies on fitting_params and data sharing row labels — confirm.
fitting_params['FconcCenters'] = data['FconcCenters']

# Columns held fixed when rows are combined in the feature builders.
fixed_columns = [
    'Maternal Wall Thickness',
    "Maternal Hb Concentration",
    "Maternal Saturation",
    "Fetal Saturation",
    "FconcCenters",
]

In [7]:
# Build Features
# feature_builder1 = FetalACFeatureBuilder('FconcCenters', 'perm', '-')
# data1 = feature_builder1.build_feature(data)
# x_columns1 = feature_builder1.get_feature_names()
# # data1[x_columns1] = np.log10(np.abs(data1[x_columns1]))
# feature_builder2 =  TwoColumnOperationFeatureBuilder(x_columns1[:len(x_columns1)//2], x_columns1[len(x_columns1)//2:], "/", False)
# data1 = feature_builder2.build_feature(data1)

# data["Intensity"] = np.log10(data["Intensity"])
# feature_builder3 = FetalACbyDCFeatureBuilder('FconcCenters', 'perm', intensity_columns, labels, "max")
# x_columns3 = feature_builder3.get_feature_names()
# # feature_builder3 = FetalACFeatureBuilder('FconcCenters', 'perm', '-')

# feature_builder4 = TwoColumnOperationFeatureBuilder(x_columns3, x_columns3, "*", False, x_columns3, labels)
# x_columns4 = feature_builder4.get_feature_names()

# feature_builder5 = TwoColumnOperationFeatureBuilder(x_columns4[len(x_columns4)//2:], x_columns4[:len(x_columns4)//2], "/", False, x_columns4, labels)

# data1 = feature_builder5(feature_builder4(feature_builder3(data)))


# Fitting Parameter Manipulations

In [8]:
# Base builder that every operation builder below is chained from.
# NOTE(review): presumably combines pairs of rows ('perm', 2) that agree on
# fixed_columns — confirm against RowCombinationFeatureBuilder.
fb0 = RowCombinationFeatureBuilder(
    fitting_param_transformer.get_feature_names(),
    fixed_columns,
    ["Fetal Hb Concentration"],
    'perm',
    2,
)
x_columns0 = fb0.get_feature_names()
half = len(x_columns0) // 2

# AC operation: first half of the combined columns against the second half.
fb1, fb2, fb3, fb4 = (
    TwoColumnOperationFeatureBuilder.from_chain(fb0, x_columns0[:half], x_columns0[half:], op, False)
    for op in ('*', '+', '-', '/')
)
# wv1 and wv2 operation: even-positioned columns against odd-positioned ones.
fb5, fb6, fb7, fb8 = (
    TwoColumnOperationFeatureBuilder.from_chain(fb0, x_columns0[::2], x_columns0[1::2], op, False)
    for op in ('+', '-', '*', '/')
)

# Concatenate
fb9 = ConcatenateFeatureBuilder([fb1, fb2, fb3, fb4, fb5, fb6, fb7, fb8])

# From here on `data` refers to the engineered table built from the fitting
# parameters; `data1` is kept as an alias of the same object.
data = data1 = fb9(fitting_params)

In [None]:
# Preview the first rows of the engineered feature table.
data.head()

Unnamed: 0,Maternal Wall Thickness,Maternal Hb Concentration,Maternal Saturation,Fetal Saturation,FconcCenters,Fetal Hb Concentration 0,Fetal Hb Concentration 1,alpha0_1.0_1_*_alpha0_1.0_2,alpha0_2.0_1_*_alpha0_2.0_2,alpha1_1.0_1_*_alpha1_1.0_2,...,alpha2_1.0_1_/_alpha2_2.0_1,alpha3_1.0_1_/_alpha3_2.0_1,alpha4_1.0_1_/_alpha4_2.0_1,alpha5_1.0_1_/_alpha5_2.0_1,alpha0_1.0_2_/_alpha0_2.0_2,alpha1_1.0_2_/_alpha1_2.0_2,alpha2_1.0_2_/_alpha2_2.0_2,alpha3_1.0_2_/_alpha3_2.0_2,alpha4_1.0_2_/_alpha4_2.0_2,alpha5_1.0_2_/_alpha5_2.0_2
0,6.0,10.45,0.9,0.2,1.0,11.55,12.0,14.291037,7.145716,0.395758,...,0.970392,0.704388,0.88335,0.721638,1.413738,0.811464,0.97105,0.705233,0.884788,0.720139
1,6.0,10.45,0.9,0.2,1.0,12.0,11.55,14.291037,7.145716,0.395758,...,0.97105,0.705233,0.884788,0.720139,1.414651,0.811134,0.970392,0.704388,0.88335,0.721638
2,6.0,10.45,0.9,0.2,2.0,12.6,13.0,14.241688,7.140945,0.396538,...,0.971879,0.706322,0.886619,0.718244,1.411857,0.812144,0.972404,0.707024,0.88779,0.717041
3,6.0,10.45,0.9,0.2,2.0,13.0,12.6,14.241688,7.140945,0.396538,...,0.972404,0.707024,0.88779,0.717041,1.412586,0.81188,0.971879,0.706322,0.886619,0.718244
4,6.0,10.45,0.9,0.2,3.0,13.65,14.0,14.197301,7.136642,0.397241,...,0.973213,0.708127,0.889615,0.715181,1.410156,0.812758,0.973629,0.708701,0.890561,0.714224


In [None]:
# Input (feature) and target (label) column names, both taken from the
# concatenated feature builder.
x_columns, y_columns = fb9.get_feature_names(), fb9.get_label_names()

In [None]:
# Replace the two per-row fetal concentrations with their difference as a label.
conc_second = data["Fetal Hb Concentration 1"]
conc_first = data["Fetal Hb Concentration 0"]
data["ConcDiff"] = conc_second - conc_first
if "ConcDiff" not in y_columns:
    y_columns.append("ConcDiff")

# Labels to retire if present (the guards keep the cell safe to re-run).
# NOTE(review): "FConcCenters" does not match the actual column name
# "FconcCenters" (lower-case 'c'), so that label is never removed. Correcting
# the spelling would shorten y_columns and change what positional lookups such
# as y_columns[-2] select — decide on the intended label list before fixing.
for retired_label in ("Fetal Hb Concentration 1", "Fetal Hb Concentration 0", "FConcCenters"):
    if retired_label in y_columns:
        y_columns.remove(retired_label)
print(y_columns)

['Maternal Wall Thickness', 'Maternal Hb Concentration', 'Maternal Saturation', 'Fetal Saturation', 'FconcCenters', 'ConcDiff']


In [None]:
# # Clean data before calculating mutual info
# data.dropna(inplace=True)

# for target in y_columns:
#     mutual_info = mutual_info_regression(data[x_columns], data[target])
#     mutual_info = pd.Series(mutual_info)
#     mutual_info.index = pd.Index(x_columns)
#     print("Target = ", target)
#     print(mutual_info.sort_values(ascending=False)[:10])

In [None]:
# MI with a single target.
# The target is named explicitly instead of being picked by position in
# y_columns, so edits to the label list cannot silently change which column
# is analysed.
MI_TARGET = "FconcCenters"
# mutual_info_regression adds small random noise to continuous variables;
# fixing the seed makes the ranking reproducible across runs.
MI_RANDOM_STATE = 0

mutual_info = pd.Series(
    mutual_info_regression(data[x_columns], data[MI_TARGET], random_state=MI_RANDOM_STATE),
    index=pd.Index(x_columns),
)
# Show the 20 features sharing the most information with the target.
print(mutual_info.sort_values(ascending=False).head(20))

alpha1_2.0_1_*_alpha1_2.0_2    1.168729
alpha1_2.0_1_+_alpha1_2.0_2    1.167222
alpha0_2.0_1_*_alpha0_2.0_2    1.158065
alpha0_2.0_1_+_alpha0_2.0_2    1.157544
alpha0_2.0_1_-_alpha0_2.0_2    0.906507
alpha1_2.0_1_-_alpha1_2.0_2    0.895405
alpha1_1.0_1_-_alpha1_1.0_2    0.846460
alpha0_1.0_1_-_alpha0_1.0_2    0.810538
alpha5_2.0_1_+_alpha5_2.0_2    0.805184
alpha5_2.0_1_*_alpha5_2.0_2    0.804817
alpha3_1.0_1_-_alpha3_1.0_2    0.661388
alpha2_1.0_1_-_alpha2_1.0_2    0.609916
alpha5_2.0_1_-_alpha5_2.0_2    0.609258
alpha3_2.0_1_-_alpha3_2.0_2    0.600818
alpha4_1.0_1_-_alpha4_1.0_2    0.593268
alpha0_2.0_1_/_alpha0_2.0_2    0.592959
alpha3_2.0_1_+_alpha3_2.0_2    0.580371
alpha3_2.0_1_*_alpha3_2.0_2    0.580359
alpha4_2.0_1_*_alpha4_2.0_2    0.571663
alpha2_2.0_1_-_alpha2_2.0_2    0.571133
dtype: float64
