# Get started with inverse modeling

Add the project root directory to the Python import path and select which GPU to use.

In [1]:
import sys
import os

# Set path to root directory
# The membership check keeps this cell idempotent: re-running it will not
# pile up duplicate entries on sys.path.
PROJECT_ROOT = r'/home/rlfowler/Documents/research/tfo_inverse_modelling'
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

# Set my GPU
# NOTE: this must run before any CUDA-backed library initialises the driver;
# the variable is read once at initialisation, so changing it afterwards has
# no effect on the running kernel.
os.environ["CUDA_VISIBLE_DEVICES"] = "2"

## Read in Data

In [2]:
import pandas as pd

# Absolute path of the pickled DataFrame read by this notebook.
DATA_PATH = r'/home/rlfowler/Documents/research/tfo_inverse_modelling/Randalls Folder/data/randall_data_intensities.pkl'
# Currently unused; kept for reference only.
#CONFIG_PATH = r'/home/rlfowler/Documents/research/tfo_sim/data/compiled_intensity/randall_data.json'

# Load the data
# NOTE(security): unpickling can execute arbitrary code, so only point
# DATA_PATH at files from a trusted source.
data = pd.read_pickle(DATA_PATH)
# Report (rows, columns) of the loaded frame.
print(data.shape)
# Last expression of the cell: rendered as the cell output (first five rows).
data.head()

(35929014, 47)


Unnamed: 0,Maternal Wall Thickness,Fetal Radius,Fetal Displacement,Maternal Hb Concentration,Maternal Saturation,Fetal Hb Concentration,Fetal Saturation,10.0_1.0,15.0_1.0,19.0_1.0,...,55.0_2.0,59.0_2.0,64.0_2.0,68.0_2.0,72.0_2.0,77.0_2.0,81.0_2.0,86.0_2.0,90.0_2.0,94.0_2.0
43923,2.0,50.0,5.0,11.0,0.9,10.725,0.1,4.1e-05,5e-06,1e-06,...,6.001589e-15,5.848743e-15,2.763929e-16,1.500358e-17,3.3986070000000002e-18,1.015295e-18,7.130596999999999e-20,8.710054e-22,4.0124559999999997e-20,2.3557480000000003e-22
43924,2.0,50.0,5.0,11.0,0.9,10.725,0.15,4.1e-05,5e-06,1e-06,...,5.893694e-15,5.768395e-15,2.710312e-16,1.4636420000000003e-17,3.212955e-18,9.575111e-19,6.831443999999999e-20,8.155644e-22,3.8358879999999996e-20,2.151092e-22
43925,2.0,50.0,5.0,11.0,0.9,10.725,0.2,4.1e-05,5e-06,1e-06,...,5.789363e-15,5.689761e-15,2.658178e-16,1.4287270000000002e-17,3.039417e-18,9.030256999999999e-19,6.545149e-20,7.6462310000000005e-22,3.66709e-20,1.9642150000000002e-22
43926,2.0,50.0,5.0,11.0,0.9,10.725,0.25,4.1e-05,5e-06,1e-06,...,5.688417e-15,5.612798e-15,2.607455e-16,1.3955180000000002e-17,2.877101e-18,8.516512999999999e-19,6.271132e-20,7.177746000000001e-22,3.505719e-20,1.793575e-22
43927,2.0,50.0,5.0,11.0,0.9,10.725,0.3,4.1e-05,5e-06,1e-06,...,5.59072e-15,5.537444e-15,2.558081e-16,1.3639310000000001e-17,2.725183e-18,8.032089999999999e-19,6.008842e-20,6.746510000000001e-22,3.35145e-20,1.637759e-22


### Select subset of data

In [3]:
def print_columns(data, n_columns=7):
    """Print the row count and the distinct values of the leading columns.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to summarise.
    n_columns : int or None, default 7
        How many leading columns to report. The default matches the previous
        hard-coded behaviour; pass ``None`` to report every column.

    Returns
    -------
    None
        The summary is written to standard output, one line per column in the
        form ``name(number_of_distinct_values): values``.
    """
    size = data.shape[0]
    print(f"Size of data: {size}")
    # Slicing with None as the stop selects all columns.
    for col in data.columns[:n_columns]:
        u = data[col].unique()  # distinct values in order of first appearance
        print(f"{col}({len(u)}): {u}")

# Show the row count and the distinct values of the leading columns of the
# loaded data, to see which levels are available for each of them.
print_columns(data)

Size of data: 35929014
Maternal Wall Thickness(9): [ 2.  3.  4.  5.  6.  7.  8.  9. 10.]
Fetal Radius(23): [ 50.  70.  90. 110. 130. 150. 170. 190. 210. 230. 250. 270. 290. 310.
 330. 350. 370. 390. 410. 430. 450. 470. 490.]
Fetal Displacement(9): [ 5. 15. 20. 25. 40. 10. 35.  0. 30.]
Maternal Hb Concentration(11): [11.  11.5 12.  12.5 13.  13.5 14.  14.5 15.  15.5 16. ]
Maternal Saturation(11): [0.9  0.91 0.92 0.93 0.94 0.95 0.96 0.97 0.98 0.99 1.  ]
Fetal Hb Concentration(33): [10.725  11.     11.2125 11.275  11.5    11.7    11.7875 12.     12.1875
 12.3    12.5    12.675  12.8125 13.     13.1625 13.325  13.5    13.65
 13.8375 14.     14.1375 14.35   14.5    14.625  14.8625 15.     15.1125
 15.375  15.5    15.6    15.8875 16.     16.4   ]
Fetal Saturation(11): [0.1  0.15 0.2  0.25 0.3  0.35 0.4  0.45 0.5  0.55 0.6 ]


In [4]:
import numpy as np

# Sorted distinct levels of the four columns that are thinned out.
# r1 deliberately holds ALL fetal Hb levels; the kept subset is r1[1::3],
# which is what gets printed here and applied when filtering.
r1 = np.unique(data['Fetal Hb Concentration'])
# Only the first 11 fetal-radius levels are kept.
r2 = np.unique(data['Fetal Radius'])[0:11]
# Every second level of the maternal columns is kept.
r3 = np.unique(data['Maternal Saturation'])[0::2]
r4 = np.unique(data['Maternal Hb Concentration'])[0::2]

# One line per column, in the order: fetal Hb, fetal radius,
# maternal saturation, maternal Hb.
print(r1[1::3], r2, r3, r4, sep="\n")

[11.  11.5 12.  12.5 13.  13.5 14.  14.5 15.  15.5 16. ]
[ 50.  70.  90. 110. 130. 150. 170. 190. 210. 230. 250.]
[0.9  0.92 0.94 0.96 0.98 1.  ]
[11. 12. 13. 14. 15. 16.]


In [7]:
# Narrow the frame one column at a time: after each pass only rows whose value
# in that column is one of the selected levels remain.
filtered_data = data
for _column, _levels in (
    ('Fetal Hb Concentration', r1[1::3]),
    ('Fetal Radius', r2),
    ('Maternal Saturation', r3),
    ('Maternal Hb Concentration', r4),
):
    filtered_data = filtered_data.loc[filtered_data[_column].isin(_levels)]

# Report the shrink factor while `data` still refers to the unfiltered frame.
print(f"Reduced data by factor of {data.shape[0]/filtered_data.shape[0]:0.2f}: {filtered_data.shape}")

# From here on `data` is the reduced frame; drop the temporary names so the
# namespace is left as before.
data = filtered_data
del filtered_data, _column, _levels

Reduced data by factor of 20.42: (1759824, 47)


### Select output to predict

In [8]:
# Split the columns at position 7: the first seven are the prediction targets,
# everything after them is a model input.
# (Alternative considered for the targets: only ["Fetal Saturation"].)
y_columns, x_columns = data.columns[:7], data.columns[7:]
OUT_FEATURES, IN_FEATURES = map(len, (y_columns, x_columns))

print(f"y_columns: {y_columns.tolist()}")
print(f"x_columns: {x_columns.tolist()}")
print("In Features :", IN_FEATURES)
print("Out Features:", OUT_FEATURES)

y_columns: ['Maternal Wall Thickness', 'Fetal Radius', 'Fetal Displacement', 'Maternal Hb Concentration', 'Maternal Saturation', 'Fetal Hb Concentration', 'Fetal Saturation']
x_columns: ['10.0_1.0', '15.0_1.0', '19.0_1.0', '24.0_1.0', '28.0_1.0', '33.0_1.0', '37.0_1.0', '41.0_1.0', '46.0_1.0', '50.0_1.0', '55.0_1.0', '59.0_1.0', '64.0_1.0', '68.0_1.0', '72.0_1.0', '77.0_1.0', '81.0_1.0', '86.0_1.0', '90.0_1.0', '94.0_1.0', '10.0_2.0', '15.0_2.0', '19.0_2.0', '24.0_2.0', '28.0_2.0', '33.0_2.0', '37.0_2.0', '41.0_2.0', '46.0_2.0', '50.0_2.0', '55.0_2.0', '59.0_2.0', '64.0_2.0', '68.0_2.0', '72.0_2.0', '77.0_2.0', '81.0_2.0', '86.0_2.0', '90.0_2.0', '94.0_2.0']
In Features : 40
Out Features: 7
