# Notebook to plot the distribution of the caudal-rostral length of the spinal levels

In [265]:
import os

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

In [29]:
# Path to the CSV file with information about the predicted spinal levels.
# Set the DIST_PRED_CSV environment variable to point at your own copy of the
# file; the fallback is the location used when the notebook was written, so
# existing runs keep reading the same file.
PATH_pred = os.environ.get(
    "DIST_PRED_CSV",
    "/Users/theomathieu/Downloads/fullD3_pred/res/dist_pred.csv",
)

In [30]:
# Load the per-level prediction table and preview its first rows.
df_pred = pd.read_csv(filepath_or_buffer=PATH_pred)
display(df_pred.head(n=5))

Unnamed: 0,level,sub_name,spinal_start,spinal_end,height,vertebrae_start,vertebrae_end
0,2,sub-mountSinai02_103_0000,208.0,219.0,8.8,0,1
1,3,sub-mountSinai02_103_0000,184.0,203.0,15.2,0,1
2,4,sub-mountSinai02_103_0000,166.0,179.0,10.4,0,1
3,5,sub-mountSinai02_103_0000,144.0,155.0,8.8,0,1
4,6,sub-mountSinai02_103_0000,123.0,136.0,10.4,0,1


In [175]:
# Standardise the heights (over all rows, not per level) so that outliers can
# be spotted; the result is stored next to the raw values.
heights = df_pred["height"]
z_scores = heights.sub(heights.mean()).div(heights.std())
df_pred["z_scores"] = z_scores

### Dataframe creation

In [240]:
# Long-format table of the individual predicted heights: one row per
# measurement, tagged with what the value is ("mesure") and where it comes
# from ("from").
df_height = (
    df_pred[["height", "level"]]
    .rename(columns={"height": "value"})
    .assign(mesure="length", **{"from": "pred_value"})
)
display(df_height.head())

Unnamed: 0,value,level,mesure,from
0,8.8,2,length,pred_value
1,15.2,3,length,pred_value
2,10.4,4,length,pred_value
3,8.8,5,length,pred_value
4,10.4,6,length,pred_value


In [241]:
# Append one "mean" row and one "std" row per spinal level to df_height.
#
# The statistics are computed from the individual "length" rows only. Computing
# them on the growing frame (as a row-by-row concat loop does) lets the freshly
# appended mean row count as an extra sample in the std, which biases it.
# Restricting to the "length" rows also makes this cell safe to re-run: the
# summary rows of a previous run are dropped instead of being duplicated.
length_rows = df_height[df_height["mesure"] == "length"]

# sort=False keeps the levels in order of first appearance.
level_stats = (
    length_rows.groupby("level", sort=False)["value"].agg(["mean", "std"])
)

stat_rows = [
    {"value": stats[mesure], "level": int(level), "mesure": mesure, "from": "pred"}
    for level, stats in level_stats.iterrows()
    for mesure in ("mean", "std")
]

if stat_rows:
    # Build the summary rows once instead of concatenating inside a loop.
    df_height = pd.concat(
        [length_rows, pd.DataFrame(stat_rows, columns=length_rows.columns)],
        ignore_index=True,
    )
else:
    df_height = length_rows.reset_index(drop=True)
display(df_height.head())

Unnamed: 0,value,level,mesure,from
0,8.8,2,length,pred_value
1,15.2,3,length,pred_value
2,10.4,4,length,pred_value
3,8.8,5,length,pred_value
4,10.4,6,length,pred_value


In [242]:
# Reference mean/std values per spinal level, keyed by the level as a string.
cadotte = {
    '3': {'mean': 10.5, 'std': 2.2},
    '4': {'mean': 9.9, 'std': 1.3},
    '5': {'mean': 10.5, 'std': 1.5},
    '6': {'mean': 9.7, 'std': 1.6},
    '7': {'mean': 9.4, 'std': 1.4},
    '8': {'mean': 9.6, 'std': 1.4},
}

# Wide table: one row per level, one column per statistic.
cadotte_table = pd.DataFrame(cadotte).T
cadotte_levels = cadotte_table.index.astype(int)

# Reshape each statistic into the long layout (value, level, from, mesure).
cadotte_long = {
    stat: cadotte_table[[stat]]
    .rename(columns={stat: "value"})
    .assign(level=cadotte_levels, **{"from": "cadotte", "mesure": stat})
    for stat in ("mean", "std")
}
df_cadotte_mean = cadotte_long["mean"]
df_cadotte_std = cadotte_long["std"]

# Stack the means on top of the stds with a fresh integer index.
df_cadotte = pd.concat([df_cadotte_mean, df_cadotte_std], ignore_index=True)
display(df_cadotte.head())

Unnamed: 0,value,level,from,mesure
0,10.5,3,cadotte,mean
1,9.9,4,cadotte,mean
2,10.5,5,cadotte,mean
3,9.7,6,cadotte,mean
4,9.4,7,cadotte,mean


In [243]:
# Stack the predicted rows and the cadotte reference rows into one long table
# with a fresh integer index.
frames_to_stack = (df_height, df_cadotte)
df = pd.concat(frames_to_stack, axis=0, ignore_index=True)
display(df.head())

Unnamed: 0,value,level,mesure,from
0,8.8,2,length,pred_value
1,15.2,3,length,pred_value
2,10.4,4,length,pred_value
3,8.8,5,length,pred_value
4,10.4,6,length,pred_value


In [303]:
def _first_value(rows, source):
    """Return the first ``value`` in ``rows`` whose ``from`` equals ``source``.

    Returns NaN when no row matches, so callers can treat "missing" and "NaN"
    the same way instead of catching an IndexError.
    """
    matches = rows.loc[rows["from"] == source, "value"].to_numpy()
    return matches[0] if matches.size else np.nan


def _add_mean_std_marker(fig, x, mean_value, std_value, color):
    """Add a mean marker at ``x`` and a vertical mean ± std bar to ``fig``.

    Nothing is drawn when the mean is NaN; the bar alone is skipped when the
    std is NaN (the marker is still drawn).
    """
    if np.isnan(mean_value):
        return
    if not np.isnan(std_value):
        fig.add_shape(type="line",
                      x0=x, y0=mean_value - std_value,
                      x1=x, y1=mean_value + std_value,
                      line=dict(width=1, color=color))
    fig.add_trace(go.Scatter(
        x=[x],  # x-coordinate of the point
        y=[mean_value],  # y-coordinate of the point
        mode='markers',
        marker=dict(color=color, size=5, symbol='circle'), showlegend=False
    ))


# Individual predicted values as a scatter plot, one column of points per level.
fig = px.scatter(df[df["from"] == "pred_value"], x="level", y="value", color="from")

# For every level, overlay mean ± std: predictions in red just right of the
# level, the cadotte reference in green just left of it. Levels missing a
# statistic for a source are skipped explicitly rather than via a bare except.
for level in df["level"].unique():
    df_level = df[df["level"] == level]
    mean = df_level.loc[df_level["mesure"] == "mean"]
    std = df_level.loc[df_level["mesure"] == "std"]
    for source, offset, color in (("pred", 0.2, "red"), ("cadotte", -0.2, "green")):
        _add_mean_std_marker(fig,
                             level + offset,
                             _first_value(mean, source),
                             _first_value(std, source),
                             color)
fig.show()

Unnamed: 0,value,level,mesure,from
0,8.8,2,length,pred
1,15.2,3,length,pred
2,10.4,4,length,pred
3,8.8,5,length,pred
4,10.4,6,length,pred
...,...,...,...,...
2665,8.8,7,length,pred
2666,8.8,8,length,pred
2667,11.2,9,length,pred
2668,,10,length,pred
