# Processing, clustering and visualising the TAURUS data in MDV

#### Importing the required packages for data preprocessing

In [1]:
import pandas as pd

#### Importing the required packages for MDV setup and visualisation

In [None]:
import os
from mdvtools.mdvproject import MDVProject
from mdvtools.charts.dot_plot import DotPlot
from mdvtools.charts.scatter_plot_3D import ScatterPlot3D
from mdvtools.charts.table_plot import TablePlot
from mdvtools.charts.selection_dialog_plot import SelectionDialogPlot

## Data analysis section

In [3]:
# Load the feature table CSV into a DataFrame.
# NOTE(review): the path is relative to the notebook's working directory and
# climbs five levels into a "Documents" folder, so it is machine-specific —
# adjust it to wherever the dataset lives before running.
spot_data = pd.read_csv('../../../../../Documents/spot_dataset_a/spot_dataset_a/feature_table_umap.csv')

In [4]:
# Use the 'unique_object_ids' column as the row index.
# This rebinds `spot_data`, so re-running this cell alone fails (the column is
# already in the index) — re-run the load cell first.
spot_data = spot_data.set_index('unique_object_ids')

In [5]:
# The first two columns go to `organoids_data`; every remaining column is
# treated as a feature and goes to `features_data`.
organoids_data, features_data = spot_data.iloc[:, :2], spot_data.iloc[:, 2:]

In [6]:
# Move the 'unique_object_ids' index back into an ordinary first column
# in both frames.
organoids_data, features_data = (
    organoids_data.reset_index(),
    features_data.reset_index(),
)

In [7]:
# Add a string version of the timepoint ("timepoint_<value>") as a new column.
organoids_data = organoids_data.assign(
    timepoint_cat="timepoint_" + organoids_data["timepoint"].astype("str")
)

In [8]:
# Transpose the feature columns so that each row is one feature, and expose the
# feature name as a regular 'feature_id' column (first column of the result).
features_data_try = (
    spot_data.iloc[:, 2:]
    .T
    .rename_axis(index='feature_id')
    .reset_index()
)

In [None]:
# Dot plot keyed on the "condition" column, with a rows-as-columns query into
# the "features" datasource (maxItems is 4).
dot_plot = DotPlot(
    title="Percent of gene expression per cell state",
    params=[
        "condition",
        {"linkedDsName": "features", "maxItems": 4, "type": "RowsAsColsQuery"},
    ],
    size=[700, 300],
    position=[10, 470],
)

# Apply the same label and tick styling to all three axes; a fresh dict is
# built for every call.
for axis_name in ("x", "y", "ry"):
    dot_plot.set_axis_properties(axis_name, {"label": "", "textSize": 13, "tickfont": 10})

# Colour scale and legends
dot_plot.set_color_scale(log_scale=False)
dot_plot.set_color_legend(True, [40, 10])
dot_plot.set_fraction_legend(True, [0, 0])

# Names of the three features used as the x/y/z axes of every 3D scatter plot.
# NOTE(review): the saved output of `features_data_try.iloc[:,:1]` further down
# lists UMAP2 as the last feature row; if the table has no UMAP3 row the lookup
# below raises — confirm the CSV contains three UMAP components.
feature1 = "UMAP1"
feature2 = "UMAP2"
feature3 = "UMAP3"


def _link_param(feature_name):
    """Build the linked-column identifier for one feature.

    The identifier has the form ``Link|<feature>(Link)|<row index>``, where the
    row index is the position label of the first row of ``features_data_try``
    whose ``feature_id`` equals ``feature_name``.

    Raises:
        ValueError: if no row of ``features_data_try`` has that ``feature_id``
            (the original inline lookup failed with a bare ``IndexError``).
    """
    hits = features_data_try[features_data_try.feature_id == feature_name].index
    if len(hits) == 0:
        raise ValueError(
            f"feature {feature_name!r} not found in features_data_try.feature_id"
        )
    return f"Link|{feature_name}(Link)|{hits[0]}"


def _make_umap_scatter(position, color_by):
    """Create one 3D scatter plot of the three features, coloured by a column.

    Args:
        position: ``[x, y]`` position of the chart in the view.
        color_by: name of the column passed to ``set_color_by``.

    Returns:
        The configured ``ScatterPlot3D`` instance.
    """
    plot = ScatterPlot3D(
        title=f"{feature1} x {feature2} x {feature3}",
        params=[_link_param(feature1), _link_param(feature2), _link_param(feature3)],
        size=[450, 450],
        position=position,
        default_color="#377eb8",
        brush="poly",
        radius=5,
        opacity=0.8,
        # presumably the approximate centre of the point cloud — TODO confirm
        center=[10.226573467254639, 11.17284369468689, 5.338449418544769],
        camera={"distance": 50, "theta": 0.5, "phi": 0.5},
    )
    plot.set_color_by(color_by)
    return plot


# Three side-by-side views of the same embedding, differing only in position
# and in the column used for colouring.
scatter_plot_1 = _make_umap_scatter([10, 10], "condition")
scatter_plot_2 = _make_umap_scatter([470, 10], "timepoint")
scatter_plot_3 = _make_umap_scatter([930, 10], "unique_object_ids")



# Table chart showing the 'feature_id' column
table = TablePlot(
    title="Parameters",
    params=["feature_id"],
    position=[10, 10],
    size=[200, 500],
)

# Selection dialog for filtering on condition, timepoint and object id
selection_dialog = SelectionDialogPlot(
    title="Filtering",
    params=["condition", "timepoint", "unique_object_ids"],
    position=[10, 500],
    size=[200, 400],
)



In [10]:
# Sanity check: index label of the first row whose feature_id is "min_curvature"
features_data_try.loc[features_data_try.feature_id == "min_curvature"].index[0]

0

In [11]:
# Preview only the first column of the transposed table (the feature names);
# the same single-column slice is what gets registered as the "features"
# datasource in the project-setup cell.
features_data_try.iloc[:,:1]

unique_object_ids,feature_id
0,min_curvature
1,std_curvature
2,max_centroid_distance
3,mean_centroid_distance
4,std_mean_centroid_distance_ratio
...,...
232,div_sift_93
233,div_sift_94
234,UMAP1
235,UMAP2


In [12]:
# Set up the MDV project under ~/mdv/spot1.
# delete_existing=True is passed, so presumably a project already at that
# location is removed first — confirm against mdvtools.
base = os.path.expanduser('~/mdv')
project_path = os.path.join(base, 'spot1')  # where the project metadata is stored
p = MDVProject(project_path, delete_existing=True)

# Register the two datasources: the per-object metadata table and the
# single-column table of feature names.
p.add_datasource("data_organoids", organoids_data)
p.add_datasource("features", features_data_try.iloc[:, :1])

starting add_datasource
is ds None? None
got passed the ds check
created h5 group without error
- adding column 'unique_object_ids' to datasource 'data_organoids'
- adding column 'condition' to datasource 'data_organoids'
- adding column 'timepoint' to datasource 'data_organoids'
- adding column 'timepoint_cat' to datasource 'data_organoids'
 - non-dodgy columns: [{'datatype': 'text16', 'name': 'unique_object_ids', 'field': 'unique_object_ids', 'values': ['shape_high_organoid_0_0_0', 'shape_low_organoid_39_33_11', 'shape_low_organoid_38_32_6', 'shape_low_organoid_38_32_7', 'shape_low_organoid_38_32_8', 'shape_low_organoid_38_32_9', 'shape_low_organoid_39_33_0', 'shape_low_organoid_39_33_1', 'shape_low_organoid_39_33_10', 'shape_low_organoid_39_33_12', 'shape_low_organoid_35_29_22', 'shape_low_organoid_39_33_13', 'shape_low_organoid_39_33_14', 'shape_low_organoid_39_33_15', 'shape_low_organoid_39_33_16', 'shape_low_organoid_39_33_17', 'shape_low_organoid_39_33_18', 'shape_low_organoid_3

[]

In [13]:
# Build the numeric feature matrix as a NumPy array with one row per feature
# and one column per object:
#   - `features_data.T` puts features on the rows; its first row is the
#     'unique_object_ids' column restored by reset_index, so `.iloc[1:, :]`
#     drops it;
#   - `pd.to_numeric(..., errors='coerce')` is applied column by column and
#     turns anything non-numeric into NaN.
# The array is transposed again where it is handed to the project.
numeric_data = features_data.T.iloc[1:, :].apply(pd.to_numeric, errors='coerce').values

In [14]:
# Create the rows-as-columns link named "Link" between the "data_organoids" and
# "features" datasources, keyed on the 'feature_id' column, so that rows of
# "features" can be used as columns of "data_organoids".
# NOTE(review): argument meanings are inferred from the call sites — confirm
# against the mdvtools MDVProject documentation.
p.add_rows_as_columns_link("data_organoids","features","feature_id","Link")
p.add_rows_as_columns_subgroup("data_organoids","features","Link",numeric_data.T) # attach the feature values (numeric_data transposed: one row per object)


In [15]:
# Collect the chart configurations (each chart's `plot_data`) per datasource
# panel: the "data_organoids" panel gets the dot plot, the three scatter plots
# and the selection dialog; the "features" panel gets the table.
list_charts_cells = [
    chart.plot_data
    for chart in (dot_plot, scatter_plot_1, scatter_plot_2, scatter_plot_3, selection_dialog)
]
list_charts_features = [table.plot_data]

# View configuration combining the two panels
view_config = {
    'initialCharts': {
        "data_organoids": list_charts_cells,
        "features": list_charts_features,
    }
}

# Store it as the "default" view and make the project editable
p.set_view("default", view_config)

p.set_editable(True)

In set_view
default


In [None]:
# Serve the project `p` built above.
# The saved output below shows a Flask development server with debug mode on,
# listening on all addresses (0.0.0.0) on port 5050; the call appears to block
# until the server is stopped (CTRL+C / kernel interrupt).
p.serve()


created Flask <Flask 'mdvtools.server'>
 * Serving Flask app 'mdvtools.server'
 * Debug mode: on


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5050
 * Running on http://129.67.46.176:5050
Press CTRL+C to quit
127.0.0.1 - - [26/Feb/2025 15:43:54] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [26/Feb/2025 15:43:54] "GET /static/assets/mdv.css HTTP/1.1" 304 -
127.0.0.1 - - [26/Feb/2025 15:43:54] "GET /static/js/mdv.js HTTP/1.1" 304 -


recieved request to project_index


127.0.0.1 - - [26/Feb/2025 15:43:54] "GET /static/assets/ExpandMore-U9H8ev0k.js HTTP/1.1" 304 -
127.0.0.1 - - [26/Feb/2025 15:43:54] "GET /static/assets/datasourceWorker-BDVgcx14.js HTTP/1.1" 304 -
127.0.0.1 - - [26/Feb/2025 15:43:54] "GET /datasources.json HTTP/1.1" 200 -
127.0.0.1 - - [26/Feb/2025 15:43:54] "GET /state.json HTTP/1.1" 200 -
127.0.0.1 - - [26/Feb/2025 15:43:54] "GET /views.json HTTP/1.1" 200 -
127.0.0.1 - - [26/Feb/2025 15:43:54] "POST /get_view HTTP/1.1" 200 -
127.0.0.1 - - [26/Feb/2025 15:43:54] "GET /static/assets/filteredIndexWorker-CEl1713S.js HTTP/1.1" 304 -
127.0.0.1 - - [26/Feb/2025 15:43:54] "GET /static/assets/filteredIndexWorker-CEl1713S.js HTTP/1.1" 304 -
127.0.0.1 - - [26/Feb/2025 15:43:54] "GET /static/img/fa-solid-900.woff2 HTTP/1.1" 304 -
127.0.0.1 - - [26/Feb/2025 15:43:54] "GET /static/img/roboto-latin-400-normal.woff2 HTTP/1.1" 304 -
127.0.0.1 - - [26/Feb/2025 15:43:54] "POST /get_data HTTP/1.1" 200 -
127.0.0.1 - - [26/Feb/2025 15:43:54] "POST /get_d

In [None]:
# Re-open a previously created MDV project (delete_existing=False) and serve it
# without rebuilding anything.
# NOTE(review): this opens '~/mdv/spot', whereas the build cell above writes to
# '~/mdv/spot1' — confirm which project is meant to be served here.
base = os.path.expanduser('~/mdv')
project_path = os.path.join(base, 'spot')  # location of the existing project
p = MDVProject(project_path, delete_existing=False)

p.serve()