In [None]:
#Install packages if not already installed

#pip install umap-learn
#pip install plotly
#pip install kaleido #(needed by plotly to export static images with write_image)

In [1]:
#Import packages
import numpy as np
import pandas as pd
import umap #for doing UMAP
import plotly.express as px #for plotting

In [None]:
#Load the dataset. The CSV file must be in the working directory; otherwise replace the file name below with its full path.
dataraw = pd.read_csv(filepath_or_buffer="tissue_type_df_v2_noblank.csv")

In [None]:
#Build a dataframe holding only the quantitative data, i.e. the columns whose names start with 'AT' (gene IDs)
datafilter = [name for name in dataraw.columns if name.startswith('AT')]
dataquant = dataraw.loc[:, datafilter]
#dataquant.head() #uncomment to preview the selected columns

In [None]:
#Separate the categorical columns (everything that is not an 'AT' gene ID) and correct a spelling mistake in them.
#.copy() makes datacat an independent dataframe, so the assignments below do not trigger
#SettingWithCopyWarning and do not rely on pandas' view-versus-copy behaviour.
catfilter = [col for col in dataraw if not col.startswith('AT')]
datacat = dataraw[catfilter].copy()

#The misspelling "Hypotocyl" occurs in both of these columns; replace it with "Hypocotyl" in each
for column in ("VegetativeRepro", "Tissue_type"):
    datacat.loc[datacat[column] == "Hypotocyl", column] = "Hypocotyl"

In [None]:
#Establish the UMAP models, specifying n_neighbors, min_dist, and n_components (2 for 2D, 3 for 3D).
#Additional parameters could be added here, but these three are the most important.
#UMAP is a stochastic algorithm: a fixed random_state is passed so that the embeddings (and the
#figures saved from them) are the same after every kernel restart.
#Note: umap-learn documents that fixing random_state limits parallelism, so fitting may be slower.
UMAP_SEED = 42
UMAP_2d = umap.UMAP(n_neighbors=10,min_dist=.1,n_components=2,random_state=UMAP_SEED)
UMAP_3d = umap.UMAP(n_neighbors=10,min_dist=.1,n_components=3,random_state=UMAP_SEED)

#Fit each model to the quantitative data and get the low-dimensional coordinates of every row
proj_2d = UMAP_2d.fit_transform(dataquant)
proj_3d = UMAP_3d.fit_transform(dataquant)

#Verify that each result contains all rows and 2 columns (2D) or 3 columns (3D)
assert proj_2d.shape == (len(dataquant), 2)
assert proj_3d.shape == (len(dataquant), 3)
print(proj_2d.shape)
print(proj_3d.shape)

In [None]:
#Visualize the UMAP with plotly, which makes it easy to create interactive figures.
#Many more plotting parameters are available (see https://plotly.com/python/line-and-scatter/).
#Note: points are colored by aboveground/belowground here, then by tissue type and by
#vegetative/reproductive in the following cells. This could be changed if more categories
#(e.g. treatment) are added in future.

#Options shared by the 2D and the 3D figure
above_below_style = dict(
    color=dataraw["AboveBelow"],
    labels={'color': 'tissue location'},
    color_discrete_sequence=["gray","red","black","purple","green"],
)

fig_2d = px.scatter(proj_2d, x=0, y=1, **above_below_style)
fig_3d = px.scatter_3d(proj_3d, x=0, y=1, z=2, **above_below_style)
fig_3d.update_traces(marker_size=3) #shrinks the 3D points so they look similar in size to the 2D ones

#fig_2d.show()
#fig_3d.show()

#Save a static image of each figure, plus an interactive HTML version of the 3D one
fig_2d.write_image("UMAP2D_AboveBelow_10_.1_v2.png")
fig_3d.write_image("UMAP3D_AboveBelow_10_.1_v2.png")
fig_3d.write_html("UMAP_AboveBelow_10_.1_v2.html")


In [18]:
#Custom color sequence: "gray" first (to match the Other_NA category), followed by the
#first 22 colors of plotly's qualitative Alphabet palette
pxseq = px.colors.qualitative.Alphabet[0:22]
colseq = ["gray"] + list(pxseq)

#Options shared by the 2D and the 3D figure
tissue_type_style = dict(
    color=datacat["Tissue_type"],
    labels={'color': 'tissue type'},
    color_discrete_sequence=colseq,
)

fig_2d = px.scatter(proj_2d, x=0, y=1, **tissue_type_style)
fig_3d = px.scatter_3d(proj_3d, x=0, y=1, z=2, **tissue_type_style)
fig_3d.update_traces(marker_size=3) #shrinks the 3D points so they look similar in size to the 2D ones

#fig_2d.show()
#fig_3d.show()

#Save a static image of each figure, plus an interactive HTML version of the 3D one
fig_2d.write_image("UMAP2D_TissueType_10_.1_v2.png")
fig_3d.write_image("UMAP3D_TissueType_10_.1_v2.png")
fig_3d.write_html("UMAP_TissueType_10_.1_v2.html")

In [None]:
#Color the same embeddings by the vegetative/reproductive category.
#The legend is titled after that category; it previously repeated the 'tissue type' title
#even though the points are colored by the VegetativeRepro column.
vegrepro_style = dict(
    color=datacat.VegetativeRepro,
    labels={'color': 'vegetative/reproductive'},
    color_discrete_sequence=["gray","red","black","purple","green","blue"],
)

fig_2d = px.scatter(proj_2d, x=0, y=1, **vegrepro_style)
fig_3d = px.scatter_3d(proj_3d, x=0, y=1, z=2, **vegrepro_style)
fig_3d.update_traces(marker_size=3) #shrinks the 3D points to match the other 3D figures in this notebook

#fig_2d.show()
#fig_3d.show()

#Save a static image of each figure, plus an interactive HTML version of the 3D one
#NOTE(review): unlike the other saved figures, these file names carry no "_v2" suffix - confirm whether that is intended
fig_2d.write_image("UMAP2D_VegRepro_10_.1.png")
fig_3d.write_image("UMAP3D_VegRepro_10_.1.png")
fig_3d.write_html("UMAP_VegRepro_10_.1.html")