In [None]:
import numpy as np
import pandas as pd 
import umap
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from Bio import SeqIO
import seaborn as sns

In [None]:
def _fasta_ids(fasta_path):
    """Return the set of record keys indexed from the FASTA file at `fasta_path`."""
    return set(SeqIO.index(fasta_path, "fasta").keys())


# Record identifiers belonging to each protein class.
client_set = _fasta_ids("../fig1/result/drllps_client_clstr_Homo_sapiens.fasta")
scaffold_set = _fasta_ids("../fig1/result/drllps_scaffold_clstr_Homo_sapiens.fasta")
nonllps_set = _fasta_ids("../fig1/result/drllps_nonllps_clstr_Homo_sapiens.fasta")

# NOTE(review): allow_pickle=True unpickles arbitrary objects -- only load trusted files.
# .item() unwraps the 0-d object array saved in the .npy file; presumably a dict
# mapping record id -> embedding vector -- TODO confirm.
mat = np.load("../fig2/PTT5XLU50_human.npy", allow_pickle=True).item()

# Parallel lists per class: embedding vectors and the ids they belong to.
list_client, client_id = [], []
list_scaffold, scaffold_id = [], []
list_nonllps, nonllps_id = [], []

for key, vec in mat.items():
    # Membership precedence is Non-LLPS, then Client, then Scaffold;
    # ids found in none of the three sets are skipped.
    if key in nonllps_set:
        vec_bucket, id_bucket = list_nonllps, nonllps_id
    elif key in client_set:
        vec_bucket, id_bucket = list_client, client_id
    elif key in scaffold_set:
        vec_bucket, id_bucket = list_scaffold, scaffold_id
    else:
        continue
    vec_bucket.append(vec)
    id_bucket.append(key)

In [None]:
# Per-protein feature table; the first CSV column becomes the row index
# (later cells look rows up by the ids collected from the FASTA files).
df = pd.read_csv(
    "../fig1/result/human_clstr_fea.csv",
    index_col=0,
)

In [None]:
def _feature_of(ids, column):
    """Return `column` of `df` for the rows labelled by `ids`, with NaN entries dropped."""
    return df.loc[list(ids), column].dropna()


# Disorder ("IDR") values per class.
df_cli_idr = _feature_of(client_id, "IDR")
df_sca_idr = _feature_of(scaffold_id, "IDR")
df_oth_idr = _feature_of(nonllps_id, "IDR")

# Hydrophobicity values per class (the column name is spelled this way in the CSV).
df_cli_kd = _feature_of(client_id, "Hydrophibicity")
df_sca_kd = _feature_of(scaffold_id, "Hydrophibicity")
df_oth_kd = _feature_of(nonllps_id, "Hydrophibicity")

# Charged amino-acid values per class.
df_cli_ch = _feature_of(client_id, "ChargedAA")
df_sca_ch = _feature_of(scaffold_id, "ChargedAA")
df_oth_ch = _feature_of(nonllps_id, "ChargedAA")

In [None]:
# Stack all embedding vectors into one array, ordered Client, Non-LLPS, Scaffold.
x = np.array([*list_client, *list_nonllps, *list_scaffold])

In [None]:
# Scaler applied to the embeddings before they are handed to UMAP.
sc = StandardScaler()

# UMAP projector with fixed hyper-parameters; random_state is pinned to 0.
mapper = umap.UMAP(
    n_neighbors=10,
    min_dist=0.7,
    random_state=0,
)

In [None]:
# Fit the scaler on the full embedding matrix, then scale that same matrix.
sc.fit(x)
x_scaled = sc.transform(x)

# Fit UMAP on the scaled embeddings (the fitted mapper is the cell's output).
mapper.fit(x_scaled)

In [None]:
fig, ax = plt.subplots(figsize=(6, 6))

# Project each class through the fitted scaler and UMAP mapper.
x_oth = mapper.transform(sc.transform(np.array(list_nonllps)))
x_cli = mapper.transform(sc.transform(np.array(list_client)))
x_sca = mapper.transform(sc.transform(np.array(list_scaffold)))

# Layers are drawn in this order, so Scaffold ends up on top.
# (A gold "Predicted client" layer between Client and Scaffold is currently disabled.)
layers = (
    (x_oth, dict(c="gray", s=0.3, alpha=0.2, label="Non-LLPS")),
    (x_cli, dict(c="darkorange", s=0.3, alpha=0.6, label="Client")),
    (x_sca, dict(c="blueviolet", s=0.7, alpha=0.8, label="Scaffold")),
)
for coords, style in layers:
    ax.scatter(coords[:, 0], coords[:, 1], **style)

# Hide ticks and tick labels on both axes.
ax.tick_params(labelbottom=False, bottom=False, labelleft=False, left=False)
ax.legend(fontsize=8)
fig.savefig("result/fig4a1.pdf")
plt.show()

In [None]:
# Long-form table of the 2-D UMAP coordinates computed in the scatter-plot cell
# (x_oth / x_cli / x_sca), one row per protein with its class label.
# The previous version grouped `df` -- the feature CSV -- by a `label` column and
# plotted its `x` / `y` columns, none of which are created anywhere in this
# notebook, so the cell failed on a fresh kernel. Building the table here makes
# the cell self-contained.
# NOTE(review): assumes the feature CSV does not itself ship `x`/`y`/`label` -- confirm.
df_umap = pd.concat(
    [
        pd.DataFrame({"x": coords[:, 0], "y": coords[:, 1], "label": name})
        for name, coords in (
            ("Non-LLPS", x_oth),
            ("Client", x_cli),
            ("Scaffold", x_sca),
        )
    ],
    ignore_index=True,
)

fig = plt.figure(figsize=(6,6))
ax = fig.add_subplot(111)
color_dict={"Client":"darkorange", "Scaffold":"blueviolet", "Non-LLPS":"lightgrey"}

# One bivariate density contour set per class, drawn explicitly on `ax`.
for label, df_sub in df_umap.groupby('label'):
    sns.kdeplot(data=df_sub, x="x", y="y", label=label, color=color_dict[label], ax=ax)

# Hide ticks, tick labels and the axis labels seaborn adds automatically.
ax.tick_params(labelbottom=False, bottom=False, labelleft=False, left=False)
ax.set_xlabel("")
ax.set_ylabel("")
plt.savefig("result/fig4a2.pdf")
plt.show()

In [None]:
# Hydrophobicity values of all classes (Scaffold, Client, Non-LLPS order) ...
df_kd = pd.concat([df_sca_kd, df_cli_kd, df_oth_kd])
# ... and the embedding vector of each of those proteins, in the same order.
kd_vec = [mat[protein_id] for protein_id in df_kd.index]

In [None]:
fig, ax = plt.subplots(figsize=(6, 6))

# Project the selected proteins with the already-fitted scaler and UMAP mapper.
kd_scaled = sc.transform(np.array(kd_vec))
x_kd = mapper.transform(kd_scaled)

# Colour every point by its hydrophobicity value.
kd_values = np.array(df_kd)
ax.scatter(x_kd[:, 0], x_kd[:, 1], c=kd_values, cmap="Blues", s=1)

ax.set_title("Hydrophobicity", fontsize=20)
ax.tick_params(labelbottom=False, bottom=False, labelleft=False, left=False)
fig.savefig("result/fig4b1.pdf")
plt.show()

In [None]:
# Disorder values of all classes (Scaffold, Client, Non-LLPS order) ...
df_idr = pd.concat([df_sca_idr, df_cli_idr, df_oth_idr])
# ... and the embedding vector of each of those proteins, in the same order.
idr_vec = [mat[protein_id] for protein_id in df_idr.index]

In [None]:
fig, ax = plt.subplots(figsize=(6, 6))

# Project the selected proteins with the already-fitted scaler and UMAP mapper.
idr_scaled = sc.transform(np.array(idr_vec))
x_idr = mapper.transform(idr_scaled)

# Colour every point by its disorder ("IDR") value.
idr_values = np.array(df_idr)
ax.scatter(x_idr[:, 0], x_idr[:, 1], c=idr_values, cmap="Blues", s=1)

ax.set_title("Disorder", fontsize=20)
ax.tick_params(labelbottom=False, bottom=False, labelleft=False, left=False)
fig.savefig("result/fig4b2.pdf")
plt.show()

In [None]:
# Charged-AA values of all classes (Scaffold, Client, Non-LLPS order) ...
df_ch = pd.concat([df_sca_ch, df_cli_ch, df_oth_ch])
# ... and the embedding vector of each of those proteins, in the same order.
ch_vec = [mat[protein_id] for protein_id in df_ch.index]

In [None]:
fig, ax = plt.subplots(figsize=(6, 6))

# Project the selected proteins with the already-fitted scaler and UMAP mapper.
ch_scaled = sc.transform(np.array(ch_vec))
x_ch = mapper.transform(ch_scaled)

# Colour every point by its charged amino-acid value.
ch_values = np.array(df_ch)
ax.scatter(x_ch[:, 0], x_ch[:, 1], c=ch_values, cmap="Blues", s=1)

ax.set_title("Charged AA", fontsize=20)
ax.tick_params(labelbottom=False, bottom=False, labelleft=False, left=False)
fig.savefig("result/fig4b3.pdf")
plt.show()