In [None]:
! pip install plotnine
! pip install ipympl
! pip install plotly
! pip install natsort
! pip install -U kaleido

import plotly
plotly.__version__ #5.6.0
import kaleido #required
kaleido.__version__ #0.2.1



In [None]:
# Select the interactive "widget" Matplotlib backend (provided by ipympl, installed above).
%matplotlib widget

In [None]:
import pandas as pd
from plotnine import *
import plotly.express as px
from natsort import index_natsorted
import numpy as np
from util.datasource import anything, corroles
from util.analysis import ext_modes, plot_selector, perc_comp, perc_ext

In [None]:
# `df` is the object returned by `corroles.dataFrame`; no copy is made here,
# so the column added below is written onto that same object.
df = corroles.dataFrame
# For every row, the name of the `plot_selector` column holding the largest value.
dominant_mode = df[plot_selector].idxmax(axis="columns")
df["dominant"] = dominant_mode
df

### Select only direct PorphyStruct output

In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.manifold import TSNE 

In [None]:
# Per-column preparation for the embedding: missing values become 0, then
# each column is standardised.
ext_pipeline = Pipeline(
    steps=[
        ("imputer", SimpleImputer(strategy="constant", fill_value=0)),
        ("scaler", StandardScaler()),
    ]
)

# Only the `perc_ext` columns plus "Doop (exp.)" are routed through the
# pipeline; sparse_threshold=0 asks for a dense result.
preprocess = ColumnTransformer(
    transformers=[
        ("prepocess_ext", ext_pipeline, perc_ext + ["Doop (exp.)"]),
    ],
    sparse_threshold=0,
)

In [None]:
# Two-dimensional t-SNE embedding of the preprocessed columns.
# The seed is fixed so reruns start from the same random state.
tsne = TSNE(n_components=2, perplexity=50, random_state=91)
embedding_input = preprocess.fit_transform(df)
mapping = tsne.fit_transform(embedding_input)

In [None]:
# Plotting frame: the two t-SNE coordinates plus the metadata used for
# colour, size and hover text in the figures below.
#
# The frame reuses df's index. The columns copied from df are assigned by
# index label, so with a fresh 0..n-1 index they would be misaligned (or
# filled with NaN) whenever df itself is not indexed 0..n-1. `mapping` has
# one row per row of df because it was fitted on df.
df_mapping = pd.DataFrame(mapping, columns=["x", "y"], index=df.index)
df_mapping["category"] = df["category"]
df_mapping["M"] = df["M"]
df_mapping["Doop"] = df["Doop (exp.)"]
df_mapping["Coord_No"] = df["Coord_No"]
df_mapping["No_Subs"] = df["No_Subs"]
# Stored as text; presumably so the plots treat the group as a discrete label.
df_mapping["Group"] = df["Group"].astype(str)
df_mapping["dominant"] = df["dominant"]
df_mapping["Ligand"] = df["Ligand"]
df_mapping["Axial"] = df["Axial"]
df_mapping["CCDC"] = df["CCDC"]

# Boolean highlight flags used as colour aesthetics.
df_mapping["cu"] = df["M"] == "Cu"
df_mapping["h"] = df["M"] == "H"
# Logical OR of the two flags (spelled with `|` rather than `+` on booleans).
df_mapping["hcu"] = df_mapping["cu"] | df_mapping["h"]
# Flags the single entry with this CCDC number.
# NOTE(review): assumes the CCDC column is numeric; confirm against the data source.
df_mapping["rad"] = df["CCDC"] == 1060277

In [None]:
# Static overview of the embedding: point colour marks the "rad" flag,
# point size follows Doop, and each point is labelled with its "M" value.
(
    ggplot(df_mapping, aes(x="x", y="y", label="M"))
    + geom_point(aes(colour="rad", size="Doop"))
    + geom_text(size=6)
)

In [None]:
# Interactive map coloured by the "cu" flag (M == "Cu"), exported as HTML and PNG.
fig = px.scatter(
    df_mapping,
    x="x",
    y="y",
    color="cu",
    size="Doop",
    text="M",
    custom_data=["M", "Ligand", "Axial", "Coord_No", "No_Subs", "Doop", "CCDC", "dominant"],
    width=900,
    height=600,
)
# The hover box is built from custom_data by position; the label font size is
# set in the same call.
fig.update_traces(
    hovertemplate=(
        "CCDC: %{customdata[6]}<br>"
        "%{customdata[0]} %{customdata[1]} %{customdata[2]} <br>"
        "Coordination Number: %{customdata[3]} <br>"
        "%{customdata[7]}<br>"
        "Doop: %{customdata[5]}"
    ),
    textfont_size=8,
)
fig.write_html("out/tsne_cu.html", include_plotlyjs="cdn")
fig.write_image("out/tsne_cu.png", engine="kaleido", scale=15)

In [None]:
# Interactive map without a colour grouping (single trace), exported as HTML and PNG.
fig = px.scatter(
    df_mapping,
    x="x",
    y="y",
    size="Doop",
    text="M",
    custom_data=["M", "Ligand", "Axial", "Coord_No", "No_Subs", "Doop", "CCDC", "dominant"],
    width=900,
    height=600,
)
# The hover box is built from custom_data by position; the label font size is
# set in the same call.
fig.update_traces(
    hovertemplate=(
        "CCDC: %{customdata[6]}<br>"
        "%{customdata[0]} %{customdata[1]} %{customdata[2]} <br>"
        "Coordination Number: %{customdata[3]} <br>"
        "%{customdata[7]}<br>"
        "Doop: %{customdata[5]}"
    ),
    textfont_size=8,
)
fig.write_html("out/tsne_blue.html", include_plotlyjs="cdn")
fig.write_image("out/tsne_blue.png", engine="kaleido", scale=15)

In [None]:
# Interactive map coloured by the "h" flag (M == "H"), exported as HTML and PNG.
fig = px.scatter(
    df_mapping,
    x="x",
    y="y",
    color="h",
    size="Doop",
    text="M",
    custom_data=["M", "Ligand", "Axial", "Coord_No", "No_Subs", "Doop", "CCDC", "dominant"],
    width=900,
    height=600,
)
# The hover box is built from custom_data by position; the label font size is
# set in the same call.
fig.update_traces(
    hovertemplate=(
        "CCDC: %{customdata[6]}<br>"
        "%{customdata[0]} %{customdata[1]} %{customdata[2]} <br>"
        "Coordination Number: %{customdata[3]} <br>"
        "%{customdata[7]}<br>"
        "Doop: %{customdata[5]}"
    ),
    textfont_size=8,
)
fig.write_html("out/tsne_h.html", include_plotlyjs="cdn")
fig.write_image("out/tsne_h.png", engine="kaleido", scale=15)

### By Dominant Mode

In [None]:
# Static overview of the embedding coloured by the "dominant" column;
# point size follows Doop and each point is labelled with its "M" value.
(
    ggplot(df_mapping, aes(x="x", y="y", label="M"))
    + geom_point(aes(colour="dominant", size="Doop"))
    + geom_text(size=6)
)

In [None]:
# Interactive map coloured by the "dominant" column, exported as HTML and PNG.
fig = px.scatter(
    df_mapping,
    x="x",
    y="y",
    color="dominant",
    size="Doop",
    text="M",
    custom_data=["M", "Ligand", "Axial", "Coord_No", "No_Subs", "Doop", "CCDC", "dominant"],
    width=900,
    height=600,
)
# The hover box is built from custom_data by position; the label font size is
# set in the same call.
fig.update_traces(
    hovertemplate=(
        "CCDC: %{customdata[6]}<br>"
        "%{customdata[0]} %{customdata[1]} %{customdata[2]} <br>"
        "Coordination Number: %{customdata[3]} <br>"
        "%{customdata[7]}<br>"
        "Doop: %{customdata[5]}"
    ),
    textfont_size=8,
)
fig.write_html("out/tsne_dominant.html", include_plotlyjs="cdn")
fig.write_image("out/tsne_dominant.png", engine="kaleido", scale=15)

### By Group

In [None]:
# Static overview of the embedding coloured by the "Group" column;
# point size follows Doop and each point is labelled with its "M" value.
(
    ggplot(df_mapping, aes(x="x", y="y", label="M"))
    + geom_point(aes(colour="Group", size="Doop"))
    + geom_text(size=6)
)

In [None]:
color = "Group"
# Natural-sort the rows by the colour column, presumably so that the legend
# entries appear in natural order. The sort goes into a local frame so the
# shared df_mapping keeps its row order for the other cells, and the key ranks
# the values it is handed instead of re-reading the global frame.
df_by_group = df_mapping.sort_values(
    by=color,
    key=lambda values: np.argsort(index_natsorted(values)),
)
fig = px.scatter(
    df_by_group,
    x="x",
    y="y",
    color=color,
    size="Doop",
    text="M",
    custom_data=["M", "Ligand", "Axial", "Coord_No", "No_Subs", "Doop", "CCDC", "dominant"],
    width=900,
    height=600,
)
# The hover box is built from custom_data by position; the label font size is
# set in the same call.
fig.update_traces(
    hovertemplate=(
        "CCDC: %{customdata[6]}<br>"
        "%{customdata[0]} %{customdata[1]} %{customdata[2]} <br>"
        "Coordination Number: %{customdata[3]} <br>"
        "%{customdata[7]}<br>"
        "Doop: %{customdata[5]}"
    ),
    textfont_size=8,
)
# NOTE(review): the earlier exports pass include_plotlyjs="cdn" and also write
# a PNG; this one uses plotly's default and writes HTML only. Confirm intended.
fig.write_html("out/tsne_group.html")

### By Category

In [None]:
# Static overview of the embedding coloured by the "category" column;
# point size follows Doop and each point is labelled with its "M" value.
(
    ggplot(df_mapping, aes(x="x", y="y", label="M"))
    + geom_point(aes(colour="category", size="Doop"))
    + geom_text(size=6)
)

In [None]:
color = "category"
# Natural-sort the rows by the colour column, presumably so that the legend
# entries appear in natural order. The sort goes into a local frame so the
# shared df_mapping keeps its row order for the other cells, and the key ranks
# the values it is handed instead of re-reading the global frame.
df_by_category = df_mapping.sort_values(
    by=color,
    key=lambda values: np.argsort(index_natsorted(values)),
)
fig = px.scatter(
    df_by_category,
    x="x",
    y="y",
    color=color,
    size="Doop",
    text="M",
    custom_data=["M", "Ligand", "Axial", "Coord_No", "No_Subs", "Doop", "CCDC", "dominant"],
    width=900,
    height=600,
)
# The hover box is built from custom_data by position; the label font size is
# set in the same call.
fig.update_traces(
    hovertemplate=(
        "CCDC: %{customdata[6]}<br>"
        "%{customdata[0]} %{customdata[1]} %{customdata[2]} <br>"
        "Coordination Number: %{customdata[3]} <br>"
        "%{customdata[7]}<br>"
        "Doop: %{customdata[5]}"
    ),
    textfont_size=8,
)

# NOTE(review): the earlier exports pass include_plotlyjs="cdn" and also write
# a PNG; this one uses plotly's default and writes HTML only. Confirm intended.
fig.write_html("out/tsne_category.html")


### By Coordination Number

In [None]:
# Static overview of the embedding coloured by the "Coord_No" column;
# point size follows Doop and each point is labelled with its "M" value.
(
    ggplot(df_mapping, aes(x="x", y="y", label="M"))
    + geom_point(aes(colour="Coord_No", size="Doop"))
    + geom_text(size=6)
)

In [None]:
color = "Coord_No"
# Natural-sort the rows by the colour column into a local frame, so the shared
# df_mapping is neither reordered nor retyped and re-running other cells does
# not depend on whether this one ran first. The key ranks the values it is
# handed instead of re-reading the global frame.
df_by_coord_no = df_mapping.sort_values(
    by=color,
    key=lambda values: np.argsort(index_natsorted(values)),
)
# Cast to text after sorting, presumably so plotly draws one discrete colour
# per coordination number rather than a continuous scale. The cast is applied
# to the local copy only.
df_by_coord_no[color] = df_by_coord_no[color].astype(str)
fig = px.scatter(
    df_by_coord_no,
    x="x",
    y="y",
    color=color,
    size="Doop",
    text="M",
    custom_data=["M", "Ligand", "Axial", "Coord_No", "No_Subs", "Doop", "CCDC", "dominant"],
    width=900,
    height=600,
)
# The hover box is built from custom_data by position; the label font size is
# set in the same call.
fig.update_traces(
    hovertemplate=(
        "CCDC: %{customdata[6]}<br>"
        "%{customdata[0]} %{customdata[1]} %{customdata[2]} <br>"
        "Coordination Number: %{customdata[3]} <br>"
        "%{customdata[7]}<br>"
        "Doop: %{customdata[5]}"
    ),
    textfont_size=8,
)

# NOTE(review): the earlier exports pass include_plotlyjs="cdn" and also write
# a PNG; this one uses plotly's default and writes HTML only. Confirm intended.
fig.write_html("out/tsne_coordNo.html")


In [None]:
# Static overview of the embedding coloured by the "No_Subs" column;
# point size follows Doop and each point is labelled with its "M" value.
(
    ggplot(df_mapping, aes(x="x", y="y", label="M"))
    + geom_point(aes(colour="No_Subs", size="Doop"))
    + geom_text(size=6)
)

In [None]:
color = "No_Subs"
# Natural-sort the rows by the colour column into a local frame, so the shared
# df_mapping is neither reordered nor retyped and re-running other cells does
# not depend on whether this one ran first. The key ranks the values it is
# handed instead of re-reading the global frame.
df_by_no_subs = df_mapping.sort_values(
    by=color,
    key=lambda values: np.argsort(index_natsorted(values)),
)
# Cast to text after sorting, presumably so plotly draws one discrete colour
# per value rather than a continuous scale. The cast is applied to the local
# copy only.
df_by_no_subs[color] = df_by_no_subs[color].astype(str)
fig = px.scatter(
    df_by_no_subs,
    x="x",
    y="y",
    color=color,
    size="Doop",
    text="M",
    custom_data=["M", "Ligand", "Axial", "Coord_No", "No_Subs", "Doop", "CCDC", "dominant"],
    width=900,
    height=600,
)
# The hover box is built from custom_data by position; the label font size is
# set in the same call.
fig.update_traces(
    hovertemplate=(
        "CCDC: %{customdata[6]}<br>"
        "%{customdata[0]} %{customdata[1]} %{customdata[2]} <br>"
        "Coordination Number: %{customdata[3]} <br>"
        "%{customdata[7]}<br>"
        "Doop: %{customdata[5]}"
    ),
    textfont_size=8,
)

# NOTE(review): the earlier exports pass include_plotlyjs="cdn" and also write
# a PNG; this one uses plotly's default and writes HTML only. Confirm intended.
fig.write_html("out/tsne_NoSubs.html")
