### Create a t-SNE visualization of the given PorphyStruct result data

In [None]:
import numpy as np
import pandas as pd
from data import constants
from mathutil import comp, percentage

In [None]:
# Load the result table from the (machine-specific) Excel export.
results_path = r"C:\Users\jenso\PowerFolders\Forschung\CSD Data Mining\Results.xlsx"
df = pd.read_excel(results_path)
# Replace every missing cell with an empty string, in place.
df.fillna("", inplace=True)

# Keep only the rows whose "Type" AND "Class" are both "Corrole".
# Alternative selection (everything that is not a porphyrin):
#   dataset = df.loc[(df["Type"] != "Porphyrin")]
is_corrole_type = df["Type"] == "Corrole"
is_corrole_class = df["Class"] == "Corrole"
dataset = df.loc[is_corrole_type & is_corrole_class]
# Renumber the surviving rows 0..n-1, discarding the original index.
dataset.reset_index(drop=True, inplace=True)

# Project helpers; presumably they add the columns listed in
# constants.percColumns / constants.compColumns -- TODO confirm.
dataset = comp.createCompData(percentage.createPercData(dataset))

# For each row, the label of the constants.compColumns column holding
# the largest value.
dataset["dominant"] = dataset[constants.compColumns].idxmax(axis=1)


In [None]:
# Number of entries left in `dataset` after the filtering above.
len(dataset)

### Prepare sklearn stuff

In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import  SimpleImputer
from sklearn.preprocessing import StandardScaler, MinMaxScaler, MaxAbsScaler
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA, TruncatedSVD
from sklearn.cluster import KMeans
! pip install pacmap
from pacmap.pacmap import PaCMAP

In [None]:
# Steps applied to the constants.percColumns columns:
#   1. fill missing values with the constant 0,
#   2. rescale with MinMaxScaler.
# StandardScaler or MaxAbsScaler (both imported above) can be swapped in
# as the "scaler" step to try a different scaling.
ext_steps = [
    ("imputer", SimpleImputer(strategy="constant", fill_value=0)),
    ("scaler", MinMaxScaler()),
]

# sparse_threshold=0 asks the ColumnTransformer for dense output.
preprocess = ColumnTransformer(
    transformers=[
        ("prepocess_ext", Pipeline(steps=ext_steps), constants.percColumns),
    ],
    sparse_threshold=0,
)

### Do TSNE Computation

In [None]:
# Candidate 2-D embedders. Only `tsne` is used for the mapping below; the
# others are constructed so they can be swapped in by hand.
# NOTE(review): newer scikit-learn releases appear to rename TSNE's
# `n_iter` to `max_iter` -- confirm against the installed version.
tsne = TSNE(
    n_components=2,
    random_state=69420,
    perplexity=50,
    n_iter=5000,
)
pca = PCA(n_components=2)
pacmap = PaCMAP(n_components=2, num_iters=5000, n_neighbors=None)
svd = TruncatedSVD(n_components=2, n_iter=4000)

# Preprocess the selected columns, embed them, and wrap the resulting
# coordinates in a two-column frame ("x", "y").
features = preprocess.fit_transform(dataset)
embedding = tsne.fit_transform(features)
df_mapping = pd.DataFrame(embedding, columns=["x", "y"])


### Add Columns for Viz

In [None]:
# Attach the metadata used for colouring, sizing and hover text.
# Columns are added in a fixed order so the resulting frame layout is
# stable. Assignment aligns on the index; this assumes `dataset` still
# carries a 0..n-1 index matching `df_mapping` -- TODO confirm.

# Straight copies (target column <- source column).
for target, source in (("M", "Metal"), ("Doop", "DoopExp")):
    df_mapping[target] = dataset[source]

# "Group" is stored as text.
df_mapping["Group"] = dataset["Group"].astype(str)
df_mapping["dominant"] = dataset["dominant"]

# Boolean flags: True where the metal equals the given symbol.
for symbol in ("Cu", "H", "P"):
    df_mapping[symbol] = dataset["Metal"].eq(symbol)
# True where the group label is exactly "Ln".
df_mapping["Ln"] = dataset["Group"].eq("Ln")

# Remaining straight copies.
for target, source in (
    ("Class", "Class"),
    ("Ligand", "Ligand"),
    ("CoordNo", "CoordNo"),
    ("Axial", "AxialLigand"),
    ("CCDC", "CCDC"),
):
    df_mapping[target] = dataset[source]


### Plot Stuff

In [None]:
! pip install plotnine
! pip install natsort
! pip install -U kaleido
! pip install plotly

import plotly.express as px
from natsort import index_natsorted
import plotly.express as px
from plotnine import *
import plotly.io as pio
import plotly
plotly.__version__ #5.6.0
import kaleido #required
kaleido.__version__ #0.2.1

In [None]:
# Sort rows by "Group" in natural order (e.g. "2" before "10").
# index_natsorted yields the row positions in natural order; argsort of
# that turns it into a per-row rank that sort_values can use as key.
group_order = index_natsorted(df_mapping["Group"])
group_rank = np.argsort(group_order)
df_mapping.sort_values(by="Group", key=lambda _: group_rank, inplace=True)

In [None]:
# Static overview: one point per row, sized by "Doop", coloured by the
# "dominant" column and labelled with the "M" column.
(
    ggplot(df_mapping, aes(x="x", y="y", label="M"))
    + geom_point(aes(size="Doop", colour="dominant"))
    + geom_text(size=6)
)

In [None]:
from pathlib import Path

# Columns used, one at a time, to colour the scatter plot; one set of
# files (png / svg / html) is written per entry.
metrix = ["Cu", "H", "P", "dominant", "Group", "Class", "Ln"]

# The writers below do not create missing directories, so make sure the
# output folder exists before the first export.
out_dir = Path("out")
out_dir.mkdir(parents=True, exist_ok=True)

# Hover text, identical for every figure; the indices refer to the
# positions in `custom_data` passed to px.scatter below.
hover_template = (
    "CCDC: %{customdata[5]}<br>"
    "%{customdata[0]} %{customdata[1]} %{customdata[2]} <br>"
    "Coordination Number: %{customdata[3]} <br><br>"
    "Doop: %{customdata[4]}"
)

for k in metrix:
    fig = px.scatter(
        df_mapping,
        x="x",
        y="y",
        color=k,
        size="Doop",
        text="M",
        custom_data=["M", "Ligand", "Axial", "CoordNo", "Doop", "CCDC"],
    )
    fig.update_traces(hovertemplate=hover_template, textfont_size=8)

    # Static exports (rendered with scale=15) plus an interactive HTML
    # page that loads plotly.js and MathJax from a CDN.
    pio.write_image(fig, f"{out_dir}/tsne_{k}.png", scale=15)
    pio.write_image(fig, f"{out_dir}/tsne_{k}.svg", scale=15)
    pio.write_html(
        fig,
        f"{out_dir}/tsne_{k}.html",
        include_plotlyjs="cdn",
        include_mathjax="cdn",
    )
