# Checking if scyan is installed

In [None]:
# Make sure scyan is importable; if it is missing, install it into the
# environment of the interpreter that is running this notebook.
try:
    import scyan
except ImportError:
    print("scyan is not installed. Installing now...")
    import importlib
    import subprocess
    import sys

    # Invoke pip through the running interpreter so the package lands in the
    # kernel's own environment (a bare `pip` may belong to another Python).
    subprocess.check_call([sys.executable, "-m", "pip", "install", "scyan"])

    # Let the import system notice the freshly installed package, then import it.
    importlib.invalidate_caches()
    import scyan
else:
    print("scyan is installed.")

In [None]:
import anndata
from pathlib import Path
import pandas as pd
import time
import matplotlib.pyplot as plt

# Concatenation of FCS files (WT1)

In [None]:
import anndata
import scyan  # Assurez-vous que scyan est installé et importé correctement
from pathlib import Path

# Folder that contains the FCS files
folder_path = Path(r"C:/Users/anush/Desktop/M1 BME/S2/stage/fcs_data/WT-1")

# List the FCS files. The extension is compared case-insensitively so that
# "*.FCS" files are not silently skipped, and the paths are sorted because
# Path.iterdir() yields entries in arbitrary order (sorting keeps the order
# of the concatenated cells reproducible between runs).
fcs_paths = sorted(
    path for path in folder_path.iterdir() if path.suffix.lower() == ".fcs"
)

def read_one(path):
    """Read one FCS file with scyan and tag its cells with their origin.

    Two columns are set on ``.obs``: ``"file"`` (the file name without its
    extension) and ``"batch"`` (the placeholder ``"NA"``; replace it with the
    real batch if needed). The object returned by ``scyan.read_fcs`` is
    returned after these columns are added.
    """
    adata = scyan.read_fcs(path)
    for column, value in (("file", path.stem), ("batch", "NA")):
        adata.obs[column] = value
    return adata

# Read every FCS file and stack the results into a single AnnData object
adata_WT1 = anndata.concat(list(map(read_one, fcs_paths)), index_unique="-")

# Print the object summary and the first obs rows as a quick check
for summary in (adata_WT1, adata_WT1.obs.head()):
    print(summary)

# Preprocessing 

In [None]:
# Data preprocessing
is_cytof = True

# asinh is the transform recommended for CyTOF data; auto-logicle is the one
# recommended for flow cytometry or spectral flow.
transform = (
    scyan.preprocess.asinh_transform
    if is_cytof
    else scyan.preprocess.auto_logicle_transform
)
transform(adata_WT1)

# Scale the data
scyan.preprocess.scale(adata_WT1)

# Check the object after preprocessing
print("Données après prétraitement :")
for summary in (adata_WT1, adata_WT1.obs.head()):
    print(summary)

# Knowledge table

In [None]:
import pandas as pd

# Location of the knowledge-table CSV
file_path = 'C:/Users/anush/Desktop/M1 BME/S2/stage/Report/Table_ref_V2.csv'

# Load it, using the first two columns as a two-level row index
table = pd.read_csv(filepath_or_buffer=file_path, index_col=[0, 1])

# Show the first 5 rows
table.head(n=5)

# Sanity check

In [None]:
# Verify that every marker (column) of the knowledge table exists in the dataset
known_markers = set(adata_WT1.var_names)
missing_markers = [
    marker for marker in table.columns if marker not in known_markers
]

if not missing_markers:
    print("All markers in table.columns are present in adata_WT1.var_names.")
else:
    print("Markers in table.columns not present in adata_WT1.var_names:")
    print(missing_markers)

# Duplicate marker expression checking

In [None]:
# Collect every row whose marker expression pattern occurs more than once
# (keep=False flags all members of a duplicated group, not only the repeats)
is_duplicated = table.duplicated(keep=False)
duplicates = table[is_duplicated]

if duplicates.empty:
    print("No duplicates found based on marker expressions.")
else:
    print("Duplicate rows found based on marker expressions:")
    print(duplicates)

# Time point column creation

In [None]:
# Map time points and treatments
def map_time_point(filename):
    """Return the second underscore-separated field of *filename*.

    That field is taken as the day label (e.g. "Day0", "Day7", "Day10").
    'Unknown' is returned when the name contains no underscore.
    """
    fields = filename.split('_')
    return fields[1] if len(fields) > 1 else 'Unknown'

def map_treatment(filename):
    """Return the third underscore-separated field of *filename*.

    That field is taken as the treatment label. The string 'None' is
    returned when the name has fewer than three fields.
    """
    try:
        return filename.split('_')[2]
    except IndexError:
        return 'None'

# Derive the time point and treatment of every cell from its source file name.
time_point_order = ["Day0", "Day7", "Day10"]
raw_time_points = adata_WT1.obs["file"].map(map_time_point)

# pd.Categorical replaces any value outside `categories` with NaN without
# complaint, so report such values (e.g. the 'Unknown' fallback) before the
# conversion instead of losing them silently.
unexpected_time_points = sorted(
    set(raw_time_points.dropna().unique()) - set(time_point_order)
)
if unexpected_time_points:
    print(
        "Warning: time points outside the expected categories will be set to NaN:",
        unexpected_time_points,
    )

# Ordered categorical so that time points sort chronologically
# (Day0 < Day7 < Day10) rather than alphabetically.
adata_WT1.obs["time_point"] = pd.Categorical(
    raw_time_points,
    categories=time_point_order,
    ordered=True,
)
adata_WT1.obs["treatment"] = adata_WT1.obs["file"].map(map_treatment)

print("Unique time points (with order):")
print(adata_WT1.obs["time_point"].unique())
print("Unique treatments:")
print(adata_WT1.obs["treatment"].unique())

print(adata_WT1.obs.head(8))

# UMAP

In [None]:
import time

# Start timing. perf_counter() is a monotonic, high-resolution clock meant for
# measuring elapsed time; time.time() follows the wall clock and can jump if
# the system time is adjusted while the UMAP is running.
start_time = time.perf_counter()

# Compute the UMAP on the markers listed in the knowledge table
scyan.tools.umap(adata_WT1, markers=table.columns)

# Stop timing and compute the elapsed time
end_time = time.perf_counter()
elapsed_time = end_time - start_time

# Report the elapsed time
print(f"Temps écoulé pour le calcul de l'UMAP : {elapsed_time:.2f} secondes")

# Saving the Scyan Project

In [None]:
# Add the dataset (AnnData object + knowledge table) to scyan under the name
# "WT1", without specifying a path. The same name is passed to
# scyan.data.load in the following cells.
scyan.data.add("WT1", adata_WT1, table)

# Loading the Scyan project

In [None]:
# Reload the "WT1" dataset (AnnData object and marker table)
adata_WT1, table = scyan.data.load("WT1")

# Print an overview of both objects
print("AnnData object:", adata_WT1, "\nMarker table:", table.head(), sep="\n")

# Proceed with further analysis or model training

# Model fitting

In [None]:

# Reload the data if needed
adata_WT1, table = scyan.data.load("WT1")

# Hyperparameters of the first training run
training_kwargs = {"prior_std": 0.25, "lr": 0.0001}

# Build the model from the prepared data
model = scyan.Scyan(adata_WT1, table, **training_kwargs)

# Fit the model
model.fit()

# 1st prediction

In [None]:
# First prediction pass with the fitted model, using predict()'s default
# arguments (no log-probability threshold is passed here).
model.predict()

# Checking the threshold

In [None]:
# Draw scyan's log-probability threshold plot for adata_WT1.
# NOTE(review): presumably used to pick the `log_prob_th` value passed to
# model.predict() in the next cell — confirm.
scyan.plot.log_prob_threshold(adata_WT1)

# UMAP visualization

In [None]:
# Re-run the prediction, this time passing a log-probability threshold
# through the `log_prob_th` parameter.
# NOTE(review): -40 is presumably read off the log_prob_threshold plot of the
# previous cell — confirm before reusing it on another dataset.
threshold = -40

# Perform the prediction with the specified threshold
model.predict(log_prob_th=threshold)

# Print the threshold to confirm the value used
print(f"Prediction performed with log probability threshold set to: {threshold}")

# Color Palette

In [None]:
# Color palette for UMAP visualization: one hex color per population name.
# It is passed as `palette` when the UMAP is colored by "scyan_pop".
# NOTE(review): keys presumably have to match the population names of the
# knowledge table exactly — verify against the table.
color_palette = {
    'LSC_CD34+_CD117+': '#D9534F',         # Red
    'LSC_CD34+_CD117-': '#C8102E',         # Dark Red
    'LSC_CD34-_CD117+': '#B02E26',         # Crimson
    'LSC_CD34-_CD117-': '#A7322A',         # Dark Crimson
    'HSC_CD117+': '#1E88E5',               # Blue
    'HSC_CD117-': '#1976D2',               # Dark Blue
    'CD34+CD38+CD123+ HSPCs': '#66BB6A',   # Light Green
    'CMP': '#4CAF50',                      # Green
    'MEP': '#43A047',                      # Dark Green
    'GMP': '#388E3C',                      # Olive Green
    'pDC': '#5C6BC0',                     # Medium Blue
    'NK': '#26A69A',                      # Teal
    'NK-CD62L-': '#004D40',               # Dark Teal
    'B cells': '#FFCA28',                 # Yellow
    'Plasma B': '#FFB300',                # Amber
    'Granulocytes': '#90A4AE',            # Grey
    'Basophils': '#78909C',               # Blue Grey
    'Naive T cells': '#AB47BC',           # Purple
    'CD45RA+ Memory Effector T cell': '#9C27B0', # Dark Purple
    'Central Memory T cell': '#8E24AA',   # Medium Purple
    'Transitional & Memory effector T cell': '#7B1FA2', # Deep Purple
    'Erythrocytes_CD45+': '#E57373',      # Light Coral
    'Erythrocytes_CD45-': '#F44336',      # Red
    'CD11B+_Monocytes': '#BCAAA4',        # Light Brown
    'CD11B+_Monocytes*': '#A1887F',       # Medium Brown
    'Promonocytes': '#D7CCC8',            # Beige
    'Mono_Myeloblastes_CD15+': '#7CB342', # Olive Drab
    'Mono_Myeloblastes_CD15-': '#9CCC65', # Light Olive
}

# Hierarchical color palette: one hex color per population group.
# It is passed as `palette` when the UMAP is colored by "scyan_pop_level".
# NOTE(review): keys presumably have to match the group-level names of the
# knowledge table exactly — verify against the table.
hierarchy_palette = {
    'LSC': '#D9534F',                    # Red
    'HSC': '#1E88E5',                    # Blue
    'Progenitors': '#66BB6A',            # Light Green
    'Dendritic': '#5C6BC0',              # Medium Blue
    'NK': '#26A69A',                    # Teal
    'B Cells': '#FFCA28',               # Yellow
    'Myeloid': '#90A4AE',               # Grey
    'T Cells': '#AB47BC',               # Purple
    'Erythrocytes': '#E57373',          # Light Coral
    'Monocytes': '#BCAAA4',             # Light Brown
    'NKCD62L-': '#004D40',              # Dark Teal
}

# UMAP level plotting

In [None]:
import matplotlib.pyplot as plt
import scyan

# Single 7x7 inch axis for the UMAP colored by population level
fig, ax = plt.subplots(1, 1, figsize=(7, 7))

# Draw the UMAP on that axis with the hierarchical palette
scyan.plot.umap(
    adata_WT1,
    color="scyan_pop_level",
    ax=ax,
    title="Scyan predictions_WT1",
    palette=hierarchy_palette,
)

# Show the figure, then close it to release its memory
plt.show()
plt.close(fig)


# UMAP plotting (normal)

In [None]:
import matplotlib.pyplot as plt
import scyan

# Single 7x7 inch axis (change figsize to adjust width and height)
fig, ax = plt.subplots(1, 1, figsize=(7, 7))

# Draw the UMAP colored by predicted population on that axis
scyan.plot.umap(
    adata_WT1,
    color="scyan_pop",
    ax=ax,
    title="Scyan predictions_WT1",
    palette=color_palette,
)

# Show the figure
plt.show()

# UMAP for each file

In [None]:
import numpy as np
import scyan

# Draw one UMAP per source file, visiting the distinct values of the "file"
# column of adata_WT1.obs in the order returned by np.unique
file_labels = adata_WT1.obs["file"]
for filename in np.unique(file_labels):
    # Keep only the cells that come from the current file
    from_this_file = file_labels == filename
    adata_one_file = adata_WT1[from_this_file]

    # UMAP of that subset, titled with the file name
    scyan.plot.umap(
        adata_one_file,
        color="scyan_pop",
        title=filename,
        palette=color_palette,
    )

# Specific cell population dynamic plotting

In [None]:
import scyan

# Populations of interest for the dynamics plot
special_populations = [
    'Granulocytes',
    'Basophils',
    'CD11B+_Monocytes',
    'CD11B+_Monocytes*',
    'Promonocytes',
    'Mono_Myeloblastes_CD15+',
    'Mono_Myeloblastes_CD15-',
    'CMP',
    'MEP',
    'GMP',
    'Erythrocytes_CD45+',
    'Erythrocytes_CD45-',
    'LSC_CD34+_CD117+',
    'LSC_CD34+_CD117-',
    'LSC_CD34-_CD117+',
    'LSC_CD34-_CD117-',
]

# Keep only the cells whose predicted population is in the list above
is_special = adata_WT1.obs['scyan_pop'].isin(special_populations)
adata_myeloid = adata_WT1[is_special]

# Population dynamics of the selected cells: time on "time_point",
# grouped by "treatment", populations read from "scyan_pop"
scyan.plot.pop_dynamics(
    adata_myeloid,
    time_key="time_point",
    groupby="treatment",
    key="scyan_pop",
)
