# Control + Late

In [1]:
import numpy as np
import pandas as pd
from scipy.stats import wasserstein_distance

# Build a gene-gene edge list of pairwise Wasserstein distances using every
# sample row in the CPM table, then save it as a CSV for MEGENA.
# NOTE(review): the section heading says "Control + Late", but no filter on
# `mgs_level` is applied in this cell, so every row of the file is used.
# Confirm the file only contains the intended MGS levels.

# Paths are named once so they are easy to change on another machine.
DATA_DIR = "C:/Users/Brayan Gutierrez/Desktop/RNAseq-AMD/Dataset"
INPUT_CSV = f"{DATA_DIR}/aak100_cpmdat.csv"
OUTPUT_CSV = f"{DATA_DIR}/wasserstein_edges.csv"

# Load gene expression data (rows are used as samples, columns as genes).
genes = pd.read_csv(INPUT_CSV)

# Keep only expression columns: drop the unnamed first column (presumably a
# saved row index -- confirm) and the MGS label.
expr = genes.drop(columns=["Unnamed: 0", "mgs_level"])
gene_names = expr.columns
expr_values = expr.to_numpy()

# Fail early with a readable message; wasserstein_distance rejects empty input.
if expr_values.shape[0] == 0:
    raise ValueError(f"No samples found in {INPUT_CSV}; cannot compute distances.")

# Compute pairwise Wasserstein (Earth Mover's) distances between genes.
# Each gene's column is treated as an empirical 1-D distribution, so the
# distance compares the two value distributions, not sample-wise pairs.
n_genes = len(gene_names)
edges = []
for i in range(n_genes):
    g1 = expr_values[:, i]  # invariant for the whole inner loop
    for j in range(i + 1, n_genes):
        W = wasserstein_distance(g1, expr_values[:, j])
        edges.append([gene_names[i], gene_names[j], W])

# Edge list with the (from, to, distance) columns used for MEGENA input.
edges_df = pd.DataFrame(edges, columns=["from", "to", "distance"])

# Ensure distances are nonnegative and finite. Clipping alone leaves +inf
# untouched and dropna() only removes NaN, so non-finite rows are filtered
# explicitly. astype(float) keeps this valid when `edges` is empty (the
# column would otherwise have object dtype).
edges_df = edges_df.assign(distance=edges_df["distance"].astype(float).clip(lower=0))
edges_df = edges_df[np.isfinite(edges_df["distance"])].dropna()

# Save to CSV
edges_df.to_csv(OUTPUT_CSV, index=False)

print("✅ Pairwise Wasserstein distances computed and saved for MEGENA.")

✅ Pairwise Wasserstein distances computed and saved for MEGENA.


# Control

In [1]:
import numpy as np
import pandas as pd
from scipy.stats import wasserstein_distance

# Build a gene-gene edge list of pairwise Wasserstein distances using only the
# control samples (mgs_level == "MGS1"), then save it as a CSV for MEGENA.

# Paths and the cohort label are named once so they are easy to change.
DATA_DIR = "C:/Users/Brayan Gutierrez/Desktop/RNAseq-AMD/Dataset"
INPUT_CSV = f"{DATA_DIR}/aak100_cpmdat.csv"
OUTPUT_CSV = f"{DATA_DIR}/wasserstein_edges_control.csv"
COHORT_LEVEL = "MGS1"

# Load gene expression data (rows are used as samples, columns as genes).
genes = pd.read_csv(INPUT_CSV)

# Restrict to the control cohort.
genes = genes[genes["mgs_level"] == COHORT_LEVEL]

# Fail early with a readable message; wasserstein_distance rejects empty input.
if genes.empty:
    raise ValueError(f"No rows with mgs_level == {COHORT_LEVEL!r} in {INPUT_CSV}.")

# Keep only expression columns: drop the unnamed first column (presumably a
# saved row index -- confirm) and the MGS label.
expr = genes.drop(columns=["Unnamed: 0", "mgs_level"])
gene_names = expr.columns
expr_values = expr.to_numpy()

# Compute pairwise Wasserstein (Earth Mover's) distances between genes.
# Each gene's column is treated as an empirical 1-D distribution, so the
# distance compares the two value distributions, not sample-wise pairs.
n_genes = len(gene_names)
edges = []
for i in range(n_genes):
    g1 = expr_values[:, i]  # invariant for the whole inner loop
    for j in range(i + 1, n_genes):
        W = wasserstein_distance(g1, expr_values[:, j])
        edges.append([gene_names[i], gene_names[j], W])

# Edge list with the (from, to, distance) columns used for MEGENA input.
edges_df = pd.DataFrame(edges, columns=["from", "to", "distance"])

# Ensure distances are nonnegative and finite. Clipping alone leaves +inf
# untouched and dropna() only removes NaN, so non-finite rows are filtered
# explicitly. astype(float) keeps this valid when `edges` is empty (the
# column would otherwise have object dtype).
edges_df = edges_df.assign(distance=edges_df["distance"].astype(float).clip(lower=0))
edges_df = edges_df[np.isfinite(edges_df["distance"])].dropna()

# Save to CSV
edges_df.to_csv(OUTPUT_CSV, index=False)

print("✅ Pairwise Wasserstein distances computed and saved for MEGENA.")

✅ Pairwise Wasserstein distances computed and saved for MEGENA.


# Late

In [2]:
import numpy as np
import pandas as pd
from scipy.stats import wasserstein_distance

# Build a gene-gene edge list of pairwise Wasserstein distances using only the
# late-stage samples (mgs_level == "MGS4"), then save it as a CSV for MEGENA.

# Paths and the cohort label are named once so they are easy to change.
DATA_DIR = "C:/Users/Brayan Gutierrez/Desktop/RNAseq-AMD/Dataset"
INPUT_CSV = f"{DATA_DIR}/aak100_cpmdat.csv"
OUTPUT_CSV = f"{DATA_DIR}/wasserstein_edges_late.csv"
COHORT_LEVEL = "MGS4"

# Load gene expression data (rows are used as samples, columns as genes).
genes = pd.read_csv(INPUT_CSV)

# Restrict to the late-stage cohort.
genes = genes[genes["mgs_level"] == COHORT_LEVEL]

# Fail early with a readable message; wasserstein_distance rejects empty input.
if genes.empty:
    raise ValueError(f"No rows with mgs_level == {COHORT_LEVEL!r} in {INPUT_CSV}.")

# Keep only expression columns: drop the unnamed first column (presumably a
# saved row index -- confirm) and the MGS label.
expr = genes.drop(columns=["Unnamed: 0", "mgs_level"])
gene_names = expr.columns
expr_values = expr.to_numpy()

# Compute pairwise Wasserstein (Earth Mover's) distances between genes.
# Each gene's column is treated as an empirical 1-D distribution, so the
# distance compares the two value distributions, not sample-wise pairs.
n_genes = len(gene_names)
edges = []
for i in range(n_genes):
    g1 = expr_values[:, i]  # invariant for the whole inner loop
    for j in range(i + 1, n_genes):
        W = wasserstein_distance(g1, expr_values[:, j])
        edges.append([gene_names[i], gene_names[j], W])

# Edge list with the (from, to, distance) columns used for MEGENA input.
edges_df = pd.DataFrame(edges, columns=["from", "to", "distance"])

# Ensure distances are nonnegative and finite. Clipping alone leaves +inf
# untouched and dropna() only removes NaN, so non-finite rows are filtered
# explicitly. astype(float) keeps this valid when `edges` is empty (the
# column would otherwise have object dtype).
edges_df = edges_df.assign(distance=edges_df["distance"].astype(float).clip(lower=0))
edges_df = edges_df[np.isfinite(edges_df["distance"])].dropna()

# Save to CSV
edges_df.to_csv(OUTPUT_CSV, index=False)

print("✅ Pairwise Wasserstein distances computed and saved for MEGENA.")

✅ Pairwise Wasserstein distances computed and saved for MEGENA.
