# WGCNA
## Finding modules using weighted gene co-expression network analysis (WGCNA)

Load packages

In [None]:
import PyWGCNA
import wgcna.utils as rutils
import matplotlib.pyplot as plt
import pandas as pd

# Notebook-wide display defaults: matplotlib save/label settings in one
# update call, then the pandas row limit for printed tables.
plt.rcParams.update({
    'savefig.bbox': 'tight',
    'axes.labelsize': 10,
})
pd.set_option('display.max_rows', 10)

# Enable IPython's autoreload extension in mode 2, so that imported modules
# (e.g. the local wgcna.utils helpers) are re-imported before each cell runs
# and edits to them take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

## Perform WGCNA

Settings

In [None]:
# Species name and alias
species = "Species 1"
name = 'S1'

# Paths for count matrix and metadata.
# The per-species files are derived from `name`, so switching to another
# dataset only requires changing the alias above. This assumes the inputs
# follow the "<input_dir>/<name>.<suffix>" layout used here; assign an explicit
# path to any variable whose file is named differently.
input_dir = "../../input/testdata_out"
count_matrix = f"{input_dir}/{name}.isoform.TMM.matrix"
sample_mapping = f"{input_dir}/{name}.sample_mapping.tsv"
iprid = f"{input_dir}/{name}.iprid.tsv"
goid = f"{input_dir}/{name}.goid.tsv"
# The orthogroup table is shared across species, hence no alias in its name.
orthogroups = f"{input_dir}/N0.tsv"

# Set save options
save_tom = True
save_adjacency_matrix = False
save_WGCNA = False
figure_type = "png"

# Reduce the count matrix size?
reduce_matrix = False

# Output path (the trailing slash is required: later cells append file names
# directly to this string)
output_path = f"/vol/blast/wgcna/output/{name}/"

Prepare WGCNA object

In [None]:
print("Preparing WGCNA object for", name, "dataset...")

# Echo the run configuration in a fixed order: general settings first, then
# the save options, then the matrix-reduction switch.
for label, value in (
    ("Species:", species),
    ("Alias:", name),
    ("Count matrix:", count_matrix),
    ("Sample mapping:", sample_mapping),
    ("InterPro:", iprid),
    ("GO Terms:", goid),
    ("Orthogroups:", orthogroups),
    ("Output path:", output_path),
    ("Save TOM:", save_tom),
    ("Save adjacency matrix:", save_adjacency_matrix),
    ("Save WGCNA results:", save_WGCNA),
    ("Reduce matrix:", reduce_matrix),
):
    print(label, value)

# Destination of the transformed count matrix; PyWGCNA reads it back below.
matrix_file = f"{output_path}{name}_matrix.csv"

# Make sure the output directory exists, then transform the raw count matrix.
rutils.create_dir(output_path)
count_df = rutils.transform_count_matrix(count_matrix)
if reduce_matrix:
    # Optional down-sizing for quick test runs (presumably drops a random
    # 90% of the columns -- confirm in rutils.remove_random_columns).
    count_df = rutils.remove_random_columns(count_df, percentage=0.9)
count_df.to_csv(matrix_file, index=False)

# Build the PyWGCNA object from the saved matrix and attach sample metadata.
wgcna_kwargs = dict(
    name=name,
    species=species,
    geneExpPath=matrix_file,
    outputPath=output_path,
    figureType=figure_type,
    save=True,
)
pyWGCNA_obj = PyWGCNA.WGCNA(**wgcna_kwargs)
pyWGCNA_obj.updateSampleInfo(path=sample_mapping, sep='\t')

In [None]:
# Shell escape: list the output directory after the WGCNA object was prepared.
!tree {output_path}

Find Modules

In [None]:
print("Starting WGCNA analysis...")

# Step 1: preprocessing of the expression data
print("Preprocessing data...")
pyWGCNA_obj.preprocess()

# Step 2: module detection
print("Finding modules...")
pyWGCNA_obj.findModules()

# Step 3: colour the sample metadata.
# A "Combined_Trait" column is added to the expression object first; each of
# its distinct values then receives a colour from the stage colour helper.
rutils.add_combined_column(pyWGCNA_obj.datExpr, drop_others=True)
combined_stages = pyWGCNA_obj.datExpr.obs["Combined_Trait"].unique().tolist()
stage_colors = rutils.generate_stage_color_dict(custom_stages=combined_stages)
pyWGCNA_obj.setMetadataColor("Combined_Trait", stage_colors)

In [None]:
# Shell escape: list the output directory after module detection.
!tree {output_path}

Analyze Modules

In [None]:
# Run PyWGCNA's module analysis on the detected modules.
# NOTE(review): show=False presumably suppresses inline display of the
# generated figures -- confirm against the PyWGCNA documentation.
pyWGCNA_obj.analyseWGCNA(show=False)

In [None]:
# Shell escape: list the output directory after the module analysis.
!tree {output_path}

Save and process .h5ad

In [None]:
# Process GO terms and IPR
gaf_path = goid if goid.endswith(".gaf.gz") else None
ortho_file = orthogroups if orthogroups.endswith(".tsv") else None

if not gaf_path and goid.endswith(".tsv"):
    rutils.add_go_terms_to_adata(pyWGCNA_obj.datExpr, goid)

if iprid:
    rutils.add_ipr_columns(pyWGCNA_obj.datExpr, iprid)

rutils.prepare_and_save_wgcna(
    pyWGCNA_obj, 
    output_path, 
    gaf_path=gaf_path, 
    ortho_file=ortho_file, 
    save_tom=save_tom, 
    save_adjacency_matrix=save_adjacency_matrix, 
    save_WGCNA=save_WGCNA
)

In [None]:
# Shell escape: list the final contents of the output directory.
!tree {output_path}