In [35]:
import pandas as pd
import numpy as np
from bioinfokit import analys, visuz
import matplotlib.pyplot as plt

def plot_volcano(df, lfc_threshold, padj_threshold, show=True):
    """
    Draw a volcano plot (log2 fold change vs. p-value) with bioinfokit.

    Rows with a missing `P_Value`, or with a missing / non-positive
    `Fold_Change` (log2 is undefined there), are excluded. The remaining rows
    are labelled in a `color_add_axy` column and the count of each label is
    printed before plotting. The caller's DataFrame is not modified.

    df: pandas DataFrame containing the data; the columns `Fold_Change`,
        `P_Value` and `Compound` are used
    lfc_threshold: the log2 fold change threshold; rows at or above
        +threshold count as up, rows at or below -threshold count as down
    padj_threshold: the p-value threshold, compared against `P_Value`
    show: if True (default) the figure is displayed; if False bioinfokit
        saves it to a file instead of displaying it
    """
    # Filter first, then take ONE explicit copy: assigning new columns onto
    # the result of dropna()/boolean indexing can raise SettingWithCopyWarning.
    with_pvalue = df.dropna(subset=['P_Value'])
    # NaN > 0 is False, so missing fold changes are excluded here as well.
    data = with_pvalue.loc[with_pvalue['Fold_Change'] > 0].copy()

    # Vectorized log2; every remaining fold change is strictly positive.
    data['log2_FC'] = np.log2(data['Fold_Change'].astype(float))

    # Categorize the data into upregulated, downregulated, and non-significant
    significant = data['P_Value'] <= padj_threshold
    data['color_add_axy'] = 'Non-significant'
    data.loc[significant & (data['log2_FC'] >= lfc_threshold), 'color_add_axy'] = 'Upregulated'
    data.loc[significant & (data['log2_FC'] <= -lfc_threshold), 'color_add_axy'] = 'Downregulated'

    # A fixed category list keeps all three labels in the printed counts,
    # even when one of them has zero rows.
    data['color_add_axy'] = pd.Categorical(
        data['color_add_axy'],
        categories=['Upregulated', 'Downregulated', 'Non-significant'],
    )

    # Print counts of each category
    print(data['color_add_axy'].value_counts())

    # Kept so later cells see the same global default as before. The volcano
    # figure itself is sized through `dim` below, not through rcParams.
    plt.rcParams['figure.figsize'] = [6, 6]

    # `show=True` makes bioinfokit display the figure; with its default
    # (`show=False`) the plot is written to a file instead of being shown,
    # so a separate plt.show() afterwards has nothing to render.
    visuz.GeneExpression.volcano(
        df=data,
        lfc='log2_FC',
        pv='P_Value',
        sign_line=True,
        lfc_thr=(lfc_threshold, lfc_threshold),
        pv_thr=(padj_threshold, padj_threshold),
        plotlegend=True,
        legendpos='upper right',
        legendanchor=(1.46, 1),
        color=('maroon', 'gainsboro', 'steelblue'),
        theme='whitesmoke',
        valpha=1,
        dotsize=5,
        geneid='Compound',
        dim=(6, 6),
        show=show,
    )

Upregulated        9
Non-significant    9
Downregulated      2
Name: color_add_axy, dtype: int64


In [36]:
# Render the volcano plot for the current data and thresholds.
# `df`, `lfc_threshold` and `padj_threshold` must already be defined by earlier cells.
plot_volcano(
    df=df,
    lfc_threshold=lfc_threshold,
    padj_threshold=padj_threshold,
)

Upregulated        9
Non-significant    9
Downregulated      2
Name: color_add_axy, dtype: int64
