In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [5]:
'''Volcano plot with matplotlib showing highlighted candidate proteins'''


# Function to create a volcano plot with highlighted proteins
def create_volcano_plot(df, fold_change_col, p_adj_col, title, output_file, highlight_proteins,
                        significance_cutoff=1.3, fold_change_up_threshold=1,
                        fold_change_down_threshold=-1, gene_col='Gene'):
    """Draw a static volcano plot with highlighted proteins and save it to disk.

    Parameters
    ----------
    df : pandas.DataFrame
        Results table. A '-log10(p_adj)' column is added to it in place.
    fold_change_col : str
        Name of the column plotted on the x axis.
    p_adj_col : str
        Name of the column holding the adjusted p-values.
    title : str
        Figure title.
    output_file : str
        Path handed to ``Figure.savefig``.
    highlight_proteins : list-like
        Values of ``gene_col`` to overlay as larger green markers.
    significance_cutoff : float, optional
        Threshold on -log10(p_adj). The default 1.3 is approximately
        -log10(0.05).
    fold_change_up_threshold, fold_change_down_threshold : float, optional
        Thresholds on ``fold_change_col`` for the up-/downregulated groups.
    gene_col : str, optional
        Column matched against ``highlight_proteins``.

    Returns
    -------
    None
    """
    # Zero p-values become NaN before the log transform (log10(0) is -inf);
    # those rows therefore belong to neither regulated group and are not drawn.
    df['-log10(p_adj)'] = -np.log10(df[p_adj_col].replace(0, np.nan))

    fold_change = df[fold_change_col]
    neg_log_p = df['-log10(p_adj)']

    is_significant = neg_log_p > significance_cutoff
    is_up = is_significant & (fold_change > fold_change_up_threshold)
    is_down = is_significant & (fold_change < fold_change_down_threshold)

    upregulated = df[is_up]
    downregulated = df[is_down]
    # Everything that is not up- or downregulated is background. Defining it as
    # the complement keeps points lying exactly on a threshold in the plot
    # instead of silently dropping them.
    non_sig = df[~(is_up | is_down)]
    shared_proteins = df[df[gene_col].isin(highlight_proteins)]

    fig, ax = plt.subplots(figsize=(10, 8))
    try:
        # Draw order matters: background first, highlighted proteins last (on top).
        ax.scatter(non_sig[fold_change_col], non_sig['-log10(p_adj)'],
                   color='gray', alpha=0.7, s=10)
        ax.scatter(upregulated[fold_change_col], upregulated['-log10(p_adj)'],
                   color='red', alpha=0.7, s=10, label='Upregulated')
        ax.scatter(downregulated[fold_change_col], downregulated['-log10(p_adj)'],
                   color='blue', alpha=0.7, s=10, label='Downregulated')
        ax.scatter(shared_proteins[fold_change_col], shared_proteins['-log10(p_adj)'],
                   color='green', alpha=0.9, s=40, label='TBI Candidate Proteins',
                   edgecolor='black')

        # Threshold guide lines
        ax.axvline(x=fold_change_up_threshold, color='gray', linestyle='--', linewidth=1)
        ax.axvline(x=fold_change_down_threshold, color='gray', linestyle='--', linewidth=1)
        ax.axhline(y=significance_cutoff, color='gray', linestyle='--', linewidth=1)

        ax.set_xlabel('Log2 Fold Change', fontsize=12)
        ax.set_ylabel('-Log10 (Adjusted P-Value)', fontsize=12)
        ax.set_title(title, fontsize=14)
        ax.legend(loc='upper right')

        fig.savefig(output_file)
    finally:
        # Close this figure even if drawing or saving failed, so figures do not
        # accumulate when the function is called in a loop.
        plt.close(fig)

# Load the list of proteins to highlight from another DataFrame or file
filepathcandidateproteins = '/media/melissa/EXTERNAL_USB/KusterLab_Melissa_Vorster/FinalThesis/Input_exceldf/CandidateProteins.xlsx'
highlight_df = pd.read_excel(filepathcandidateproteins)
# Drop blank cells first: Series.isin matches NaN against NaN, so a blank entry
# here would mark every result row with a missing gene name as a candidate.
highlight_proteins = highlight_df['Gene'].dropna().tolist()

# Load the results from Excel and create volcano plots
file_path = '/media/melissa/EXTERNAL_USB/KusterLab_Melissa_Vorster/FinalThesis/Input_exceldf/R_InputHandled_VolcanoPlots.xlsx'

# Open the workbook in a context manager so its file handle is released once
# every sheet has been processed (or if one of them fails).
with pd.ExcelFile(file_path) as excel_data:
    for sheet_name in excel_data.sheet_names:
        df_results = excel_data.parse(sheet_name)
        create_volcano_plot(df_results, 'log2_fold_change', 'p_adj', sheet_name,
                            f'/media/melissa/EXTERNAL_USB/KusterLab_Melissa_Vorster/FinalThesis/GC_Highlighted_{sheet_name}_volcano_plot.png',
                            highlight_proteins)

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Function to create a volcano plot with highlighted proteins
def create_volcano_plot(df, fold_change_col, p_adj_col, title, output_file, highlight_proteins,
                        significance_cutoff=1.3, fold_change_up_threshold=1,
                        fold_change_down_threshold=-1, gene_col='Gene'):
    """Draw a static volcano plot, save it, and report the regulated counts.

    Parameters
    ----------
    df : pandas.DataFrame
        Results table. A '-log10(p_adj)' column is added to it in place.
    fold_change_col : str
        Name of the column plotted on the x axis.
    p_adj_col : str
        Name of the column holding the adjusted p-values.
    title : str
        Figure title.
    output_file : str
        Path handed to ``Figure.savefig``.
    highlight_proteins : list-like
        Values of ``gene_col`` to overlay as larger green markers.
    significance_cutoff : float, optional
        Threshold on -log10(p_adj). The default 1.3 is approximately
        -log10(0.05).
    fold_change_up_threshold, fold_change_down_threshold : float, optional
        Thresholds on ``fold_change_col`` for the up-/downregulated groups.
    gene_col : str, optional
        Column matched against ``highlight_proteins``.

    Returns
    -------
    tuple of (int, int)
        Number of upregulated rows and number of downregulated rows.
    """
    # Zero p-values become NaN before the log transform (log10(0) is -inf);
    # those rows therefore belong to neither regulated group and are not counted.
    df['-log10(p_adj)'] = -np.log10(df[p_adj_col].replace(0, np.nan))

    fold_change = df[fold_change_col]
    neg_log_p = df['-log10(p_adj)']

    is_significant = neg_log_p > significance_cutoff
    is_up = is_significant & (fold_change > fold_change_up_threshold)
    is_down = is_significant & (fold_change < fold_change_down_threshold)

    upregulated = df[is_up]
    downregulated = df[is_down]
    # Background is the complement of the two regulated groups, so points lying
    # exactly on a threshold are still drawn (in gray) rather than dropped.
    non_sig = df[~(is_up | is_down)]
    shared_proteins = df[df[gene_col].isin(highlight_proteins)]

    fig, ax = plt.subplots(figsize=(10, 8))
    try:
        # Draw order matters: background first, highlighted proteins last (on top).
        ax.scatter(non_sig[fold_change_col], non_sig['-log10(p_adj)'],
                   color='gray', alpha=0.7, s=10)
        ax.scatter(upregulated[fold_change_col], upregulated['-log10(p_adj)'],
                   color='red', alpha=0.7, s=10, label='Upregulated')
        ax.scatter(downregulated[fold_change_col], downregulated['-log10(p_adj)'],
                   color='blue', alpha=0.7, s=10, label='Downregulated')
        ax.scatter(shared_proteins[fold_change_col], shared_proteins['-log10(p_adj)'],
                   color='green', alpha=0.9, s=40, label='TBI Candidate Proteins',
                   edgecolor='black')

        # Threshold guide lines
        ax.axvline(x=fold_change_up_threshold, color='gray', linestyle='--', linewidth=1)
        ax.axvline(x=fold_change_down_threshold, color='gray', linestyle='--', linewidth=1)
        ax.axhline(y=significance_cutoff, color='gray', linestyle='--', linewidth=1)

        ax.set_xlabel('Log2 Fold Change', fontsize=14)
        ax.set_ylabel('-Log10 (Adjusted P-Value)', fontsize=14)
        ax.set_title(title, fontsize=16)
        ax.legend(loc='upper right')

        fig.savefig(output_file)
    finally:
        # Close this figure even if drawing or saving failed, so figures do not
        # accumulate when the function is called in a loop.
        plt.close(fig)

    # Return the counts of upregulated and downregulated proteins
    return len(upregulated), len(downregulated)


# Load the list of proteins to highlight from another DataFrame or file
filepathcandidateproteins = '/media/melissa/EXTERNAL_USB/KusterLab_Melissa_Vorster/FinalThesis/Input_exceldf/CandidateProteins.xlsx'
highlight_df = pd.read_excel(filepathcandidateproteins)
# Drop blank cells first: Series.isin matches NaN against NaN, so a blank entry
# here would mark every result row with a missing gene name as a candidate.
highlight_proteins = highlight_df['Gene'].dropna().tolist()

# Load the results from Excel and create volcano plots with counts
file_path = '/media/melissa/EXTERNAL_USB/KusterLab_Melissa_Vorster/FinalThesis/Input_exceldf/R_InputHandled_VolcanoPlots.xlsx'

# Dictionary to store upregulated and downregulated counts for each comparison
regulation_counts = {}

# Open the workbook in a context manager so its file handle is released once
# every sheet has been processed (or if one of them fails).
with pd.ExcelFile(file_path) as excel_data:
    for sheet_name in excel_data.sheet_names:
        df_results = excel_data.parse(sheet_name)
        up_count, down_count = create_volcano_plot(
            df_results, 'log2_fold_change', 'p_adj', sheet_name,
            f'/media/melissa/EXTERNAL_USB/KusterLab_Melissa_Vorster/FinalThesis/{sheet_name}_volcano_plot2.png',
            highlight_proteins
        )
        # Store counts in dictionary, keyed by sheet (comparison) name
        regulation_counts[sheet_name] = {'Upregulated': up_count, 'Downregulated': down_count}

# Display or save the counts as needed
print(regulation_counts)


{'Injury1-SDT_vs_Injury0': {'Upregulated': 48, 'Downregulated': 54}, 'Injury2-2DG_vs_Injury0': {'Upregulated': 92, 'Downregulated': 74}, 'Injury2-2DG_vs_Injury1-SDT': {'Upregulated': 47, 'Downregulated': 35}}


In [3]:
'''Double vertical lines'''
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Function to create a volcano plot with highlighted proteins
def create_volcano_plot(df, fold_change_col, p_adj_col, title, output_file, highlight_proteins,
                        significance_cutoff=1.3, fold_change_up_threshold=0.5,
                        fold_change_down_threshold=-0.5, strong_fold_change_up_threshold=1,
                        strong_fold_change_down_threshold=-1, gene_col='Gene'):
    """Draw a volcano plot with two pairs of fold-change guide lines and save it.

    Rows are classified with the inner (``fold_change_*_threshold``) pair; the
    outer ("strong") pair is only drawn as additional guide lines.

    Parameters
    ----------
    df : pandas.DataFrame
        Results table. A '-log10(p_adj)' column is added to it in place.
    fold_change_col : str
        Name of the column plotted on the x axis.
    p_adj_col : str
        Name of the column holding the adjusted p-values.
    title : str
        Accepted for signature compatibility; this variant does not draw a title.
    output_file : str
        Path handed to ``Figure.savefig``.
    highlight_proteins : list-like
        Values of ``gene_col`` to overlay as larger green markers.
    significance_cutoff : float, optional
        Threshold on -log10(p_adj). The default 1.3 is approximately
        -log10(0.05).
    fold_change_up_threshold, fold_change_down_threshold : float, optional
        Thresholds on ``fold_change_col`` used to classify rows.
    strong_fold_change_up_threshold, strong_fold_change_down_threshold : float, optional
        Positions of the second pair of vertical guide lines.
    gene_col : str, optional
        Column matched against ``highlight_proteins``.

    Returns
    -------
    tuple of (int, int)
        Number of upregulated rows and number of downregulated rows.
    """
    # Zero p-values become NaN before the log transform (log10(0) is -inf);
    # those rows therefore belong to neither regulated group and are not counted.
    df['-log10(p_adj)'] = -np.log10(df[p_adj_col].replace(0, np.nan))

    fold_change = df[fold_change_col]
    neg_log_p = df['-log10(p_adj)']

    is_significant = neg_log_p > significance_cutoff
    is_up = is_significant & (fold_change > fold_change_up_threshold)
    is_down = is_significant & (fold_change < fold_change_down_threshold)

    upregulated = df[is_up]
    downregulated = df[is_down]
    # Background is the complement of the two regulated groups, so points lying
    # exactly on a threshold are still drawn (in gray) rather than dropped.
    non_sig = df[~(is_up | is_down)]
    shared_proteins = df[df[gene_col].isin(highlight_proteins)]

    fig, ax = plt.subplots(figsize=(10, 8))
    try:
        # Draw order matters: background first, highlighted proteins last (on top).
        ax.scatter(non_sig[fold_change_col], non_sig['-log10(p_adj)'],
                   color='gray', alpha=0.7, s=10)
        # This variant colors upregulated blue and downregulated red.
        ax.scatter(upregulated[fold_change_col], upregulated['-log10(p_adj)'],
                   color='blue', alpha=0.7, s=10, label='Upregulated')
        ax.scatter(downregulated[fold_change_col], downregulated['-log10(p_adj)'],
                   color='red', alpha=0.7, s=10, label='Downregulated')
        ax.scatter(shared_proteins[fold_change_col], shared_proteins['-log10(p_adj)'],
                   color='green', alpha=0.9, s=40, label='TBI Candidate Proteins',
                   edgecolor='black')

        # Inner (classification) and outer ("strong") fold-change guide lines
        ax.axvline(x=fold_change_up_threshold, color='gray', linestyle='--', linewidth=1)
        ax.axvline(x=fold_change_down_threshold, color='gray', linestyle='--', linewidth=1)
        ax.axvline(x=strong_fold_change_up_threshold, color='darkgray', linestyle='--', linewidth=1)
        ax.axvline(x=strong_fold_change_down_threshold, color='darkgray', linestyle='--', linewidth=1)
        ax.axhline(y=significance_cutoff, color='gray', linestyle='--', linewidth=1)

        ax.set_xlabel('Log2 Fold Change', fontsize=14)
        ax.set_ylabel('-Log10 (Adjusted P-Value)', fontsize=14)
        ax.legend(loc='lower left')

        fig.savefig(output_file)
    finally:
        # Close this figure even if drawing or saving failed, so figures do not
        # accumulate when the function is called in a loop.
        plt.close(fig)

    # Return the counts of upregulated and downregulated proteins
    return len(upregulated), len(downregulated)

# Load the list of proteins to highlight from another DataFrame or file
filepathcandidateproteins = '/media/melissa/EXTERNAL_USB/KusterLab_Melissa_Vorster/FinalThesis/Input_exceldf/CandidateProteins.xlsx'
highlight_df = pd.read_excel(filepathcandidateproteins)
# Drop blank cells first: Series.isin matches NaN against NaN, so a blank entry
# here would mark every result row with a missing gene name as a candidate.
highlight_proteins = highlight_df['Gene'].dropna().tolist()

# Load the results from Excel and create volcano plots with counts
file_path = '/media/melissa/EXTERNAL_USB/KusterLab_Melissa_Vorster/FinalThesis/Input_exceldf/R_InputHandled_VolcanoPlots.xlsx'

# Dictionary to store upregulated and downregulated counts for each comparison
regulation_counts = {}

# Open the workbook in a context manager so its file handle is released once
# every sheet has been processed (or if one of them fails).
with pd.ExcelFile(file_path) as excel_data:
    for sheet_name in excel_data.sheet_names:
        df_results = excel_data.parse(sheet_name)
        up_count, down_count = create_volcano_plot(
            df_results, 'log2_fold_change', 'p_adj', sheet_name,
            f'/media/melissa/EXTERNAL_USB/KusterLab_Melissa_Vorster/FinalThesis/Dbl_{sheet_name}_volcano_plot2.png',
            highlight_proteins
        )

        # Store counts in dictionary, keyed by sheet (comparison) name
        regulation_counts[sheet_name] = {'Upregulated': up_count, 'Downregulated': down_count}

# Display or save the counts as needed
print(regulation_counts)


{'Injury1-SDT_vs_Injury0': {'Upregulated': 246, 'Downregulated': 233}, 'Injury2-2DG_vs_Injury0': {'Upregulated': 346, 'Downregulated': 405}, 'Injury2-2DG_vs_Injury1-SDT': {'Upregulated': 194, 'Downregulated': 216}}


In [6]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go

# Function to create an interactive volcano plot with Plotly
def create_volcano_plot(df, fold_change_col, p_adj_col, title, output_file, highlight_proteins,
                        significance_cutoff=2, fold_change_up_threshold=0.5,
                        fold_change_down_threshold=-0.5, strong_fold_change_up_threshold=1,
                        strong_fold_change_down_threshold=-1, gene_col='Gene'):
    """Build an interactive Plotly volcano plot and write it to an HTML file.

    Rows are classified with the inner (``fold_change_*_threshold``) pair; the
    outer ("strong") pair is only drawn as additional guide lines. Hover text
    shows the 'Protein' and 'Gene' columns of each point.

    Parameters
    ----------
    df : pandas.DataFrame
        Results table; must contain 'Protein' and 'Gene' columns for the hover
        text. A '-log10(p_adj)' column is added to it in place.
    fold_change_col : str
        Name of the column plotted on the x axis.
    p_adj_col : str
        Name of the column holding the adjusted p-values.
    title : str
        Figure title.
    output_file : str
        Path handed to ``Figure.write_html``.
    highlight_proteins : list-like
        Values of ``gene_col`` to overlay as larger green markers.
    significance_cutoff : float, optional
        Threshold on -log10(p_adj); defaults to 2.
    fold_change_up_threshold, fold_change_down_threshold : float, optional
        Thresholds on ``fold_change_col`` used to classify rows.
    strong_fold_change_up_threshold, strong_fold_change_down_threshold : float, optional
        Positions of the second pair of vertical guide lines.
    gene_col : str, optional
        Column matched against ``highlight_proteins``.

    Returns
    -------
    tuple of (int, int)
        Number of upregulated rows and number of downregulated rows.
    """
    def _hover_labels(frame):
        # Build the labels from the two columns directly instead of a row-wise
        # DataFrame.apply: same text per point, always a plain list (also for
        # an empty selection), and no per-row Series construction.
        return [
            f"Protein ID: {protein}<br>Gene: {gene}"
            for protein, gene in zip(frame['Protein'], frame['Gene'])
        ]

    def _marker_trace(frame, name, marker):
        # One scatter trace of markers for a subset of rows.
        return go.Scatter(
            x=frame[fold_change_col],
            y=frame['-log10(p_adj)'],
            mode='markers',
            marker=marker,
            name=name,
            hovertext=_hover_labels(frame),
            hoverinfo='text'
        )

    # Zero p-values become NaN before the log transform (log10(0) is -inf);
    # those rows therefore belong to neither regulated group and are not counted.
    df['-log10(p_adj)'] = -np.log10(df[p_adj_col].replace(0, np.nan))

    fold_change = df[fold_change_col]
    neg_log_p = df['-log10(p_adj)']

    is_significant = neg_log_p > significance_cutoff
    is_up = is_significant & (fold_change > fold_change_up_threshold)
    is_down = is_significant & (fold_change < fold_change_down_threshold)

    upregulated = df[is_up]
    downregulated = df[is_down]
    # Background is the complement of the two regulated groups, so points lying
    # exactly on a threshold are still drawn (in gray) rather than dropped.
    non_sig = df[~(is_up | is_down)]
    shared_proteins = df[df[gene_col].isin(highlight_proteins)]

    fig = go.Figure()

    # Trace order matters: background first, highlighted proteins last (on top).
    fig.add_trace(_marker_trace(non_sig, 'Non-significant', dict(color='gray', size=8)))
    fig.add_trace(_marker_trace(upregulated, 'Upregulated', dict(color='blue', size=8)))
    fig.add_trace(_marker_trace(downregulated, 'Downregulated', dict(color='red', size=8)))
    fig.add_trace(_marker_trace(
        shared_proteins, 'TBI Candidate Proteins',
        dict(color='green', size=10, line=dict(width=1, color='black'))
    ))

    # Vertical guide lines, drawn as line traces spanning 0 .. max(-log10 p)
    y_top = df['-log10(p_adj)'].max()
    for threshold in [fold_change_up_threshold, fold_change_down_threshold,
                      strong_fold_change_up_threshold, strong_fold_change_down_threshold]:
        fig.add_trace(go.Scatter(
            x=[threshold, threshold],
            y=[0, y_top],
            mode='lines',
            line=dict(color='gray', dash='dash'),
            name=f'Fold Change Threshold {threshold}'
        ))
    # Horizontal significance line spanning the observed fold-change range
    fig.add_trace(go.Scatter(
        x=[df[fold_change_col].min(), df[fold_change_col].max()],
        y=[significance_cutoff, significance_cutoff],
        mode='lines',
        line=dict(color='black', dash='dash'),
        name='Significance Cutoff'
    ))

    fig.update_layout(
        title=title,
        xaxis_title='Log2 Fold Change',
        yaxis_title='-Log10 (Adjusted P-Value)',
        legend_title='Legend',
        paper_bgcolor='white',
        plot_bgcolor='white',
        xaxis=dict(gridcolor='lightgray'),
        yaxis=dict(gridcolor='lightgray')
    )

    # Save the interactive plot as an HTML file
    fig.write_html(output_file)

    # Return counts of upregulated and downregulated proteins
    return len(upregulated), len(downregulated)

# Load the list of proteins to highlight
filepathcandidateproteins = '/media/melissa/EXTERNAL_USB/KusterLab_Melissa_Vorster/FinalThesis/Input_exceldf/CandidateProteins.xlsx'
highlight_df = pd.read_excel(filepathcandidateproteins)
# Drop blank cells first: Series.isin matches NaN against NaN, so a blank entry
# here would mark every result row with a missing gene name as a candidate.
highlight_proteins = highlight_df['Gene'].dropna().tolist()

# Load the results from Excel and create volcano plots
file_path = '/media/melissa/EXTERNAL_USB/KusterLab_Melissa_Vorster/FinalThesis/Input_exceldf/R_InputHandled_VolcanoPlots.xlsx'

# Dictionary to store upregulated and downregulated counts
regulation_counts = {}

# Open the workbook in a context manager so its file handle is released once
# every sheet has been processed (or if one of them fails).
with pd.ExcelFile(file_path) as excel_data:
    for sheet_name in excel_data.sheet_names:
        df_results = excel_data.parse(sheet_name)
        up_count, down_count = create_volcano_plot(
            df_results, 'log2_fold_change', 'p_adj', sheet_name,
            f'/media/melissa/EXTERNAL_USB/KusterLab_Melissa_Vorster/FinalThesis/Dbl_{sheet_name}_volcano_plot1.html',
            highlight_proteins
        )
        # Store counts in dictionary, keyed by sheet (comparison) name
        regulation_counts[sheet_name] = {'Upregulated': up_count, 'Downregulated': down_count}

# Display or save the counts
print(regulation_counts)


{'Injury1-SDT_vs_Injury0': {'Upregulated': 233, 'Downregulated': 212}, 'Injury2-2DG_vs_Injury0': {'Upregulated': 322, 'Downregulated': 385}, 'Injury2-2DG_vs_Injury1-SDT': {'Upregulated': 161, 'Downregulated': 205}}


In [6]:
# Show the columns of the frame parsed from the last sheet in the loop above.
# '-log10(p_adj)' is listed because create_volcano_plot assigns that column on
# the frame it is given.
print(df_results.columns)


Index(['Protein', 'Gene', 'Comparison', 'log2_fold_change', 'Lower', 'Upper',
       'p_adj', '-log10(p_adj)'],
      dtype='object')
