In [None]:
import plotly.express as px
import pandas as pd

# Read the benchmarking results. The file is comma-separated (CSV), not TSV.
file_path = 'results_20240122.csv'
df = pd.read_csv(file_path, sep=',')


def _safe_ratio(numerator, denominator):
    """Divide two Series element-wise, returning 0.0 wherever the denominator is exactly 0.

    Plain pandas division yields NaN (0/0) or inf (x/0) in that case. Those values
    propagate into F1_Score and then into every plot that reads it, including the
    scatter that uses F1_Score as marker size. Rows whose denominator is missing
    (NaN) are left as NaN so that absent data is not reported as a score of 0.
    """
    return (numerator / denominator).mask(denominator == 0, 0.0)


# Confusion counts derived from the shared/test/truth variant totals.
df['True_Positives'] = df['Shared_Variants']
df['False_Positives'] = df['test_vcf_variants'] - df['True_Positives']
df['False_Negatives'] = df['truth_total_variants'] - df['True_Positives']

# Detection metrics. A zero denominator (no calls made, or an empty truth set)
# is scored as 0 rather than NaN/inf.
df['Precision'] = _safe_ratio(df['True_Positives'], df['True_Positives'] + df['False_Positives'])
df['Recall'] = _safe_ratio(df['True_Positives'], df['True_Positives'] + df['False_Negatives'])
df['F1_Score'] = _safe_ratio(2 * (df['Precision'] * df['Recall']), df['Precision'] + df['Recall'])

# NOTE: because False_Negatives = truth_total_variants - True_Positives, this is
# the same quantity as Recall (TP / truth total). The column is kept under its
# original name because the plotting cells read 'Accuracy'.
df['Accuracy'] = _safe_ratio(df['True_Positives'], df['truth_total_variants'])


# Plotting

# fig1: detection metrics per tool, one bar per metric.
# Passing several y columns puts plotly express into wide-form mode, where the
# metric name lives in the generated 'variable' column. Colouring by 'Tool'
# instead of by that column made all three metrics (and all samples) stack into
# one same-coloured bar per tool, so the bar height was a meaningless sum.
# The metrics are therefore averaged per tool, coloured by metric (the wide-form
# default) and drawn side by side.
metric_columns = ['F1_Score', 'Precision', 'Accuracy']
tool_metric_means = df.groupby('Tool', as_index=False)[metric_columns].mean()
fig1 = px.bar(tool_metric_means, x='Tool', y=metric_columns, title='Variant Detection Metrics',
              labels={'value': 'Score', 'variable': 'Metric'},
              barmode='group')

# fig2: one marker per row of df showing its shared percentage.
fig2 = px.scatter(df, x='Tool', y='Shared_Percentage', title='Shared Percentage per Tool')

# fig3: spread of the shared-variant counts within each tool.
fig3 = px.box(df, x='Tool', y='Shared_Variants', title='Distribution of Shared Variants per Tool')

# fig4: Total_Variants aggregated per tool (px.histogram aggregates y within each
# x bin; with a y column given the documented default aggregation is the sum).
fig4 = px.histogram(df, x='Tool', y='Total_Variants', title='Total Variants per Tool')

# Display all four overview plots below this cell.
fig1.show()
fig2.show()
fig3.show()
fig4.show()


In [None]:
# F1 score scatter: one marker per row of df, coloured by tool.
f1_axis_labels = {'F1_Score': 'F1 Score', 'Tool': 'Tool'}
fig = px.scatter(
    df,
    x='Tool',
    y='F1_Score',
    color='Tool',
    labels=f1_axis_labels,
    title='F1 Score for Variant Detection',
)

fig.show()

In [None]:
import plotly.express as px

# F1 score scatter again, this time with marker size driven by the F1 score.
sized_scatter_options = dict(
    x='Tool',
    y='F1_Score',
    color='Tool',
    size='F1_Score',
    size_max=15,  # upper bound on marker size; adjust as needed
    labels={'F1_Score': 'F1 Score', 'Tool': 'Tool'},
    title='F1 Score for Variant Detection',
)
fig = px.scatter(df, **sized_scatter_options)

fig.show()

In [None]:
# Mean Shared_Percentage per tool, flattened back to a (Tool, Shared_Percentage) frame.
shared_pct_by_tool = df.groupby('Tool')['Shared_Percentage']
avg_shared_percentage = shared_pct_by_tool.agg('mean').reset_index()

# Bar chart of the per-tool averages, one colour per tool.
fig = px.bar(
    avg_shared_percentage,
    x='Tool',
    y='Shared_Percentage',
    color='Tool',
    labels={'Shared_Percentage': 'Average Shared Percentage', 'Tool': 'Tool'},
    title='Average Shared Percentage for Each Tool',
)

fig.show()

In [None]:
# Build one bar chart per sample combining shared-variant count and percentage.
# Display is currently switched off (see the commented-out show() below), so
# after the loop only the figure for the last sample remains in `fig`.
for sample_id in df['Sample_ID'].unique():
    sample_df = df.loc[df['Sample_ID'].eq(sample_id)]

    # Both columns are passed together, so they share a single y-axis.
    fig = px.bar(
        sample_df,
        x='Tool',
        y=['Shared_Variants', 'Shared_Percentage'],
        color='Tool',
        labels={'value': 'Count', 'variable': 'Metric', 'Tool': 'Tool'},
        title=f'Number and Percentage of Shared Variants for {sample_id}',
    )

    # fig.show()

In [None]:
# For every sample, show two bar charts: shared-variant percentage per tool and
# F1 score per tool.
for sample_id in df['Sample_ID'].unique():
    sample_df = df[df['Sample_ID'] == sample_id]

    # Percentage of shared variants per tool. The column is passed directly (not
    # as a one-element list) so the y-axis carries a percentage label; the
    # previous wide-form call labelled this axis "Count".
    fig = px.bar(sample_df, x='Tool', y='Shared_Percentage',
                 title=f'Percentage of Shared Variants for {sample_id}',
                 labels={'Shared_Percentage': 'Shared Percentage', 'Tool': 'Tool'},
                 color='Tool', barmode='group')

    fig.show()

    # F1 score per tool for the same sample.
    fig_f1_score = px.bar(sample_df, x='Tool', y='F1_Score',
                          title=f'F1 Score for {sample_id}',
                          labels={'F1_Score': 'F1 Score', 'Tool': 'Tool'},
                          color='Tool')

    fig_f1_score.show()

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def _dual_axis_bar_line(tools, bar_values, line_values, bar_name, line_name, title):
    """Build a figure with bars on the primary y-axis and a line on the secondary y-axis.

    Args:
        tools: x values shared by both traces.
        bar_values: y values for the bar trace (primary axis).
        line_values: y values for the lines+markers trace (secondary axis).
        bar_name: legend name of the bar trace; also used, in bold, as the
            primary y-axis title.
        line_name: legend name of the line trace; also used, in bold, as the
            secondary y-axis title.
        title: figure title.

    Returns:
        The assembled plotly figure (not yet shown).
    """
    figure = make_subplots(specs=[[{"secondary_y": True}]])

    figure.add_trace(go.Bar(x=tools, y=bar_values, name=bar_name), secondary_y=False)
    figure.add_trace(
        go.Scatter(x=tools, y=line_values, mode='lines+markers', name=line_name),
        secondary_y=True,
    )

    figure.update_layout(title_text=title)
    figure.update_xaxes(title_text='Tool')
    figure.update_yaxes(title_text=f'<b>{bar_name}</b>', secondary_y=False)
    figure.update_yaxes(title_text=f'<b>{line_name}</b>', secondary_y=True)
    return figure


# Two dual-axis figures per sample: shared variants (count vs. percentage) and
# detection quality (F1 score vs. precision).
for sample_id in df['Sample_ID'].unique():
    sample_df = df[df['Sample_ID'] == sample_id]
    tool_names = sample_df['Tool']

    # Shared-variant count as bars, shared percentage as a line.
    fig = _dual_axis_bar_line(
        tool_names,
        sample_df['Shared_Variants'],
        sample_df['Shared_Percentage'],
        bar_name='Shared Variants',
        line_name='Shared Percentage',
        title=f'Number and Percentage of Shared Variants for {sample_id}',
    )
    fig.show()

    # F1 score as bars, precision as a line.
    fig_f1_score = _dual_axis_bar_line(
        tool_names,
        sample_df['F1_Score'],
        sample_df['Precision'],
        bar_name='F1 Score',
        line_name='Precision',
        title=f'F1 Score for {sample_id}',
    )
    fig_f1_score.show()