In [85]:
from pathlib import Path

import numpy as np
import pandas as pd

import plotly.graph_objects as go
import plotly.io as pio
from plotly.subplots import make_subplots

In [3]:
def format_number(number):
    """Abbreviate a number with a K/M/B/T suffix for use in chart labels.

    The value is scaled by powers of 1000 until it drops below 1000, then
    rendered with one decimal place when it is below 10 and with none
    otherwise (e.g. 3 -> '3.0', 7743 -> '7.7K', 93450014 -> '93M').

    Parameters
    ----------
    number : int or float
        The value to abbreviate. Negative values keep their sign and are
        abbreviated by magnitude.

    Returns
    -------
    str
        The abbreviated value, or scientific notation (``'1.0e+15'``) when
        the magnitude is too large for the 'T' suffix.
    """
    suffixes = ['', 'K', 'M', 'B', 'T']

    # Work on the magnitude so that negative values pick the same suffix as
    # their positive counterparts instead of always matching the first one.
    sign = '-' if number < 0 else ''
    absolute = abs(number)

    for exponent, suffix in enumerate(suffixes):
        scaled = absolute / (1000 ** exponent)
        if scaled >= 1000:
            continue

        # Choose the precision from the *rounded* value so 9.96 becomes '10'
        # rather than '10.0'.
        decimals = 1 if round(scaled, 1) < 10 else 0
        rounded = round(scaled, decimals)

        # Rounding can push e.g. 999.999 up to 1000; move on to the next
        # suffix so the result reads '1.0M' instead of '1000K'.
        if rounded >= 1000:
            continue

        return f"{sign}{rounded:.{decimals}f}{suffix}"

    # Too large for the biggest suffix: fall back to scientific notation.
    return f"{number:.1e}"

====================== Info ======================  
Total masked positions in Viridian assembly:  93450014  
Total masked positions in Colman assembly:  254979847  
====================== Info ======================  
Total unmasked positions in Viridian assembly, without errors identified by MAPLE:  3983602552  
Total unmasked positions in Colman assembly, without errors identified by MAPLE:  3822049253  
====================== Info ======================  
Total positions in Viridian assembly identified as errors by MAPLE (Viridian's errors):  12174  
Total positions in Colman assembly identified as errors by MAPLE (Colman's errors):  35629  
====================== For VIR error ======================  
Colman’s assemblies are masked:  513  
Same nucleotide, Colman’s assembly error:  7743  
Same nucleotide, Colman’s assembly not error:  3043  
Diff nucleotide, Colman’s assembly error:  3  
Diff nucleotide, Colman’s assembly not error:  862  
====================== For COL error ======================  
Viridian’s assemblies are masked:  9387  
Same nucleotide, Viridian's assembly error:  7743  
Same nucleotide, Viridian's assembly not error:  7742  
Diff nucleotide, Viridian's assembly error:  3  
Diff nucleotide, Viridian's assembly not error:  10754  

In [1]:
# Top-level position counts for the Viridian assembly.
VIR_masked = 93_450_014
VIR_unmasked = 3_983_602_552
VIR_error = 12_174
VIR_both = sum((VIR_masked, VIR_unmasked, VIR_error))  # masked + unmasked + error

# Breakdown of the Viridian error positions by what the other assembly has
# at the same position.
VIR_error_othMasked = 513
VIR_error_sameError = 7_743
VIR_error_sameCorrect = 3_043
VIR_error_diffError = 3
VIR_error_diffCorrect = 862
# NOTE(review): these five counts add up to 12164, ten fewer than VIR_error
# (12174) -- confirm whether some error positions are missing a category.
VIR_error_both = sum((
    VIR_error_othMasked,
    VIR_error_sameError,
    VIR_error_sameCorrect,
    VIR_error_diffError,
    VIR_error_diffCorrect,
))

In [16]:
# Abbreviate the three top-level Viridian counts for use as chart labels.
numbers = [VIR_masked, VIR_unmasked, VIR_error]
formatted_numbers_VIR = [format_number(count) for count in numbers]
print(formatted_numbers_VIR)

# Same treatment for the five-way breakdown of the Viridian error positions.
numbers = [
    VIR_error_othMasked,
    VIR_error_sameError,
    VIR_error_sameCorrect,
    VIR_error_diffError,
    VIR_error_diffCorrect,
]
formatted_numbers_VIR_ERROR = list(map(format_number, numbers))
print(formatted_numbers_VIR_ERROR)

['93M', '4.0B', '12K']
['513', '7.7K', '3.0K', '3.0', '862']


In [15]:
# Create the DataFrame behind the Viridian overview donut chart.
#
# 'Percentage' holds ln(count) / total, i.e. log-scaled slice weights rather
# than true shares (presumably so the comparatively tiny 'Error' slice stays
# visible). The column name and values are kept as they are because later
# cells read df_VIR_plot['Percentage'].
#
# 'True_share' is the real count / total share, pre-formatted for display.
# The slice labels use it instead of Plotly's %{percent}, which would report
# each slice's share of the log-scaled weights and not of the positions.
vir_counts = [VIR_masked, VIR_unmasked, VIR_error]
data_VIR = {'Category': ['Masked',
                         'Correct-Unmasked',
                         'Error'],
            'Percentage': [np.log(count) / VIR_both for count in vir_counts],
            'Raw_number': formatted_numbers_VIR,
            'True_share': [f"{count / VIR_both * 100:.3g}%" for count in vir_counts]}
df_VIR_plot = pd.DataFrame(data_VIR)

# NOTE(review): the first colour was '#D9BFCB0' (seven hex digits), which is
# not a valid hex colour; the trailing '0' was dropped -- confirm the shade.
colors = ['#D9BFCB', '#49998B', '#C1BEE3']

# Label shown on each slice and on hover: "<category>: <count> (<true share>)".
vir_slice_text = (df_VIR_plot['Category'] + ': <br>'
                  + df_VIR_plot['Raw_number'].astype(str)
                  + ' (' + df_VIR_plot['True_share'] + ')')

### Plot
fig = go.Figure(go.Pie(labels=df_VIR_plot['Category'], values=df_VIR_plot['Percentage'], hole=0.4,
                       text=vir_slice_text,
                       marker={'colors': colors},
                       hovertemplate='%{text}',
                       textfont={'size': 15},
                       texttemplate='%{text}'
                      ))

fig.show()

In [20]:
# Build the table behind the Viridian error-breakdown donut chart: one row
# per category with its fraction of VIR_error_both and its abbreviated count.
data_VIR_ERR = {
    'Category': [
        'Vir_otherMasked',
        'Vir_sameError',
        'Vir_sameCorrect',
        'Vir_diffError',
        'Vir_diff',
    ],
    'Percentage': [
        count / VIR_error_both
        for count in (
            VIR_error_othMasked,
            VIR_error_sameError,
            VIR_error_sameCorrect,
            VIR_error_diffError,
            VIR_error_diffCorrect,
        )
    ],
    'Raw_number': formatted_numbers_VIR_ERROR,
}
df_VIR_ERR_plot = pd.DataFrame(data_VIR_ERR)

colors_ERROR = ['#EFCBB5', '#6E96C7', '#7FB7D1', '#CBE3FF', '#808A54']

# Donut chart; every slice is annotated with "<category>: <count> (<share>)".
vir_err_text = df_VIR_ERR_plot['Category'] + ': <br>' + df_VIR_ERR_plot['Raw_number'].astype(str)
vir_err_pie = go.Pie(
    labels=df_VIR_ERR_plot['Category'],
    values=df_VIR_ERR_plot['Percentage'],
    hole=0.4,
    text=vir_err_text,
    textinfo='percent+text',
    marker={'colors': colors_ERROR},
    hovertemplate='%{label}<br>Current percentage: %{percent:.1%}<br>: %{text}',
    textfont={'size': 15},
    texttemplate='%{text} (%{percent:.1%})',
)
fig = go.Figure(vir_err_pie)

fig.show()

## COL

In [22]:
# Top-level position counts for the Colman assembly.
COL_masked = 254_979_847
COL_unmasked = 3_822_049_253
COL_error = 35_629
COL_both = sum((COL_masked, COL_unmasked, COL_error))  # masked + unmasked + error

# Breakdown of the Colman error positions by what the other assembly has at
# the same position.
COL_error_othMasked = 9_387
COL_error_sameError = 7_743
COL_error_sameCorrect = 7_742
COL_error_diffError = 3
COL_error_diffCorrect = 10_754
COL_error_both = sum((
    COL_error_othMasked,
    COL_error_sameError,
    COL_error_sameCorrect,
    COL_error_diffError,
    COL_error_diffCorrect,
))

In [24]:
# Abbreviate the three top-level Colman counts for use as chart labels.
numbers = [COL_masked, COL_unmasked, COL_error]
formatted_numbers_COL = [format_number(count) for count in numbers]
print(formatted_numbers_COL)

# Same treatment for the five-way breakdown of the Colman error positions.
numbers = [
    COL_error_othMasked,
    COL_error_sameError,
    COL_error_sameCorrect,
    COL_error_diffError,
    COL_error_diffCorrect,
]
formatted_numbers_COL_ERROR = list(map(format_number, numbers))
print(formatted_numbers_COL_ERROR)

['255M', '3.8B', '36K']
['9.4K', '7.7K', '7.7K', '3.0', '11K']


In [79]:
# Palettes used by the Colman charts and the combined figures below; these
# rebind the `colors` / `colors_ERROR` names assigned in the Viridian cells.
colors = [
    '#EBD67E',
    '#7FB7D1',
    '#89A78E',
]
colors_ERROR = [
    '#C9DFCC',
    '#9AC2A0',
    '#7AAC9A',
    '#9EAB3D',
    '#AAB07C',
]

In [80]:
# creat DataFrame
# Create the DataFrame behind the Colman overview donut chart.
#
# 'Percentage' holds ln(count) / total, i.e. log-scaled slice weights rather
# than true shares (presumably so the comparatively tiny 'Error' slice stays
# visible). The column name and values are kept as they are because later
# cells read df_COL_plot['Percentage'].
#
# 'True_share' is the real count / total share, pre-formatted for display.
# The slice labels use it instead of Plotly's %{percent}, which would report
# each slice's share of the log-scaled weights and not of the positions.
col_counts = [COL_masked, COL_unmasked, COL_error]
data_COL = {'Category': ['Masked',
                         'Correct-Unmasked',
                         'Error'],
            'Percentage': [np.log(count) / COL_both for count in col_counts],
            'Raw_number': formatted_numbers_COL,
            'True_share': [f"{count / COL_both * 100:.3g}%" for count in col_counts]}
df_COL_plot = pd.DataFrame(data_COL)

# Label shown on each slice and on hover: "<category>: <count> (<true share>)".
col_slice_text = (df_COL_plot['Category'] + ': <br>'
                  + df_COL_plot['Raw_number'].astype(str)
                  + ' (' + df_COL_plot['True_share'] + ')')

### Plot
fig = go.Figure(go.Pie(labels=df_COL_plot['Category'], values=df_COL_plot['Percentage'], hole=0.4,
                       text=col_slice_text,
                       marker={'colors': colors},
                       hovertemplate='%{text}',
                       textfont={'size': 15},
                       texttemplate='%{text}'
                      ))

fig.show()

In [56]:
# Build the table behind the Colman error-breakdown donut chart: one row per
# category with its fraction of COL_error_both and its abbreviated count.
data_COL_ERR = {
    'Category': [
        'Col_otherMasked',
        'Col_sameError',
        'Col_sameCorrect',
        'Col_diffError',
        'Col_diff',
    ],
    'Percentage': [
        count / COL_error_both
        for count in (
            COL_error_othMasked,
            COL_error_sameError,
            COL_error_sameCorrect,
            COL_error_diffError,
            COL_error_diffCorrect,
        )
    ],
    'Raw_number': formatted_numbers_COL_ERROR,
}
df_COL_ERR_plot = pd.DataFrame(data_COL_ERR)

# Donut chart; every slice is annotated with "<category>: <count> (<share>)".
col_err_text = df_COL_ERR_plot['Category'] + ': <br>' + df_COL_ERR_plot['Raw_number'].astype(str)
col_err_pie = go.Pie(
    labels=df_COL_ERR_plot['Category'],
    values=df_COL_ERR_plot['Percentage'],
    hole=0.4,
    text=col_err_text,
    textinfo='percent+text',
    marker={'colors': colors_ERROR},
    hovertemplate='%{label}<br>Current percentage: %{percent:.1%}<br>: %{text}',
    textfont={'size': 15},
    texttemplate='%{text} (%{percent:.1%})',
)
fig = go.Figure(col_err_pie)

fig.show()

## Comb

In [87]:
# Combined overview figure: the Viridian and Colman donuts side by side.
# Create a 1x2 grid of 'domain' subplots (the subplot type pie traces need).
fig = make_subplots(rows=1, cols=2, subplot_titles=['Viridian alignment', 'Colman\'s alignment'],
                    specs=[[{'type': 'domain'}, {'type': 'domain'}]])

# Each panel pairs its plot table with the raw counts, listed in the same
# row order as the table (Masked, Correct-Unmasked, Error), and their total.
overview_panels = [
    (df_VIR_plot, [VIR_masked, VIR_unmasked, VIR_error], VIR_both),
    (df_COL_plot, [COL_masked, COL_unmasked, COL_error], COL_both),
]

# Add one pie per panel. Slice sizes come from the 'Percentage' column, which
# holds log-scaled weights, so Plotly's %{percent} would not be the real share
# of positions. The label therefore carries the true count / total share.
for col_idx, (panel_df, panel_counts, panel_total) in enumerate(overview_panels, start=1):
    true_share = pd.Series([f"{count / panel_total * 100:.3g}%" for count in panel_counts],
                           index=panel_df.index)
    panel_text = (panel_df['Category'] + ': <br>'
                  + panel_df['Raw_number'].astype(str)
                  + ' (' + true_share + ')')
    fig.add_trace(go.Pie(labels=panel_df['Category'], values=panel_df['Percentage'], hole=0.4,
                         text=panel_text,
                         marker={'colors': colors},
                         hovertemplate='%{text}',
                         textfont={'size': 13},
                         texttemplate='%{text}'
                        ),
                  row=1, col=col_idx)

# Uncomment to preview the figure inline:
# fig.show()

# Set titles for the two subplots
# fig.update_layout(title_text="subplots_error")  # Increase figure size

# Save the figure as an HTML file, creating the output directory first so
# the write does not fail when 'Figure/' does not exist yet.
overview_path = Path('Figure') / 'subplots_all.html'
overview_path.parent.mkdir(parents=True, exist_ok=True)
pio.write_html(fig, str(overview_path))

In [86]:
# Combined error-breakdown figure: the Viridian and Colman donuts side by side.
fig = make_subplots(rows=1, cols=2, subplot_titles=['Viridian alignment', 'Colman\'s alignment'],
                    specs=[[{'type': 'domain'}, {'type': 'domain'}]])

# Slice labels passed to both pies. `label_err` was used here without being
# defined anywhere in the notebook, so the cell raised NameError on a fresh
# kernel.
# NOTE(review): these values are reconstructed from the per-assembly category
# names with the 'Vir_'/'Col_' prefix removed -- confirm the intended wording.
label_err = ['otherMasked', 'sameError', 'sameCorrect', 'diffError', 'diffCorrect']

# One pie per assembly, in subplot-column order.
for col_idx, panel_df in enumerate([df_VIR_ERR_plot, df_COL_ERR_plot], start=1):
    fig.add_trace(go.Pie(labels=label_err, values=panel_df['Percentage'], hole=0.4,
                         text=panel_df['Category'] + ': <br>' + panel_df['Raw_number'].astype(str),
                         textinfo='percent+text',
                         marker={'colors': colors_ERROR},
                         hovertemplate='%{label}<br>Current percentage: %{percent:.1%}<br>: %{text}',
                         textfont={'size': 13},
                         texttemplate='%{text} (%{percent:.1%})'
                        ),
                  row=1, col=col_idx)

# Uncomment to preview the figure inline:
# fig.show()

# Set titles for the two subplots
# fig.update_layout(title_text="subplots_error")  # Increase figure size

# Save the figure as an HTML file, creating the output directory first so
# the write does not fail when 'Figure/' does not exist yet.
error_path = Path('Figure') / 'subplots_error.html'
error_path.parent.mkdir(parents=True, exist_ok=True)
pio.write_html(fig, str(error_path))