In [1]:
import altair as alt
import pandas as pd
from io import StringIO
import vl_convert as vlc

In [5]:
import sys
# One row per feature category: share of the total ('Percentage'), absolute
# count ('Numbers') and the pre-formatted label shown inside the bar ('comb').
feature_rows = [
    ('Physicochemical Properties', 0.567, 17, "(17) 56.7%"),
    ('Sequence', 0.3, 9, "(9) 30%"),
    ('Miss Cleavage', 0.133, 4, "(4) 13.3%"),
]
data = pd.DataFrame(feature_rows, columns=['Category', 'Percentage', 'Numbers', 'comb'])
# Alternative values kept from an earlier version of this cell:
#'Percentage': [0.533, 0.333, 0.133]
#'Percentage': [16, 10, 4]   (presumably meant as 'Numbers' -- TODO confirm)

# Horizontal centre of each bar segment: the running total of all shares that
# come before the segment, plus half of the segment's own share.
# Rows are processed in table order, so the labels only line up with the bars
# while the rows are listed in the same order in which the bars are stacked.
shares = data['Percentage']
data["Midpoints"] = shares.cumsum().shift(fill_value=0) + shares / 2

# Greyscale fills for the bar segments, in category order:
# light gray, dark gray, black.
color_scale_bar = alt.Scale(
    domain=['Physicochemical Properties', 'Sequence', 'Miss Cleavage'],
    range=['#AAAAAA', '#555555', '#000000'],
)
# Label colors are keyed on the segment midpoints so that each label contrasts
# with the fill behind it: black on the light-gray segment, white on the two
# darker ones.
color_scale_text = alt.Scale(
    domain=data['Midpoints'].to_list(),
    range=['#000000', '#FFFFFF', '#FFFFFF'],
)

# Horizontal legend placed at fixed pixel coordinates (orient='none').
# Optional tweak that was tried before: columnPadding=-30
category_legend = alt.Legend(
    orient='none',
    legendX=140,
    legendY=70,
    title=None,
    direction='horizontal',
)

# Layer 1: a single horizontal bar, normalised to full width and stacked with
# the largest share first.
stacked_bar_chart = (
    alt.Chart(data)
    .mark_bar(size=50)
    .encode(
        x=alt.X('Percentage:Q', stack='normalize', axis=None),
        color=alt.Color('Category:N', scale=color_scale_bar, legend=category_legend),
        order=alt.Order('Percentage:Q', sort='descending'),
    )
)

# Layer 2: one text label per segment, positioned at the segment midpoint.
# 'comb' already contains the formatted "(count) percent" string; when a numeric
# field is shown instead, pass format=".1%" to alt.Text (and drop it again for
# total numbers). A fixed color='white' was the earlier alternative to the scale.
white_text = (
    alt.Chart(data)
    .mark_text(size=12)
    .encode(
        x=alt.X('Midpoints:Q'),
        text=alt.Text('comb:N'),
        color=alt.Color('Midpoints:Q', scale=color_scale_text, legend=None),
    )
)

# Combine both layers and hide the view border.
layered = stacked_bar_chart + white_text
final_chart = layered.properties(width=600, height=50).configure_view(strokeOpacity=0)
#final_chart.save("path/to/bar_chart_features_total_percent.svg")
final_chart

In [2]:
import sys
# One row per method family: share of the total ('Percentage') and the
# absolute count ('Numbers').
method_rows = [
    ('Traditional Machine Learning', 0.609, 14),
    ('Deep Learning', 0.391, 9),
]
data = pd.DataFrame(method_rows, columns=['Category', 'Percentage', 'Numbers'])

# Horizontal centre of each bar segment: the running total of all shares that
# come before the segment, plus half of the segment's own share.
# Rows are processed in table order, so the labels only line up with the bars
# while the rows are listed in the same order in which the bars are stacked.
shares = data['Percentage']
data["Midpoints"] = shares.cumsum().shift(fill_value=0) + shares / 2

# Fills for the two bar segments: white for traditional machine learning,
# black for deep learning.
color_scale_bar = alt.Scale(
    domain=['Traditional Machine Learning', 'Deep Learning'],
    range=['#FFFFFF', '#000000'],
)
# Label colors are keyed on the segment midpoints so that each label contrasts
# with the fill behind it: black on the white segment, white on the black one.
color_scale_text = alt.Scale(
    domain=data['Midpoints'].to_list(),
    range=['#000000', '#FFFFFF'],
)

# Layer 1: a single horizontal bar, normalised to full width and stacked with
# the largest share first. The gray stroke outlines the segments.
# The legend is switched off; the variant used before was
# legend=alt.Legend(orient='bottom', title=None, columnPadding=30).
stacked_bar_chart = (
    alt.Chart(data)
    .mark_bar(size=50, stroke="gray")
    .encode(
        x=alt.X('Percentage:Q', stack='normalize', axis=None),
        color=alt.Color('Category:N', scale=color_scale_bar, legend=None),
        order=alt.Order('Percentage:Q', sort='descending'),
    )
)

# Layer 2: the share of each segment, printed as a percentage at the segment
# midpoint. Drop format=".1%" when switching the text to total numbers.
# A fixed color='white' was the earlier alternative to the color scale.
white_text = (
    alt.Chart(data)
    .mark_text(size=12)
    .encode(
        x=alt.X('Midpoints:Q'),
        text=alt.Text('Percentage:Q', format=".1%"),
        color=alt.Color('Midpoints:Q', scale=color_scale_text, legend=None),
    )
)

# Combine both layers; optionally append .configure_view(strokeOpacity=1).
layered = stacked_bar_chart + white_text
comb_chart = layered.properties(width=900, height=60)
#comb_chart.save("Path/to/method_bar.svg")
comb_chart

In [6]:
# Tab-separated publication table: year, title / tool name, and method family
# ('M' and 'D' are plotted as 'Traditional Machine Learning' and 'Deep Learning').
data_str = """Year\tTitle\tCategory
2006\tA computational approach toward label-free protein quantification using predicted peptide detectability:\tM
2007\tPrediction of peptides observable by mass spectrometry applied at the experimental set level\tM
2007\tPeptideSieve\tM
2008\tAPEX\tM
2008\tA support vector machine model for the prediction of proteotypic peptides for accurate mass and time proteomics\tM
2009\tESPPredictor\tM
2010\tThe Importance of Peptide Detectability for Protein Identification, Quantification, and Experiment Design in MS/MS Proteomics\tM
2011\tCONSeQuence\tM
2014\tPeptideRank\tM
2015\tPPA\tM
2015\tPREGO\tM
2017\tEnhanced Missing Proteins Detection in NCI60 Cell Lines Using an Integrative Search Engine Approach\tM
2018\td::pPop\tD
2019\tAP3\tM
2020\tDeepMSPeptide\tD
2020\tIn silico spectral libraries by deep learning facilitate data-Independant aquisition proteomics\tD
2021\tPepFormer\tD
2021\tCapsNet\tD
2022\tPD-BertEDL\tD
2023\tDeepDetect\tD
2023\tDbyDeep\tD
2023\tPeptideRanger\tM
2024\tKDEAN\tD"""

# Read data into DataFrame
data = pd.read_csv(StringIO(data_str), sep='\t')

# Count occurrences of 'M' and 'D' for each year. Reindexing the columns
# guarantees that both categories exist even if one never occurs in the table.
yearly_counts = (
    data.groupby(['Year', 'Category'])
    .size()
    .unstack(fill_value=0)
    .reindex(columns=['M', 'D'], fill_value=0)
)

# Years without any publication are missing from the table. Insert them with
# zero counts so the cumulative series has one entry for every calendar year
# and the ordinal year axis of the chart does not silently skip years.
first_year = int(data['Year'].min())
last_year = int(data['Year'].max())
yearly_counts = (
    yearly_counts
    .reindex(list(range(first_year, last_year + 1)), fill_value=0)
    .rename_axis(index='Year')
)
counts = yearly_counts.reset_index()

# Create cumulative sum
counts['Traditional Machine Learning'] = counts['M'].cumsum()
counts['Deep Learning'] = counts['D'].cumsum()

# Melt DataFrame for Altair: one row per (year, category) with the cumulative count
counts_melted = counts.melt(
    id_vars='Year',
    value_vars=['Traditional Machine Learning', 'Deep Learning'],
    var_name='Category',
    value_name='Occurrences',
)

# Fill colors and legend shared by the two method families.
method_colors = alt.Scale(
    domain=['Traditional Machine Learning', 'Deep Learning'],
    range=['white', 'black'],
)
method_legend = alt.Legend(orient="bottom", title=None, columnPadding=30)

# Stacked bar chart of the cumulative number of publications per year.
# The order encoding is given without a field; to swap the stacking of the two
# categories back, use order=alt.Order('Category:N', sort='ascending') instead.
stacked_bar_chart_new = (
    alt.Chart(counts_melted)
    .mark_bar(stroke="gray")
    .encode(
        x=alt.X('Year:O', title='Year', axis=alt.Axis(labelAngle=-45)),
        y=alt.Y('Occurrences:Q', title='Cumulative Occurrences'),
        color=alt.Color('Category:N', scale=method_colors, legend=method_legend),
        order=alt.Order(sort='ascending'),
    )
    .properties(width=450, height=250)
)

# Display the chart (uncomment the next line to export it as SVG)
#stacked_bar_chart_new.save("Path/to/M_D_occurences_swapped.svg") #on my windows
stacked_bar_chart_new

In [7]:
import plotly.express as px
import pandas as pd

# Data for the treemap: one row per technique (leaf), written as
# (Type, Category, Percentage). '<br>' inside a label is a manual line break.
# NOTE(review): the listed shares add up to about 0.913 rather than 1 --
# confirm whether some techniques are left out on purpose.
technique_rows = [
    ('Traditional Machine Learning', 'Random Forest<br>Classifier', 0.2174),
    ('Traditional Machine Learning', 'Multilayer<br>Perceptron', 0.2174),
    ('Traditional Machine Learning', 'Ensemble of<br>different<br>Methods', 0.087),
    ('Traditional Machine Learning', 'Support<br>Vector<br>Machines', 0.0435),
    ('Traditional Machine Learning', 'Gaussian<br>Mixture<br>Likelihood<br>Function', 0.0435),
    ('Deep Learning', 'Bidirectional<br>Long<br>Short-Term<br>Memory Network', 0.1304),
    ('Deep Learning', 'Feed-Forward<br>Neural Network', 0.0435),
    ('Deep Learning', 'Convolutional<br>Neural Network', 0.0435),
    ('Deep Learning', 'Gated Recurrent<br>Units', 0.0435),
    ('Deep Learning', 'Long<br>Short-Term<br>Memory<br>Network', 0.0435),
]
data = pd.DataFrame(technique_rows, columns=['Type', 'Category', 'Percentage'])
# Additional top hierarchy level: every technique belongs to the same theme.
data.insert(0, 'Theme', ['Machine Learning'] * len(data))

# Sequential palette for the treemap tiles (light yellow to dark blue).
# Alternative palette that was tried before:
# ['#f0f9e8', '#bae4bc', '#7bccc4', '#43a2ca', '#0868ac']
treemap_palette = ['#ffffcc', '#a1dab4', '#41b6c4', '#2c7fb8', '#253494']

# The color range starts at 0 and ends at the largest single share.
largest_share = max(data['Percentage'])

# Creating the treemap: Theme -> Type -> Category, tile size and tile color
# both driven by 'Percentage'.
fig = px.treemap(
    data,
    path=['Theme', 'Type', 'Category'],
    values='Percentage',
    color='Percentage',
    color_continuous_scale=treemap_palette,
    range_color=[0, largest_share],
)

# Update text properties for all traces: show only the label, at 14 pt.
fig.update_traces(textinfo='label', textfont_size=14)

# Display the figure
fig.show()

#fig.write_image("Path/to/treemap_techniques.svg")