# Test Coverage of Machine Learning Projects in Nature Computational Science Journal

#### This notebook creates charts that show the growing share of machine learning projects in the Nature Computational Science journal since 2021, and how the test coverage of those machine learning projects has changed over the same period.

In [1]:
import pandas as pd
import altair as alt

In [2]:

# Load the article-level dataset (path is relative to the notebook's folder).
df = pd.read_csv('../data/machine_learning_projects_NCS.csv')

# Replace the raw test-file count with a two-level label used as the colour
# category in the coverage chart. Only an exact 0 maps to 'No test file';
# every other value falls into the second label.
# NOTE(review): a missing count would also land in 'At least 1 test file' --
# confirm the CSV has no blanks in this column.
has_zero_tests = df['num_test_files'].eq(0)
df['num_test_files'] = has_zero_tests.map(
    {True: 'No test file', False: 'At least 1 test file'}
)

# Constant weight of 1 per row, so that sum(cnt) in the charts is a row count.
df['cnt'] = 1

# Keep only the rows flagged as machine learning projects.
is_ml_project = df['machine_learning_project'] == 'yes'
df_ml = df.loc[is_ml_project]

In [7]:
# Shared encoding for both layers: one bar per year, with the summed `cnt`
# stacked and normalized so that every bar spans 0-100%.
base = alt.Chart(df).mark_bar(size=50).encode(
    x=alt.X('year:N', axis=alt.Axis(title='Year')),
    y=alt.Y('sum(cnt):Q')
        .stack('normalize')
        .axis(format='.0%', title='Percentage of Articles'),
)

# Bar layer: split each year's bar by the machine-learning flag.
bars = base.encode(
    color=alt.Color('machine_learning_project').legend(title='Machine Learning Project'),
)

# Label layer: the summed `cnt` of each segment, drawn in white. `detail`
# splits the labels by the same field as the bar colours so each segment gets
# its own number; dy=35 offsets the label from the segment's top edge.
text = base.mark_text(
    color='white',
    align='center',
    baseline='bottom',
    dy=35,
).encode(
    text='sum(cnt)',
    detail='machine_learning_project:N',
)

chart = alt.layer(bars, text)

# Title block rendered underneath the plot, left-aligned with the plot area.
title_spec = alt.Title(
    "Machine Learning Project by Year",
    subtitle=[
        "An increasing focus on machine learning",
        " in Nature Computational Science articles from 2021 to 2024.",
        "Numbers in the bar is the number of articles in each category.",
    ],
    anchor='start',
    frame='group',
    orient='bottom',
    offset=20,
)

# Last expression of the cell, so the notebook renders the sized chart.
chart.properties(width=300, height=400, title=title_spec)

In [9]:

# Shared encoding for both layers, restricted to machine learning projects:
# one bar per year, with row counts stacked and normalized to 0-100%.
base = alt.Chart(df_ml).mark_bar(size=50).encode(
    x=alt.X('year:N', axis=alt.Axis(title='Year')),
    y=alt.Y('count()')
        .stack('normalize')
        .axis(format='.0%', title='Percentage of Articles'),
)

# Bar layer: split each year's bar by the test-file label.
bars = base.encode(
    color=alt.Color('num_test_files').legend(title='Test File Coverage'),
)

# Label layer: the row count of each segment, drawn in white. `detail` splits
# the labels by the same field as the bar colours so each segment gets its own
# number; dy=35 offsets the label from the segment's top edge.
text = base.mark_text(
    color='white',
    align='center',
    baseline='bottom',
    dy=35,
).encode(
    text='count()',
    detail='num_test_files:N',
)

chart = alt.layer(bars, text)

# Title block rendered underneath the plot, left-aligned with the plot area.
title_spec = alt.Title(
    "Test File Coverage in Machine Learning Projects Over Time",
    subtitle=[
        "A breakdown of articles with and without test files in",
        "Nature Computational Science from 2021 to 2024.",
        "Numbers in the bar is the number of articles in each category.",
    ],
    anchor='start',
    frame='group',
    orient='bottom',
    offset=20,
)

# Last expression of the cell, so the notebook renders the sized chart.
chart.properties(width=300, height=400, title=title_spec)