In [31]:
import pandas as pd

In [32]:
data = pd.read_csv('IGEM_teams_2004-2024.csv')
data = data[[year <= 2020 and year >= 2007 for year in data['Year']]]

In [33]:
all_awards = [awards.split(', ') for awards in data['Awards'].dropna()]

all_awards = [
    award
    for awards in all_awards
    for award in awards]

[all_awards.remove('-') for item in all_awards if item == '-']
all_awards = pd.Series(all_awards)
unique_awards =  pd.Series(all_awards.unique())
len(unique_awards.unique())

299

In [34]:
def score_award(award_name: str) -> float:
        if 'Grand Prize' in award_name and '(' not in award_name: return 10
        elif 'Grand Prize' in award_name: return 5
        if 'Best' in award_name and 'Project' in award_name: return 5
        elif 'Best' in award_name: return 2
        return 1

def score_medal(medal: str) -> float:
    match medal:
        case 'Gold': return 1
        case 'Silver': return 0.5
        case 'Bronze': return 0.25
        case _ : return 0

def score_team(medal, awards):
    score = 0
    if type(awards) is not float:
        for award in awards.split(', '):
            score += score_award(awards)
    score += score_medal(medal)
    return score

In [35]:
data['Absolute Score'] = [score_team(data.iloc[index]['Medal'], data.iloc[index]['Awards']) for index in range(len(data))]

In [36]:
data = data[data['Section'] == 'Undergrad']

In [37]:
from sklearn.preprocessing import minmax_scale

for year in data['Year'].sort_values().unique():
    data.loc[data['Year'] == year, 'Scaled Score'] = minmax_scale(data.loc[data['Year'] == year]['Absolute Score']).tolist()

'''
undergrad_data = data[data['Section'] == 'Undergrad']
undergrad_data['Score'] = minmax_scale(undergrad_data['Score'])
'''

"\nundergrad_data = data[data['Section'] == 'Undergrad']\nundergrad_data['Score'] = minmax_scale(undergrad_data['Score'])\n"

In [38]:
from textwrap import wrap
data['Abstract'] = [abstract if abstract != '-' and type(abstract) is not float  else 'This team did not submit an abstract this competition year' for abstract in data['Abstract']]
data['Short Abstract'] = ['<br>'.join(wrap(str(abstract), 50)) for abstract in data['Abstract']]
data['Awards Summary'] = ['<br>'.join(wrap(str(awards), 50)) for awards in data['Awards']]
data['Awards Summary'] = [awards if awards != '-' and awards != 'nan'  else 'This team did not win any awards this competition year' for awards in data['Awards Summary']]

In [39]:
import plotly.express as px

In [40]:
FIG_SIZE = {'width': 1100, 'height': 600}
COLOR_SETTINGS = {'plot_bgcolor':'white',
                  'paper_bgcolor':'white'}

In [11]:
#Figure
fig = px.strip(**FIG_SIZE,
               data_frame=data, x='Year', y='Scaled Score',
               hover_name='Team Name',
               color='Region',
               hover_data=['Absolute Score', 'Wiki', 'Short Abstract', 'Awards Summary'])

#Axes
AXIS_FORMAT = {'title_font_size':20}

#x-axis
xticks = data['Year'].sort_values().unique()
xaxis_format = {**AXIS_FORMAT,
                'title_text': 'Year of Competition',
                'tickmode': 'array',
                'tickvals': xticks}
fig.update_xaxes(**xaxis_format, title = {'font_size':30})

#y-axis
yaxis_format = {**AXIS_FORMAT,
                'title_text': 'Team Score<br>(as ''%'' of max score that year)',
                'gridcolor': 'rgba(128, 128, 128, 0.2)'}
fig.update_yaxes(**yaxis_format, title = {'font_size':30})

#title
title_format = {'title':{'font_size':30,
                         'text':'Team score across 13 competition years',
                         'x':0.47}}
fig.update_layout(**title_format)

for tick in xticks:
    fig.add_vline(x=tick, layer='below', line_color='rgba(128, 128, 128, 0.5)')

fig.update_layout(**COLOR_SETTINGS)

fig.update_traces(marker={'size':15}, jitter=1)


fig.show()
fig.write_html('parallel_strip_team_score.html')

In [12]:
#Figure
fig = px.strip(**{'width': 1200, 'height': 2400},
               data_frame=data, x='Year', y='Scaled Score',
               hover_name='Team Name',
               color='Region',
               hover_data=['Absolute Score', 'Wiki', 'Short Abstract', 'Awards Summary'])

#Axes
AXIS_FORMAT = {'title_font_size':20}

#x-axis
xticks = data['Year'].sort_values().unique()
xaxis_format = {**AXIS_FORMAT,
                'title_text': 'Year of Competition',
                'tickmode': 'array',
                'tickvals': xticks}
fig.update_xaxes(**xaxis_format, title = {'font_size':30})

#y-axis
yaxis_format = {**AXIS_FORMAT,
                'title_text': 'Team Score<br>(as ''%'' of max score that year)',
                'gridcolor': 'rgba(128, 128, 128, 0.2)'}
fig.update_yaxes(**yaxis_format, title = {'font_size':30})

#title
title_format = {'title':{'font_size':30,
                         'text':'Team score across 13 competition years',
                         'x':0.47}}
fig.update_layout(**title_format)

for tick in xticks:
    fig.add_vline(x=tick, layer='below', line_color='rgba(128, 128, 128, 0.5)')

fig.update_layout(**COLOR_SETTINGS)

fig.update_traces(marker={'size':15}, jitter=1)


fig.show()
fig.write_html('parallel_strip_team_score_long_version.html')

In [23]:
leaderboard = pd.DataFrame([(team, data[data['Team Name'] == team]['Absolute Score'].sum()) for team in data['Team Name'].unique()],
                           index=range(len(data['Team Name'].unique())),
                           columns=['Team Name', 'Lifetime Score'])
leaderboard.sort_values(ascending=False, by='Lifetime Score')[:10]

Unnamed: 0,Team Name,Lifetime Score
478,Slovenia,145.0
544,Imperial College,116.0
282,Heidelberg,115.0
28,Calgary,106.0
514,NCKU Tainan,83.0
407,Vilnius-Lithuania,79.25
68,Peking,79.0
722,William and Mary,75.75
439,Groningen,74.0
548,Imperial College London,66.0


In [24]:
leaderboard['Region'] = [data[data['Team Name'] == team]['Region'].iloc[0] for team in leaderboard['Team Name']]

In [25]:
from sklearn.preprocessing import MinMaxScaler
leaderboard['Scaled Score'] = MinMaxScaler().fit_transform(pd.DataFrame(leaderboard['Lifetime Score']))
leaderboard

Unnamed: 0,Team Name,Lifetime Score,Region,Scaled Score
0,Buenos Aires,1.25,latin-america,0.008621
1,Melbourne,6.50,asia,0.044828
2,UQ-Australia,2.25,asia,0.015517
3,Victoria Australia,1.00,asia,0.006897
4,Macquarie Australia,5.50,asia,0.037931
...,...,...,...,...
811,UTTyler,0.00,north-america,0.000000
812,UUlm,0.25,europe,0.001724
813,UZurich,6.00,europe,0.041379
814,Waseda,6.00,asia,0.041379


In [16]:
fig = px.histogram(**FIG_SIZE, data_frame=leaderboard, x='Scaled Score', color='Region', barmode='overlay', log_y=True)

#Axes
AXIS_FORMAT = {'title_font_size':20}

#x-axis
xaxis_format = {**AXIS_FORMAT,
                'title_text': 'Score (as ''%'' of maximum)',
                'tickmode': 'array'}
fig.update_xaxes(**xaxis_format, title = {'font_size':30})

#y-axis
yaxis_format = {**AXIS_FORMAT,
                'title_text': 'Frequency of team score',
                'gridcolor': 'rgba(128, 128, 128, 0.2)'}
fig.update_yaxes(**yaxis_format, title = {'font_size':30})

#title
title_format = {'title':{'font_size':30,
                         'text':'Lifetime Team Score Distribution by Region',
                         'x':0.47}}
fig.update_layout(**title_format)

fig.update_layout(**COLOR_SETTINGS)

fig.show()
fig.write_html('regional_score_histogram.html')

In [17]:
fig = px.scatter(data_frame=leaderboard, x='Team Name', y='Lifetime Score', color='Region')
fig.show()

In [18]:
fig = px.strip(**FIG_SIZE, data_frame=leaderboard, x='Region', y='Lifetime Score', color='Region', hover_name='Team Name', log_y=True)
fig.update_traces(jitter=1, marker_size=8)

#Axes
AXIS_FORMAT = {'title_font_size':20}

#x-axis
xaxis_format = {**AXIS_FORMAT,
                'title_text': 'Region',
                'tickmode': 'array'}
fig.update_xaxes(**xaxis_format, title = {'font_size':30})

#y-axis
yaxis_format = {**AXIS_FORMAT,
                'title_text': 'Lifetime Score',
                'gridcolor': 'rgba(128, 128, 128, 0.2)'}
fig.update_yaxes(**yaxis_format, title = {'font_size':30})

#title
title_format = {'title':{'font_size':30,
                         'text':'Lifetime Team score by Region',
                         'x':0.47}}
fig.update_layout(**title_format)

fig.update_layout(**COLOR_SETTINGS)

fig.show()
fig.write_html('fake_violin_plot.html')

In [19]:
fig = px.box(**FIG_SIZE, data_frame=leaderboard, x='Region', y='Lifetime Score', color='Region', log_y=True, hover_name='Team Name', range_y=[0.75, 100])
fig.update_traces(jitter=1, boxmean=True)

#Axes
AXIS_FORMAT = {'title_font_size':20}

#x-axis
xaxis_format = {**AXIS_FORMAT,
                'title_text': 'Region',
                'tickmode': 'array'}
fig.update_xaxes(**xaxis_format, title = {'font_size':30})

#y-axis
yaxis_format = {**AXIS_FORMAT,
                'title_text': 'Lifetime Score',
                'gridcolor': 'rgba(128, 128, 128, 0.2)'}
fig.update_yaxes(**yaxis_format, title = {'font_size':30})

#title
title_format = {'title':{'font_size':30,
                         'text':'Lifetime Team score by Region',
                         'x':0.47}}
fig.update_layout(**title_format)

fig.update_layout(**COLOR_SETTINGS)

fig.show()
fig.write_html('score_box_plot.html')

In [20]:
import plotly.graph_objs as go
import webbrowser
from ipywidgets import Output, VBox

#Figure
fig = px.strip(**FIG_SIZE,
               data_frame=data, x='Year', y='Scaled Score', color='Region',
               hover_name='Team Name',
               hover_data=['Absolute Score', 'Wiki', 'Short Abstract', 'Awards Summary'])

#Axes
AXIS_FORMAT = {'title_font_size':20}

#x-axis
xticks = data['Year'].sort_values().unique()
xaxis_format = {**AXIS_FORMAT,
                'title_text': 'Year of Competition',
                'tickmode': 'array',
                'tickvals': xticks}
fig.update_xaxes(**xaxis_format, title = {'font_size':30})

#y-axis
yaxis_format = {**AXIS_FORMAT,
                'title_text': 'Team Score<br>(as ''%'' of max score that year)',
                'gridcolor': 'rgba(128, 128, 128, 0.2)'}
fig.update_yaxes(**yaxis_format, title = {'font_size':30})

#title
title_format = {'title':{'font_size':30,
                         'text':'Team score across 13 competition years',
                         'x':0.47}}
fig.update_layout(**title_format)

for tick in xticks:
    fig.add_vline(x=tick, layer='below', line_color='rgba(128, 128, 128, 0.5)')

fig.update_layout(**COLOR_SETTINGS)

fig.update_traces(marker={'size':15}, jitter=1)

fig = go.FigureWidget(fig)

latin_america_points = fig.data[0]
asia_points = fig.data[1]
europe_points = fig.data[2]
north_america_points = fig.data[3]
africa_points = fig.data[4]

out = Output()
@out.capture(clear_output=True)

def open_url_latin_america(trace, points, state):
    if points.point_inds:
        print(points.point_inds)
        ind = points.point_inds[0]
        url = data.Wiki.loc[data['Region'] == 'latin-america'].iloc[ind]
        webbrowser.open_new_tab(url)
latin_america_points.on_click(open_url_latin_america)

def open_url_asia(trace, points, state):
    if points.point_inds:
        print(points.point_inds)
        ind = points.point_inds[0]
        url = data.Wiki.loc[data['Region'] == 'asia'].iloc[ind]
        webbrowser.open_new_tab(url)
asia_points.on_click(open_url_asia)

def open_url_europe(trace, points, state):
    if points.point_inds:
        print(points.point_inds)
        ind = points.point_inds[0]
        url = data.Wiki.loc[data['Region'] == 'europe'].iloc[ind]
        webbrowser.open_new_tab(url)
europe_points.on_click(open_url_europe)

def open_url_north_america(trace, points, state):
    if points.point_inds:
        print(points.point_inds)
        ind = points.point_inds[0]
        url = data.Wiki.loc[data['Region'] == 'north-america'].iloc[ind]
        webbrowser.open_new_tab(url)
north_america_points.on_click(open_url_north_america)

def open_url_africa(trace, points, state):
    if points.point_inds:
        print(points.point_inds)
        ind = points.point_inds[0]
        url = data.Wiki.loc[data['Region'] == 'africa'].iloc[ind]
        webbrowser.open_new_tab(url)
africa_points.on_click(open_url_africa)

VBox([fig, out])

VBox(children=(FigureWidget({
    'data': [{'alignmentgroup': 'True',
              'boxpoints': 'all',
      …

In [21]:
tracks = pd.read_csv('tracks.csv', index_col=0)['category'].to_dict()

def assign_supertrack(track):
    supertrack = None
    try: supertrack = tracks[str(track)]
    except:
        supertrack = 'Other'
    return supertrack

data['Supertrack'] = [assign_supertrack(track) for track in data['Track']]

TRACK_COLORS = {'Basic Research' : '#cda58d',
            'Environment' : '#0DB26B',
            'Other' : '#50C6DF',
            'Manufacturing' : '#5f7687',
            'Applications Research' : '#d1a5e0',
            'Medicine' : '#F391BC',
            'Food, Agriculture & Energy' : '#C8D850',
            'Software, Measurement, & Modeling' : '#9D9FA2',
            'High School' : '#F58345',
            'Entrepreneurship' : '#FFCB05',
            'Hardware' : '#8A6CB0',
            'Open/Community Research' : '#0C8AC8',
            'Policy' : 'Black',
            'Art, Design & Cosmetics': 'Red'}

#Figure
fig = px.strip(**FIG_SIZE,
               data_frame=data, x='Year', y='Scaled Score',
               hover_name='Team Name',
               color='Supertrack',
               hover_data=['Absolute Score', 'Wiki', 'Short Abstract', 'Awards Summary'],
               color_discrete_map = TRACK_COLORS)

#Axes
AXIS_FORMAT = {'title_font_size':20}

#x-axis
xticks = data['Year'].sort_values().unique()
xaxis_format = {**AXIS_FORMAT,
                'title_text': 'Year of Competition',
                'tickmode': 'array',
                'tickvals': xticks}
fig.update_xaxes(**xaxis_format, title = {'font_size':30})

#y-axis
yaxis_format = {**AXIS_FORMAT,
                'title_text': 'Team Score<br>(as ''%'' of max score that year)',
                'gridcolor': 'rgba(128, 128, 128, 0.2)'}
fig.update_yaxes(**yaxis_format, title = {'font_size':30})

#title
title_format = {'title':{'font_size':30,
                         'text':'Team score across 13 competition years',
                         'x':0.47}}
fig.update_layout(**title_format)

for tick in xticks:
    fig.add_vline(x=tick, layer='below', line_color='rgba(128, 128, 128, 0.5)')

fig.update_layout(**COLOR_SETTINGS)

fig.update_traces(marker={'size':15}, jitter=1)


fig.show()
fig.write_html('parallel_strip_team_score_by_track.html')

In [11]:
#Figure
fig = px.line(**FIG_SIZE,
               data_frame=data[data['Team Name'] == 'William and Mary'], x='Year', y='Scaled Score',
               hover_name='Team Name',
               hover_data=['Absolute Score', 'Wiki', 'Short Abstract', 'Awards Summary'])

#Axes
AXIS_FORMAT = {'title_font_size':20}

#x-axis
xticks = data['Year'].sort_values().unique()
xaxis_format = {**AXIS_FORMAT,
                'title_text': 'Year of Competition',
                'tickmode': 'array',
                'tickvals': xticks}
fig.update_xaxes(**xaxis_format, title = {'font_size':30})

#y-axis
yaxis_format = {**AXIS_FORMAT,
                'title_text': 'Team Score<br>(as ''%'' of max score that year)',
                'gridcolor': 'rgba(128, 128, 128, 0.2)'}
fig.update_yaxes(**yaxis_format, title = {'font_size':30})

#title
title_format = {'title':{'font_size':30,
                         'text':'William and Mary iGEM team score across 6 competition years',
                         'x':0.47}}
fig.update_layout(**title_format)

fig.update_traces({'line_color':'Green'})

fig.update_layout(**COLOR_SETTINGS)

fig.show()
fig.write_html('wm_score_line.html')

In [52]:
teams = ['Harvard', 'William and Mary', 'Yale', 'MIT', 'Cornell', 'Penn', 'Washington']

#Figure
fig = px.line(**FIG_SIZE,
               data_frame=data.loc[[True if team_name in teams else False for team_name in data['Team Name']]],
               x='Year', y='Scaled Score', color='Team Name',
               hover_name='Team Name',
               hover_data=['Absolute Score', 'Wiki', 'Short Abstract', 'Awards Summary'])

#Axes
AXIS_FORMAT = {'title_font_size':20}

#x-axis
xticks = data['Year'].sort_values().unique()
xaxis_format = {**AXIS_FORMAT,
                'title_text': 'Year of Competition',
                'tickmode': 'array',
                'tickvals': xticks}
fig.update_xaxes(**xaxis_format, title = {'font_size':30})

#y-axis
yaxis_format = {**AXIS_FORMAT,
                'title_text': 'Team Score<br>(as ''%'' of max score that year)',
                'gridcolor': 'rgba(128, 128, 128, 0.2)'}
fig.update_yaxes(**yaxis_format, title = {'font_size':30})

#title
title_format = {'title':{'font_size':30,
                         'text':'Comparison of high-performing American iGEM teams',
                         'x':0.47}}
fig.update_layout(**title_format)

fig.update_layout(**COLOR_SETTINGS)

fig.show()
fig.write_html('us_score_line.html')