# Tutorial : https://towardsdatascience.com/bar-chart-race-in-python-with-matplotlib-8e687a5c8a41

In [None]:
from platform import python_version
python_version()

In [None]:
import datetime
import os

import matplotlib.animation as animation
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
import pandas as pd
from IPython.display import HTML
%matplotlib inline

In [None]:
# Load the tutorial dataset, keeping only the four columns used by the chart
city_populations_url = ('https://gist.githubusercontent.com/johnburnmurdoch/'
                        '4199dbe55095c3e13de8d5b2e5e5307a/raw/'
                        'fa018b25c24b7b5f47fd0568937ff6c04e384786/city_populations')
df = pd.read_csv(city_populations_url, usecols=['name', 'group', 'year', 'value'])
df.head(3)

In [None]:
# Keep the 10 rows with the highest `value` for the selected year.
# The sort must be descending: with an ascending sort, `head(10)` returns the
# 10 rows with the *lowest* values instead of the top of the ranking.
current_year = 2018
dff = (df[df['year'].eq(current_year)]
       .sort_values(by='value', ascending=False)
       .head(10))
dff

In [None]:
# First raw plot: one horizontal bar per row of `dff`, labelled by `name`, sized by `value`
fig, ax = plt.subplots(figsize=(15, 8))
ax.barh(dff['name'], dff['value'])

In [None]:
# Bar colour for each value of the `group` column
colors = {
    'India': '#adb0ff',
    'Europe': '#ffb3ff',
    'Asia': '#90d595',
    'Latin America': '#e48381',
    'Middle East': '#aafbff',
    'North America': '#f7bb5f',
    'Africa': '#eafb50',
}
# Lookup table name -> group (when a name appears on several rows, the last row wins)
group_lk = dict(zip(df['name'], df['group']))

In [None]:
# Second attempt: coloured bars with text labels drawn next to each bar
fig, ax = plt.subplots(figsize=(15, 8))
# NOTE: this rebinds the global `dff`, so re-running the cell reverses the order again
dff = dff[::-1]   # flip values from top to bottom
# One colour per bar, looked up through the group of each name
ax.barh(dff['name'], dff['value'], color=[colors[group_lk[x]] for x in dff['name']])
# `i` is the bar position on the y axis; the texts are anchored at the end of the bar (x = value)
for i, (value, name) in enumerate(zip(dff['value'], dff['name'])):
    ax.text(value, i,     name,            ha='right')  # Tokyo: name
    ax.text(value, i-.25, group_lk[name],  ha='right')  # Asia: group name
    ax.text(value, i,     value,           ha='left')   # 38194.2: value
# Year drawn in axes coordinates (right edge, 40 % of the height) so it does not move with the data
ax.text(1, 0.4, current_year, transform=ax.transAxes, size=46, ha='right')

In [None]:
fig, ax = plt.subplots(figsize=(15, 8))
def draw_barchart(year):
    """Redraw the global `ax` with the 10 highest-valued rows of `df` for `year`.

    Reads the globals `df`, `colors` and `group_lk`. The axes are cleared first,
    so the function can be used as the frame callback of an animation.

    Parameters
    ----------
    year
        Value matched against the `year` column; it is also printed on the chart.
    """
    # Ascending sort + tail(10): the 10 largest values, the largest one drawn last (on top)
    dff = df[df['year'].eq(year)].sort_values(by='value', ascending=True).tail(10)
    ax.clear()
    ax.barh(dff['name'], dff['value'], color=[colors[group_lk[x]] for x in dff['name']])
    # Small horizontal gap between the end of a bar and its labels
    dx = dff['value'].max() / 200
    for i, (value, name) in enumerate(zip(dff['value'], dff['name'])):
        ax.text(value-dx, i,     name,           size=14, weight=600, ha='right', va='bottom')
        ax.text(value-dx, i-.25, group_lk[name], size=10, color='#444444', ha='right', va='baseline')
        ax.text(value+dx, i,     f'{value:,.0f}',  size=14, ha='left',  va='center')
    # ... polished styles
    # Texts using `transform=ax.transAxes` are positioned in axes coordinates (0-1)
    ax.text(1, 0.4, year, transform=ax.transAxes, color='#777777', size=46, ha='right', weight=800)
    ax.text(0, 1.06, 'Population (thousands)', transform=ax.transAxes, size=12, color='#777777')
    ax.xaxis.set_major_formatter(ticker.StrMethodFormatter('{x:,.0f}'))
    ax.xaxis.set_ticks_position('top')
    ax.tick_params(axis='x', colors='#777777', labelsize=12)
    # No y ticks: the names are already written on the bars
    ax.set_yticks([])
    ax.margins(0, 0.01)
    ax.grid(which='major', axis='x', linestyle='-')
    ax.set_axisbelow(True)
    ax.text(0, 1.12, 'The most populous cities in the world from 1500 to 2018',
            transform=ax.transAxes, size=24, weight=600, ha='left')
    ax.text(1, 0, 'by @pratapvardhan; credit @jburnmurdoch', transform=ax.transAxes, ha='right',
            color='#777777', bbox=dict(facecolor='white', alpha=0.8, edgecolor='white'))
    plt.box(False)
    
# Preview of a single frame
draw_barchart(2018)

In [None]:
import matplotlib.animation as animation
from IPython.display import HTML
fig, ax = plt.subplots(figsize=(15, 8))
animator = animation.FuncAnimation(fig, draw_barchart, frames=range(1968, 2019))
HTML(animator.to_jshtml())

In [None]:
import matplotlib.animation as animation
from IPython.display import HTML
fig, ax = plt.subplots(figsize=(15, 8))
animator = animation.FuncAnimation(fig, draw_barchart, frames=range(1500, 2019))
# The 'avconv' writer is not shipped with recent Matplotlib releases any more;
# 'ffmpeg' is the maintained movie writer and accepts the same `codec` argument
animator.save('image.mp4', fps=10, writer="ffmpeg", codec="libx264")

# Validator data

Ideas to improve data quality:
- Filter out some tasks (e.g. validations that took less than 5 minutes?)

In [None]:
%run ../tasking_manager_stats/data_management
%run ../tasking_manager_stats/map_tools

In [None]:
# Load the raw statistics merged from several projects
merged_stats_path = os.path.join(get_data_dir(), 'merged_stats.csv')
df = pd.read_csv(merged_stats_path, encoding='ISO-8859-1')
print(f'{len(df)} lines')
df.head()

In [None]:
'Jean-Yves Longchamp' in df['Author'].unique()

In [None]:
'JYL45' in df['Author'].unique()

In [None]:
# This contributor changed user name: store all his rows under the new one
old_author, new_author = 'Jean-Yves Longchamp', 'JYL45'
df['Author'] = df['Author'].map(lambda author: new_author if author == old_author else author)
# Expected to display False once the replacement is done
old_author in df['Author'].unique()

In [None]:
# Keep only the validation rows of year 2019
df2 = df[(df['Type'] == 'VALIDATION') & (df['Year'] == 2019)]
print(f'{len(df2)} lines')
df2.head()

In [None]:
# Collapse the rows sharing the same project, task, date and author into a single
# row (the other columns keep their maximum)
validation_keys = ['Project', 'Task', 'Year', 'Month', 'Day', 'Rel. Day', 'Author']
df3 = df2.groupby(validation_keys).max().reset_index()
print(f'{len(df3)} lines')
df3.head()

In [None]:
# Number of rows per author and calendar day, returned in the `Task` column
df4 = df3.groupby(['Author', 'Year', 'Month', 'Day']).count()['Task'].reset_index()
print(f'{len(df4)} lines')
df4.head()

In [None]:
# Add a column for the day of the year (1 = January 1st).
# `pd.datetime` has been removed from pandas, so the dates are assembled with
# `pd.to_datetime`, which expects columns named year/month/day. Doing it on the
# whole frame also yields an integer column instead of an object one.
dates = pd.to_datetime(df4[['Year', 'Month', 'Day']].rename(columns=str.lower))
df4['Day_of_year'] = dates.dt.dayofyear
df4 = df4[['Author', 'Day_of_year', 'Task']]
print(f'{len(df4)} lines')
df4.head()

In [None]:
# Add a zero-task row for every (author, day) pair, for days 1 to the last day
# present in the data, so that after summing each author has a row for every day.
# All filler rows are built at once: calling `pd.concat` inside the loops copies
# the whole frame at every iteration (quadratic time).
last_day = 0 if df4.empty else int(df4['Day_of_year'].max())
zero_rows = pd.DataFrame(
    [(author, day, 0) for author in df4['Author'].unique() for day in range(1, last_day + 1)],
    columns=['Author', 'Day_of_year', 'Task'])
df4 = pd.concat([df4, zero_rows], axis=0, ignore_index=True)
print(f'{len(df4)} lines')
df4.tail()

In [None]:
# Merge the zero filler rows with the real ones: exactly one row per author and day
df4 = df4.groupby(['Author', 'Day_of_year']).sum().reset_index()
print(f'{len(df4)} lines')
df4.head()

In [None]:
# Check the number
len(df4['Author'].unique()) * max(df4['Day_of_year'])

In [None]:
# Add a cumsum column named value: running total of `Task` for each author,
# following the current row order (one row per day after the previous step).
# A grouped cumsum gives a numeric column directly; filling a `None` column
# author by author leaves an object-dtype column and rescans the frame each time.
df5 = df4.copy()
df5['value'] = df5.groupby('Author')['Task'].cumsum()
print(f'{len(df5)} lines')
df5[df5['Author'] == 'Anaximandre'].head(10)

In [None]:
# Shape expected by the chart functions: `name`, `Day_of_year`, `value`
chart_columns = ['name', 'Day_of_year', 'value']
df6 = df5.rename(index=int, columns={'Author' : 'name'})[chart_columns]
print(f'{len(df6)} lines')
df6[df6['name'] == 'Anaximandre'].head()

In [None]:
fig, ax = plt.subplots(figsize=(15, 8))
def draw_barchart(day_of_year):
    """Redraw the global `ax` with the validator ranking of one day.

    Reads the global `df6` (columns `name`, `Day_of_year`, `value`) and draws
    the `top_nb` rows with the highest `value` for `day_of_year`, the highest
    one on top.

    Parameters
    ----------
    day_of_year
        Value matched against the `Day_of_year` column; also printed on the chart.
    """
    top_nb = 10
    dff = df6[df6['Day_of_year'].eq(day_of_year)].sort_values(by='value', ascending=True).tail(top_nb)
    ax.clear()
    # One position per selected row: `dff` has fewer than `top_nb` rows when fewer
    # names exist, and `barh` needs as many positions as widths
    ax.barh(np.arange(len(dff)), dff['value']) # TODO color
    # Small horizontal gap between the end of a bar and its labels
    dx = dff['value'].max() / 200
    for i, (value, name) in enumerate(zip(dff['value'], dff['name'])):
        ax.text(value-dx, i, name, size=14, weight=600, ha='right', va='bottom')
        ax.text(value+dx, i, f'{value:,.0f}', size=14, ha='left', va='center')
    # ... polished styles
    ax.text(1, 0.4, day_of_year, transform=ax.transAxes, color='#777777', size=46, ha='right', weight=800)
    ax.text(0, 1.06, 'Validated tasks', transform=ax.transAxes, size=12, color='#777777')
    ax.xaxis.set_major_formatter(ticker.StrMethodFormatter('{x:,.0f}'))
    ax.xaxis.set_ticks_position('top')
    ax.tick_params(axis='x', colors='#777777', labelsize=12)
    ax.set_yticks([])
    ax.margins(0, 0.01)
    ax.grid(which='major', axis='x', linestyle='-')
    ax.set_axisbelow(True)
    ax.text(0, 1.12, 'Number of validated tasks on CartONG project in S1 2019',
            transform=ax.transAxes, size=24, weight=600, ha='left')
    ax.text(1, 0, 'by CartONG credit @pratapvardhan, @jburnmurdoch', transform=ax.transAxes, ha='right',
            color='#777777', bbox=dict(facecolor='white', alpha=0.8, edgecolor='white'))
    plt.box(False)

# Preview of a single frame
draw_barchart(150)

In [None]:
from random import seed
# Fix the seed so that the random colours drawn afterwards are reproducible.
# Only the `seed` function is imported, so it is called directly:
# `random.seed(...)` would need the `random` module itself to be imported.
seed(3)

In [None]:
# Use color from https://medium.com/@6berardi/how-to-create-a-smooth-bar-chart-race-with-python-ad2daf6510dc
from random import randint
import matplotlib.colors as mc
import colorsys
def transform_color(color, amount = 0.5):
    """Return `color` as an RGB tuple with its lightness rescaled.

    The new lightness is ``1 - amount * (1 - lightness)``: ``amount=1`` keeps
    the colour, a smaller amount lightens it and a larger one darkens it.
    The result is clamped to [0, 1] so that the returned RGB values stay valid.

    Parameters
    ----------
    color
        A colour name found in `mc.cnames`, or anything `mc.to_rgb` accepts
        (for instance a '#RRGGBB' string).
    amount : float
        Scaling factor applied to the distance between the lightness and 1.

    Returns
    -------
    tuple
        The (r, g, b) values produced by `colorsys.hls_to_rgb`.
    """
    # Named colours are resolved to their hex code; any other value is used as is
    try:
        c = mc.cnames[color]
    except (KeyError, TypeError):
        c = color
    # The conversion must run for named colours too (it used to be skipped for them)
    hue, lightness, saturation = colorsys.rgb_to_hls(*mc.to_rgb(c))
    new_lightness = min(1.0, max(0.0, 1 - amount * (1 - lightness)))
    return colorsys.hls_to_rgb(hue, new_lightness, saturation)

# Draw one random hex colour per distinct name
all_names = df6['name'].unique().tolist()
random_hex_colors = ['#' + '%06X' % randint(0, 0xFFFFFF) for _ in all_names]

# Fill colour: the drawn colour with an alpha channel; edge colour: a darker variant
rgb_colors = [transform_color(hex_color, 1) for hex_color in random_hex_colors]
rgb_colors_opacity = [rgb + (0.825,) for rgb in rgb_colors]
rgb_colors_dark = [transform_color(hex_color, 1.12) for hex_color in random_hex_colors]
normal_colors = dict(zip(df6['name'].unique(), rgb_colors_opacity))
dark_colors = dict(zip(df6['name'].unique(), rgb_colors_dark))

In [None]:
# Extra sort key: 1 for the names whose `value` on day 20 is already positive, else 0
day_20_totals = df6[df6['Day_of_year'] == 20].groupby('name')['value'].sum()
early_names = day_20_totals[day_20_totals > 0].index
df6['initial_ranking'] = 0
df6.loc[df6['name'].isin(early_names), 'initial_ranking'] = 1
print(f'{len(df6)} lines')
df6[df6['name'] == 'Anaximandre'].head()

In [None]:
fig, ax = plt.subplots(figsize=(15, 8))
def draw_barchart(day_of_year):
    """Redraw the global `ax` with the coloured validator ranking of one day.

    Reads the globals `df6`, `normal_colors` and `dark_colors`. The `top_nb`
    rows with the highest `value` are drawn (ties ordered by `initial_ranking`),
    the highest one on top. The title shows the sum of `value` over all the
    rows of the day.

    Parameters
    ----------
    day_of_year : int
        Day matched against `Day_of_year`; day 1 is displayed as January 01, 2019.
    """
    top_nb = 10
    day_rows = df6[df6['Day_of_year'].eq(day_of_year)]
    dff = day_rows.sort_values(by=['value', 'initial_ranking'], ascending=True).tail(top_nb)
    total_task_nb = day_rows['value'].sum()
    ax.clear()
    # One position per selected row (there may be fewer than `top_nb` of them);
    # the line width is given as a number rather than as a string
    ax.barh(np.arange(len(dff)), dff['value'], color=[normal_colors[x] for x in dff['name']], height=0.8,
            edgecolor=[dark_colors[x] for x in dff['name']], linewidth=3)
    # Small horizontal gap between the end of a bar and its labels
    dx = dff['value'].max() / 200
    for i, (value, name) in enumerate(zip(dff['value'], dff['name'])):
        ax.text(value-dx, i, name, size=14, weight=600, ha='right', va='bottom')
        ax.text(value+dx, i, value, size=14, ha='left', va='center')
    # ... polished styles
    date = (datetime.datetime(2019, 1, 1) + datetime.timedelta(day_of_year - 1)).strftime('%B %d')
    ax.text(1, 0.4, date, transform=ax.transAxes, color='#777777', size=46, ha='right', weight=800)
    ax.text(0, 1.06, 'Validated tasks', transform=ax.transAxes, size=12, color='#777777')
    ax.xaxis.set_major_formatter(ticker.StrMethodFormatter('{x:.0f}'))
    ax.xaxis.set_ticks_position('top')
    ax.tick_params(axis='x', colors='#777777', labelsize=12)
    ax.set_yticks([])
    ax.margins(0, 0.01)
    ax.grid(which='major', axis='x', linestyle='-')
    ax.set_axisbelow(True)
    ax.text(0, 1.12, f'Number of validated tasks on CartONG project in S1 2019 (total {total_task_nb} tasks)',
            transform=ax.transAxes, size=24, weight=600, ha='left')
    ax.text(1, 0, 'by CartONG credit @pratapvardhan, @jburnmurdoch', transform=ax.transAxes, ha='right',
            color='#777777', bbox=dict(facecolor='white', alpha=0.8, edgecolor='white'))
    plt.box(False)

# Preview of a single frame
draw_barchart(150)

In [None]:
# Same chart with a logarithmic x axis
fig, ax = plt.subplots(figsize=(15, 8))
def draw_barchart(day_of_year):
    """Redraw the global `ax` with the validator ranking of one day, on a log scale.

    Reads the globals `df6`, `normal_colors` and `dark_colors`. The `top_nb`
    rows with the highest `value` are drawn (ties ordered by `initial_ranking`).
    The title shows the sum of `value` over all the rows of the day.

    Parameters
    ----------
    day_of_year : int
        Day matched against `Day_of_year`; day 1 is displayed as January 01, 2019.
    """
    top_nb = 10
    day_rows = df6[df6['Day_of_year'].eq(day_of_year)]
    dff = day_rows.sort_values(by=['value', 'initial_ranking'], ascending=True).tail(top_nb)
    total_task_nb = day_rows['value'].sum()
    ax.clear()
    # One position per selected row (there may be fewer than `top_nb` of them)
    ax.barh(np.arange(len(dff)), dff['value'], color=[normal_colors[x] for x in dff['name']], height=0.8,
            edgecolor=[dark_colors[x] for x in dff['name']], linewidth=3, log=True)
    # Add ('', 0) in ax.barh ?
    # On a log axis the labels are shifted by a factor of the value, not by a fixed
    # offset (the former unused `dx` evaluated log(0) when all the values were equal)
    for i, (value, name) in enumerate(zip(dff['value'], dff['name'])):
        ax.text(value*.95, i, name, size=14, weight=600, ha='right', va='center')
        ax.text(value*1.05, i, value, size=14, ha='left', va='center')
    # ... polished styles
    date = (datetime.datetime(2019, 1, 1) + datetime.timedelta(day_of_year - 1)).strftime('%B %d')
    ax.text(1, 0.4, date, transform=ax.transAxes, color='#777777', size=46, ha='right', weight=800)
    ax.text(0, 1.06, 'Validated tasks', transform=ax.transAxes, size=12, color='#777777')
    ax.xaxis.set_major_formatter(ticker.StrMethodFormatter('{x:.0f}'))
    ax.xaxis.set_ticks_position('top')
    ax.tick_params(axis='x', colors='#777777', labelsize=12)
    ax.set_yticks([])
    ax.grid(which='major', axis='x', linestyle='-')
    ax.set_axisbelow(True)
    ax.text(0, 1.12, f'Number of validated tasks on CartONG project in S1 2019 (total {total_task_nb} tasks)',
            transform=ax.transAxes, size=24, weight=600, ha='left')
    ax.text(1, 0, 'by CartONG credit @pratapvardhan, @jburnmurdoch', transform=ax.transAxes, ha='right',
            color='#777777', bbox=dict(facecolor='white', alpha=0.8, edgecolor='white'))
    plt.box(False)

# Preview of a single frame
draw_barchart(150)

In [None]:
import matplotlib.animation as animation
from IPython.display import HTML
fig, ax = plt.subplots(figsize=(17, 10))
animator = animation.FuncAnimation(fig, draw_barchart, frames=range(5, 10))
HTML(animator.to_jshtml())

In [None]:
import matplotlib.animation as animation
from IPython.display import HTML
fig, ax = plt.subplots(figsize=(17, 10))
# `range` excludes its stop value: add 1 so that the last day gets a frame too
last_day = int(max(df6['Day_of_year']))
animator = animation.FuncAnimation(fig, draw_barchart, frames=range(1, last_day + 1))
HTML(animator.to_jshtml())

In [None]:
import matplotlib.animation as animation
from IPython.display import HTML
fig, ax = plt.subplots(figsize=(17, 10))
# `range` excludes its stop value: add 1 so that the last day gets a frame too
last_day = int(max(df6['Day_of_year']))
animator = animation.FuncAnimation(fig, draw_barchart, frames=range(1, last_day + 1))
animator.save('validator_s1_2019.mp4', fps=3)

# Time spent by project

In [None]:
print(f'{len(df)} lines')
df.head()

In [None]:
# Group spent time by Project and day.
# `Duration` is selected before summing: summing every column first also tries
# to "add up" the text columns, which is slow or fails on recent pandas versions.
df2 = df.groupby(['Project', 'Year', 'Month', 'Day'])['Duration'].sum().reset_index()
print(f'{len(df2)} lines')
df2.head()

In [None]:
# Add a column for the day of the year; every row dated before 2019 is put on day 0.
# `pd.datetime` has been removed from pandas, so the dates are assembled with
# `pd.to_datetime`, which expects columns named year/month/day.
dates = pd.to_datetime(df2[['Year', 'Month', 'Day']].rename(columns=str.lower))
df2['Day_of_year'] = dates.dt.dayofyear.where(df2['Year'] >= 2019, 0)
df2 = df2[['Project', 'Day_of_year', 'Duration']]
print(f'{len(df2)} lines')
df2.head()

In [None]:
# Restrict to a number of days
MAX_DAY = 366
df2 = df2[df2['Day_of_year'] <= MAX_DAY]

In [None]:
# Add a zero-duration row for every (project, day) pair, days 0 to MAX_DAY - 1,
# so that after summing each project has a row for every day.
# All filler rows are built at once: calling `pd.concat` inside the loops copies
# the whole frame at every iteration (quadratic time).
zero_rows = pd.DataFrame(
    [(project, day, 0) for project in df2['Project'].unique() for day in range(0, MAX_DAY)],
    columns=['Project', 'Day_of_year', 'Duration'])
df2 = pd.concat([df2, zero_rows], axis=0, ignore_index=True)
print(f'{len(df2)} lines')
df2.tail()

In [None]:
# Merge the zero filler rows with the real ones: exactly one row per project and day
df3 = df2.groupby(['Project', 'Day_of_year']).sum().reset_index()
print(f'{len(df3)} lines')
df3.head()

In [None]:
# Check if there are projects with no contribution
# Per project: sum over all the days minus the day-0 row. Day 0 holds everything
# dated before 2019, so a zero `Duration` means nothing was recorded from 2019 on.
temp_df = df3.groupby('Project').sum() - df3[df3['Day_of_year'] == 0].set_index('Project')
temp_df[temp_df['Duration'] == 0]

In [None]:
# Drop the projects whose `Duration` is zero in `temp_df`
empty_projects = temp_df[temp_df['Duration'] == 0].index
df3 = df3[~df3['Project'].isin(empty_projects)]

In [None]:
# Check the number of lines
len(df3['Project'].unique()) * MAX_DAY

In [None]:
# Add a cumsum column named value: running total of `Duration` for each project,
# following the current row order (one row per day after the previous steps).
# A grouped cumsum gives a numeric column directly; filling a `None` column
# project by project leaves an object-dtype column and rescans the frame each time.
df4 = df3.copy()
df4['value'] = df4.groupby('Project')['Duration'].cumsum()
print(f'{len(df4)} lines')
df4.head(10)

In [None]:
# Load the project -> country table and attach a region (`Group`) to each country
df_countries = pd.read_csv(os.path.join(get_data_dir(), 'Mapathons_countries.csv'), encoding='ISO-8859-1')
# Country names are compared without spaces (e.g. 'BurkinaFaso')
df_countries['Country'] = df_countries['Country'].map(lambda s: s.replace(' ', ''))
df_countries = df_countries.drop_duplicates()
country_groups = {
    # Africa :  http://www.actualite-ouest-africaine.org/content/fr/les-six-r%C3%A9gions-de-l%E2%80%99union-africaine
    'North Africa': ['Algérie'],
    'West Africa': ['BurkinaFaso', 'Niger', 'Sénégal', 'Mali', 'Nigeria'],
    'South Africa': ['Angola', 'AfriqueduSud', 'Zimbabwe', 'Malawi', 'Zambie', 'Mozambique'],
    'Est Africa': ['Kenya', 'Madagascar', 'Tanzanie', 'Ethiopie', 'Ouganda', 'Uganda', 'Soudan', 'SudSoudan'],
    'Central Africa': ['RDC', 'Cameroun', 'Tchad', 'Kalémie', 'Congo'],
    # Other continents
    'Asia': ['Népal', 'Birmanie', 'Bangladesh', 'Kirghizistan', 'Laos', 'Irak'],
    'South America': ['Guyane', 'Equateur'],
    'Central America': ['Haiti'],
}
for group_name, group_countries in country_groups.items():
    df_countries.loc[df_countries['Country'].isin(group_countries), 'Group'] = group_name
df_countries.head()

In [None]:
df_countries[pd.isnull(df_countries['Group'])]

In [None]:
len(df4)

In [None]:
# Projects without country (normal to have 5848, it is MSF not CartONG)
chart_projects = df4['Project'].unique()
known_projects = df_countries['Project'].unique()
# Boolean mask: True where the project is missing from the country table
idx = [project not in known_projects for project in chart_projects]
chart_projects[idx]

In [None]:
# Attach the country columns to each row; projects absent from the country table are dropped
df5 = df4.merge(df_countries, on='Project')
print(f'{len(df5)} lines')
df5.head()

In [None]:
# Shape expected by the chart function: project as a text `name`, `value` divided by 3600
chart_columns = ['name', 'Day_of_year', 'value', 'Country', 'Group']
df6 = df5.rename(index=int, columns={'Project' : 'name'})[chart_columns]
df6['name'] = df6['name'].map(str)
df6['value'] = df6['value'] / 3600
print(f'{len(df6)} lines')
df6.head()

In [None]:
df6['Group'].unique()

In [None]:
# Bar colour for each value of the `Group` column.
# 'South Africa' is one of the groups assigned to the country table, so it needs
# a colour too: without it, looking up the colour of such a project raises a KeyError.
colors = {
    'Central America': '#adb0ff',
    'South America': '#ffb3ff',
    'Asia': '#90d595',
    'Est Africa': '#e48381',
    'West Africa': '#aafbff',
    'Central Africa': '#f7bb5f',
    'North Africa': '#eafb50',
    'South Africa': '#c7b3e5',
}

In [None]:
df6[df6['name'] == '5571']

In [None]:
fig, ax = plt.subplots(figsize=(15, 8))
def draw_barchart(day_of_year):
    """Redraw the global `ax` with the ranking of projects by hours spent on one day.

    Reads the globals `df6` (columns `name`, `Day_of_year`, `value`, `Country`,
    `Group`) and `colors`. The `top_nb` rows with the highest `value` are drawn,
    the highest one on top. The title shows the sum of `value` over all the
    rows of the day.

    Parameters
    ----------
    day_of_year : int
        Day matched against `Day_of_year`; day 1 is displayed as January 01, 2019.
    """
    top_nb = 10
    day_rows = df6[df6['Day_of_year'].eq(day_of_year)]
    dff = day_rows.sort_values(by='value', ascending=True).tail(top_nb)
    total_hour = day_rows['value'].sum()
    ax.clear()
    # One position per selected row (there may be fewer than `top_nb` of them)
    ax.barh(np.arange(len(dff)), dff['value'], color=[colors[x] for x in dff['Group']])
    # Small horizontal gap between the end of a bar and its labels
    dx = dff['value'].max() / 200
    for i, (value, name, country, group) in enumerate(zip(dff['value'], dff['name'], dff['Country'], dff['Group'])):
        ax.text(value-dx, i, name, size=14, weight=600, ha='right', va='bottom')
        ax.text(value+dx, i, f'{value:.1f}', size=14, ha='left', va='center')
        ax.text(value-dx, i-.25, country + ' ( ' + group + ' )', size=10, color='#444444', ha='right', va='baseline')
    # ... polished styles
    date = (datetime.datetime(2019, 1, 1) + datetime.timedelta(day_of_year - 1)).strftime('%B %d')
    ax.text(1, 0.4, date, transform=ax.transAxes, color='#777777', size=46, ha='right', weight=800)
    ax.text(0, 1.06, 'Time spend (hours)', transform=ax.transAxes, size=12, color='#777777')
    ax.xaxis.set_major_formatter(ticker.StrMethodFormatter('{x:.0f}'))
    ax.xaxis.set_ticks_position('top')
    ax.tick_params(axis='x', colors='#777777', labelsize=12)
    ax.set_yticks([])
    ax.margins(0, 0.01)
    ax.grid(which='major', axis='x', linestyle='-')
    ax.set_axisbelow(True)
    ax.text(0, 1.12, f'Hours spent on CartONG projects finished in S1 2019 (total : {total_hour:.0f}h)',
            transform=ax.transAxes, size=24, weight=600, ha='left')
    ax.text(1, 0, 'by CartONG credit @pratapvardhan, @jburnmurdoch', transform=ax.transAxes, ha='right',
            color='#777777', bbox=dict(facecolor='white', alpha=0.8, edgecolor='white'))
    plt.box(False)

# Preview of a single frame
draw_barchart(150)

In [None]:
import matplotlib.animation as animation
from IPython.display import HTML
fig, ax = plt.subplots(figsize=(16, 9))
# `range` excludes its stop value: add 1 so that the last day gets a frame too
last_day = int(max(df6['Day_of_year']))
animator = animation.FuncAnimation(fig, draw_barchart, frames=range(1, last_day + 1))
HTML(animator.to_jshtml())

In [None]:
import matplotlib.animation as animation
from IPython.display import HTML
fig, ax = plt.subplots(figsize=(16, 9))
# `range` excludes its stop value: add 1 so that the last day gets a frame too
last_day = int(max(df6['Day_of_year']))
animator = animation.FuncAnimation(fig, draw_barchart, frames=range(1, last_day + 1))
animator.save('time_spent_s1_2019.mp4', fps=3)

### Mapping and validation

In [None]:
df.head()

In [None]:
# Group spent time by Project, day and Type.
# `Duration` is selected before summing: summing every column first also tries
# to "add up" the text columns, which is slow or fails on recent pandas versions.
df2 = df.groupby(['Project', 'Year', 'Month', 'Day', 'Type'])['Duration'].sum().reset_index()
print(f'{len(df2)} lines')
df2.head()

In [None]:
# Add a column for the day of the year; every row dated before 2019 is put on day 0.
# `pd.datetime` has been removed from pandas, so the dates are assembled with
# `pd.to_datetime`, which expects columns named year/month/day.
dates = pd.to_datetime(df2[['Year', 'Month', 'Day']].rename(columns=str.lower))
df2['Day_of_year'] = dates.dt.dayofyear.where(df2['Year'] >= 2019, 0)
df2 = df2[['Project', 'Type', 'Day_of_year', 'Duration']]
print(f'{len(df2)} lines')
df2.head()

In [None]:
# Restrict to a number of days
MAX_DAY = 366
df2 = df2[df2['Day_of_year'] <= MAX_DAY]

In [None]:
# Add a zero-duration row for every (project, type, day) combination, days 0 to
# MAX_DAY - 1, so that after summing each project has a MAPPING and a VALIDATION
# row for every day.
# All filler rows are built at once: calling `pd.concat` inside the loops copies
# the whole frame at every iteration (quadratic time).
zero_rows = pd.DataFrame(
    [(project, task_type, day, 0)
     for project in df2['Project'].unique()
     for day in range(0, MAX_DAY)
     for task_type in ('MAPPING', 'VALIDATION')],
    columns=['Project', 'Type', 'Day_of_year', 'Duration'])
df2 = pd.concat([df2, zero_rows], axis=0, ignore_index=True)
print(f'{len(df2)} lines')
df2.tail()

In [None]:
# Merge the zero filler rows with the real ones: one row per project, type and day
df3 = df2.groupby(['Project', 'Type', 'Day_of_year']).sum().reset_index()
print(f'{len(df3)} lines')
df3.head()

In [None]:
# Check if there are projects with no contribution.
# Day 0 holds everything dated before 2019, so "total - day 0" is what was
# recorded from 2019 on. Here day 0 has two rows per project (one per Type):
# both sides are therefore summed per project on `Duration` only, which gives
# one row per project and keeps the text `Type` column out of the arithmetic.
total_duration = df3.groupby('Project')[['Duration']].sum()
day_0_duration = df3[df3['Day_of_year'] == 0].groupby('Project')[['Duration']].sum()
temp_df = total_duration - day_0_duration
temp_df[temp_df['Duration'] == 0]

In [None]:
# Drop the projects whose `Duration` is zero in `temp_df`
empty_projects = temp_df[temp_df['Duration'] == 0].index
df3 = df3[~df3['Project'].isin(empty_projects)]

In [None]:
# Check the number of lines
len(df3['Project'].unique()) * MAX_DAY * 2

In [None]:
# Load the project -> country table and attach a region (`Group`) to each country
df_countries = pd.read_csv(os.path.join(get_data_dir(), 'Mapathons_countries.csv'), encoding='ISO-8859-1')
# Country names are compared without spaces (e.g. 'BurkinaFaso')
df_countries['Country'] = df_countries['Country'].map(lambda s: s.replace(' ', ''))
df_countries = df_countries.drop_duplicates()
country_groups = {
    # Africa :  http://www.actualite-ouest-africaine.org/content/fr/les-six-r%C3%A9gions-de-l%E2%80%99union-africaine
    'North Africa': ['Algérie'],
    'West Africa': ['BurkinaFaso', 'Niger', 'Sénégal', 'Mali', 'Nigeria'],
    'South Africa': ['Angola', 'AfriqueduSud', 'Zimbabwe', 'Malawi', 'Zambie', 'Mozambique'],
    'Est Africa': ['Kenya', 'Madagascar', 'Tanzanie', 'Ethiopie', 'Ouganda', 'Uganda', 'Soudan', 'SudSoudan'],
    'Central Africa': ['RDC', 'Cameroun', 'Tchad', 'Kalémie', 'Congo'],
    # Other continents
    'Asia': ['Népal', 'Birmanie', 'Bangladesh', 'Kirghizistan', 'Laos', 'Irak'],
    'South America': ['Guyane', 'Equateur'],
    'Central America': ['Haiti'],
}
for group_name, group_countries in country_groups.items():
    df_countries.loc[df_countries['Country'].isin(group_countries), 'Group'] = group_name
df_countries.head()

In [None]:
df_countries[pd.isnull(df_countries['Group'])]

In [None]:
len(df3)

In [None]:
# Projects without country (normal to have 5848, it is MSF not CartONG)
chart_projects = df3['Project'].unique()
known_projects = df_countries['Project'].unique()
# Boolean mask: True where the project is missing from the country table
idx = [project not in known_projects for project in chart_projects]
chart_projects[idx]

In [None]:
# Attach the country columns to each row; projects absent from the country table are dropped
df4 = df3.merge(df_countries, on='Project')
print(f'{len(df4)} lines')
df4.head()

In [None]:
# Bar colour for each value of the `Group` column.
# 'South Africa' is one of the groups assigned to the country table, so it needs
# a colour too: without it, looking up the colour of such a project raises a KeyError.
colors = {
    'Central America': '#adb0ff',
    'South America': '#ffb3ff',
    'Asia': '#90d595',
    'Est Africa': '#e48381',
    'West Africa': '#aafbff',
    'Central Africa': '#f7bb5f',
    'North Africa': '#eafb50',
    'South Africa': '#c7b3e5',
}

In [None]:
# One frame per task type
task_type = df4['Type']
df_map = df4[task_type == 'MAPPING']
df_val = df4[task_type == 'VALIDATION']

In [None]:
def create_df_with_cumsum(df):
    """Return a copy of `df` with a `value` column: the running total of
    `Duration` within each `Project`, following the row order of `df`.

    The input frame is left untouched. The running total is computed with a
    grouped cumsum and written back by position, so the result is numeric and
    does not depend on the index labels being unique.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with at least the columns `Project` and `Duration`.

    Returns
    -------
    pandas.DataFrame
        Copy of `df` with the additional `value` column.
    """
    res = df.copy()
    # The grouped cumsum keeps the original row order, so a positional assignment is safe
    res['value'] = res.groupby('Project')['Duration'].cumsum().to_numpy()
    return res

In [None]:
# Add a cumsum column named value
df_map2 = create_df_with_cumsum(df_map)
print(f'{len(df_map2)} lines')
df_map2.head()

In [None]:
# Add a cumsum column named value
df_val2 = create_df_with_cumsum(df_val)
print(f'{len(df_val2)} lines')
df_val2.head()

In [None]:
def reformat(df):
    """Return the chart-ready version of `df`.

    `Project` is renamed to `name` and converted to text, `value` is divided
    by 3600, and only the columns used by the chart are kept. Index labels are
    converted with `int`. The input frame is not modified.
    """
    chart_columns = ['name', 'Day_of_year', 'Type', 'value', 'Country', 'Group']
    renamed = df.rename(index=int, columns={'Project' : 'name'})
    # Work on an explicit copy of the selected columns
    res = renamed[chart_columns].copy()
    res['name'] = res['name'].map(str)
    res['value'] = res['value'] / 3600
    return res

In [None]:
# Rename, reformat extract useful columns
df_map3 = reformat(df_map2)
print(f'{len(df_map3)} lines')
df_map3.head()

In [None]:
# Rename, reformat extract useful columns
df_val3 = reformat(df_val2)
print(f'{len(df_val3)} lines')
df_val3.head()

### Filter on HCR projects

In [None]:
projects = [5847, 5861, 5889, 5890, 5891, 6067, 6068, 6069, 6070, 6072, 6174, 7249, 7390]
len(projects)

In [None]:
# Keep the mapping rows of the projects listed in `projects` (names are text, the list holds ints)
is_hcr_mapping = df_map3['name'].map(int).isin(projects)
df_map4 = df_map3[is_hcr_mapping]
print(f'{len(df_map4)} lines')
df_map4.head()

In [None]:
# Keep the validation rows of the projects listed in `projects` (names are text, the list holds ints)
is_hcr_validation = df_val3['name'].map(int).isin(projects)
df_val4 = df_val3[is_hcr_validation]
print(f'{len(df_val4)} lines')
df_val4.head()

In [None]:
(df_val4.set_index(['name', 'Day_of_year']) + df_map4.set_index(['name', 'Day_of_year']))['value']

In [None]:
fig, ax = plt.subplots(figsize=(16, 9))
def draw_barchart(day_of_year):
    """Redraw the global `ax` with stacked mapping/validation hours per project.

    Reads the globals `df_map4` and `df_val4`. For `day_of_year`, the `value`
    of both frames is added per project; the `top_nb` projects with the highest
    sum are drawn, mapping in yellow and validation in green stacked after it.
    The title shows the sum over all the projects of the day.

    Parameters
    ----------
    day_of_year : int
        Day matched against `Day_of_year`; day 1 is displayed as January 01, 2019.
    """
    top_nb = 10
    keys = ['name', 'Day_of_year']
    df_map_day = df_map4[df_map4['Day_of_year'].eq(day_of_year)]
    df_val_day = df_val4[df_val4['Day_of_year'].eq(day_of_year)]
    # Add only the `value` columns: adding the whole frames also "adds" the text columns
    df_total = (df_val_day.set_index(keys)['value'] + df_map_day.set_index(keys)['value']).reset_index()
    # The displayed total covers every project of the day, so it is computed
    # before the frame is cut down to the top projects
    total_hour = df_total['value'].sum()
    df_total = df_total.sort_values(by='value', ascending=True).tail(top_nb)
    # Put both frames in the same row order as the ranking
    df_map_day = df_map_day.set_index('name').loc[df_total['name']].reset_index()
    df_val_day = df_val_day.set_index('name').loc[df_total['name']].reset_index()
    # One position per ranked project (there may be fewer than `top_nb` of them)
    positions = np.arange(len(df_total))
    ax.clear()
    ax.barh(positions, df_map_day['value'], color='y')
    ax.barh(positions, df_val_day['value'], left=df_map_day['value'].values, color='g')
    # Small horizontal gap between the end of a bar and its labels
    dx = df_total['value'].max() / 200
    for i, (value, name) in enumerate(zip(df_total['value'], df_total['name'])):
        ax.text(value-dx, i, name, size=14, weight=600, ha='right', va='bottom')
        ax.text(value+dx, i, f'{value:.1f}', size=14, ha='left', va='center')
    # ... polished styles
    date = (datetime.datetime(2019, 1, 1) + datetime.timedelta(day_of_year - 1)).strftime('%B %d')
    ax.text(1, 0.23, date, transform=ax.transAxes, color='#777777', size=46, ha='right', weight=800)
    ax.text(0, 1.06, 'Time spend (hours) : mapping time in yellow, validation time in green',
            transform=ax.transAxes, size=12, color='#777777')
    ax.xaxis.set_major_formatter(ticker.StrMethodFormatter('{x:.0f}'))
    ax.xaxis.set_ticks_position('top')
    ax.tick_params(axis='x', colors='#777777', labelsize=12)
    ax.set_yticks([])
    ax.margins(0, 0.01)
    ax.grid(which='major', axis='x', linestyle='-')
    ax.set_axisbelow(True)
    ax.text(0, 1.12, f'Hours spent on CartONG-HCR Missing Maps projects (total : {total_hour:.0f}h)',
            transform=ax.transAxes, size=24, weight=600, ha='left')
    ax.text(1, 0, 'by CartONG credit @pratapvardhan, @jburnmurdoch', transform=ax.transAxes, ha='right',
            color='#777777', bbox=dict(facecolor='white', alpha=0.8, edgecolor='white'))
    plt.box(False)

# Preview of a single frame
draw_barchart(150)

In [None]:
os.makedirs('HCR', exist_ok=True)

In [None]:
# Export one PNG per day into the HCR folder, from the first day with a positive
# mapping `value` up to day 365 (the stop value 366 is excluded by `range`)
for i in range(min(df_map4[df_map4['value'] > 0]['Day_of_year']), 366):
    # Rebinds the global `ax` that `draw_barchart` draws on
    fig, ax = plt.subplots(figsize=(16, 9))
    draw_barchart(int(i))
    plt.savefig(f'HCR/{i}.png')
    # Close the figure before creating the next one
    plt.close()
    # Progress indicator
    print(i)

In [None]:
from random import seed
# Fix the seed so that the random colours drawn afterwards are reproducible.
# Only the `seed` function is imported, so it is called directly:
# `random.seed(...)` would need the `random` module itself to be imported.
seed(0)

In [None]:
# Use color from https://medium.com/@6berardi/how-to-create-a-smooth-bar-chart-race-with-python-ad2daf6510dc
from random import randint
import matplotlib.colors as mc
import colorsys
def transform_color(color, amount = 0.5):
    """Return `color` as an RGB tuple with its lightness rescaled.

    The new lightness is ``1 - amount * (1 - lightness)``: ``amount=1`` keeps
    the colour, a smaller amount lightens it and a larger one darkens it.
    The result is clamped to [0, 1] so that the returned RGB values stay valid.

    Parameters
    ----------
    color
        A colour name found in `mc.cnames`, or anything `mc.to_rgb` accepts
        (for instance a '#RRGGBB' string).
    amount : float
        Scaling factor applied to the distance between the lightness and 1.

    Returns
    -------
    tuple
        The (r, g, b) values produced by `colorsys.hls_to_rgb`.
    """
    # Named colours are resolved to their hex code; any other value is used as is
    try:
        c = mc.cnames[color]
    except (KeyError, TypeError):
        c = color
    # The conversion must run for named colours too (it used to be skipped for them)
    hue, lightness, saturation = colorsys.rgb_to_hls(*mc.to_rgb(c))
    new_lightness = min(1.0, max(0.0, 1 - amount * (1 - lightness)))
    return colorsys.hls_to_rgb(hue, new_lightness, saturation)

# Draw one random hex colour per distinct project name
all_names = df_map4['name'].unique().tolist()
random_hex_colors = ['#' + '%06X' % randint(0, 0xFFFFFF) for _ in all_names]

# Fill colour: the drawn colour with an alpha channel; edge colour: a darker variant
rgb_colors = [transform_color(hex_color, 1) for hex_color in random_hex_colors]
rgb_colors_opacity = [rgb + (0.825,) for rgb in rgb_colors]
rgb_colors_dark = [transform_color(hex_color, 1.12) for hex_color in random_hex_colors]
normal_colors = dict(zip(df_map4['name'].unique(), rgb_colors_opacity))
dark_colors = dict(zip(df_map4['name'].unique(), rgb_colors_dark))

In [None]:
fig, ax = plt.subplots(figsize=(16, 9))
def draw_barchart2(day_of_year):
    """Draw the top projects of one day as horizontal bars on the global ``ax``.

    For every selected project a wide bar shows the summed ``value`` (mapping +
    validation hours) in the project's own colour; a thin bar underneath splits
    it into mapping (yellow) and validation (green).

    Args:
        day_of_year: day to plot; only rows of ``df_map4`` / ``df_val4`` whose
            ``Day_of_year`` equals it are used.
    """
    top_nb = 10
    df_map_day = df_map4[df_map4['Day_of_year'].eq(day_of_year)]
    df_val_day = df_val4[df_val4['Day_of_year'].eq(day_of_year)]
    # Align mapping and validation rows on (name, day) and add their values
    df_total = (df_val_day.set_index(['name', 'Day_of_year']) + df_map_day.set_index(['name', 'Day_of_year']))['value'].reset_index()
    df_total = df_total.sort_values(by='value', ascending=True).tail(top_nb)
    # Put the per-type frames in the same order as the ranked totals
    df_map_day = df_map_day.set_index('name').loc[df_total['name']].reset_index()
    df_val_day = df_val_day.set_index('name').loc[df_total['name']].reset_index()
    # Total shown in the title: sum over the displayed projects only
    total_hour = df_total['value'].sum()
    ax.clear()
    # One slot per selected project; a hard-coded 10 makes barh() fail as soon as
    # fewer than `top_nb` projects are available for the day.
    positions = np.arange(len(df_total))
    ax.barh(positions + .1, df_total['value'], height=.8, color=[normal_colors[x] for x in df_total['name']],
            edgecolor=[dark_colors[x] for x in df_total['name']], linewidth=3)
    ax.barh(positions - .2, df_map_day['value'], height=.3, color='y')
    ax.barh(positions - .2, df_val_day['value'], height=.3, left=df_map_day['value'].values, color='g')
    # Small horizontal gap between a bar end and its labels
    dx = df_total['value'].max() / 200
    for i, (value, name) in enumerate(zip(df_total['value'], df_total['name'])):
        ax.text(value-dx, i,     name,           size=14, weight=600, ha='right', va='bottom')
        ax.text(value+dx, i,      f'{value:.1f}',  size=14, ha='left',  va='center')
    # ... polished styles
    date = (datetime.datetime(2019, 1, 1) + datetime.timedelta(day_of_year - 1)).strftime('%B %d')
    ax.text(1, 0.23, date, transform=ax.transAxes, color='#777777', size=46, ha='right', weight=800)
    ax.text(0, 1.06, 'Time spend (hours) : mapping time in yellow, validation time in green',
            transform=ax.transAxes, size=12, color='#777777')
    ax.xaxis.set_major_formatter(ticker.StrMethodFormatter('{x:.0f}'))
    ax.xaxis.set_ticks_position('top')
    ax.tick_params(axis='x', colors='#777777', labelsize=12)
    ax.set_yticks([])
    ax.margins(0, 0.01)
    ax.grid(which='major', axis='x', linestyle='-')
    ax.set_axisbelow(True)
    ax.text(0, 1.12, f'Hours spent on CartONG-HCR Missing Maps projects (total : {total_hour:.0f}h)',
            transform=ax.transAxes, size=24, weight=600, ha='left')
    ax.text(1, 0, 'by CartONG credit @pratapvardhan, @jburnmurdoch', transform=ax.transAxes, ha='right',
            color='#777777', bbox=dict(facecolor='white', alpha=0.8, edgecolor='white'))
    plt.box(False)
    
draw_barchart2(350)

In [None]:
os.makedirs('HCR2', exist_ok=True)
# One PNG per day, starting at the first day with a positive mapping value
start_day = min(df_map4[df_map4['value'] > 0]['Day_of_year'])
for i in range(start_day, 366):
    # draw_barchart2() draws on the global `ax`: rebind it to a new figure each time
    fig, ax = plt.subplots(figsize=(16, 9))
    draw_barchart2(int(i))
    plt.savefig(f'HCR2/{i}.png')
    plt.close()
    print(i)

In [None]:
# Short display names keyed by integer project number.
# draw_barchart3 / draw_barchart4 look them up with names[int(name)].
# NOTE(review): 6174 and 6067 both map to 'Kakuma I' - confirm this is intended.
names = {
    7390: 'Maratane',
    7249: 'Ifo',
    6174: 'Kakuma I',
    6072: 'Tabareybarey',
    6070: 'Abala',
    6069: 'Kakuma II-III',
    6068: 'Kakuma IV',
    6067: 'Kakuma I',
    5891: 'Kebribeyah',
    5890: 'MaiAini',
    5889: 'Aysaita',
    5861: 'Shimelba',
    5847: 'Berhale'
}

In [None]:
fig, ax = plt.subplots(figsize=(16, 9))
def draw_barchart3(day_of_year):
    """Draw the top projects of one day on the global ``ax``, labelled with short names.

    Same chart as the per-project colour version (wide bar = mapping + validation
    ``value``, thin yellow/green bar = split by type), but bars longer than a tenth
    of the longest one are labelled ``'<short name> #<project>'`` using ``names``.

    Args:
        day_of_year: day to plot; only rows of ``df_map4`` / ``df_val4`` whose
            ``Day_of_year`` equals it are used.
    """
    top_nb = 10
    df_map_day = df_map4[df_map4['Day_of_year'].eq(day_of_year)]
    df_val_day = df_val4[df_val4['Day_of_year'].eq(day_of_year)]
    # Align mapping and validation rows on (name, day) and add their values
    df_total = (df_val_day.set_index(['name', 'Day_of_year']) + df_map_day.set_index(['name', 'Day_of_year']))['value'].reset_index()
    df_total = df_total.sort_values(by='value', ascending=True).tail(top_nb)
    # Put the per-type frames in the same order as the ranked totals
    df_map_day = df_map_day.set_index('name').loc[df_total['name']].reset_index()
    df_val_day = df_val_day.set_index('name').loc[df_total['name']].reset_index()
    # Total shown in the title: sum over the displayed projects only
    total_hour = df_total['value'].sum()
    ax.clear()
    # One slot per selected project; a hard-coded 10 makes barh() fail as soon as
    # fewer than `top_nb` projects are available for the day.
    positions = np.arange(len(df_total))
    ax.barh(positions + .1, df_total['value'], height=.8, color=[normal_colors[x] for x in df_total['name']],
            edgecolor=[dark_colors[x] for x in df_total['name']], linewidth=3)
    ax.barh(positions - .2, df_map_day['value'], height=.3, color='y')
    ax.barh(positions - .2, df_val_day['value'], height=.3, left=df_map_day['value'].values, color='g')
    # Longest bar, computed once instead of on every loop iteration
    max_value = df_total['value'].max()
    dx = max_value / 200
    for i, (value, name) in enumerate(zip(df_total['value'], df_total['name'])):
        # Short bars only get the project number: the full label would not fit
        text = names[int(name)] + ' #' + name if value > max_value / 10 else name
        ax.text(value-dx, i,      text,            size=14, weight=600, ha='right', va='bottom')
        ax.text(value+dx, i,      f'{value:.1f}',  size=14, ha='left',  va='center')
    # ... polished styles
    date = (datetime.datetime(2019, 1, 1) + datetime.timedelta(day_of_year - 1)).strftime('%B %d')
    ax.text(1, 0.23, date, transform=ax.transAxes, color='#777777', size=46, ha='right', weight=800)
    ax.text(0, 1.06, 'Time spend (hours) : mapping time in yellow, validation time in green',
            transform=ax.transAxes, size=12, color='#777777')
    ax.xaxis.set_major_formatter(ticker.StrMethodFormatter('{x:.0f}'))
    ax.xaxis.set_ticks_position('top')
    ax.tick_params(axis='x', colors='#777777', labelsize=12)
    ax.set_yticks([])
    ax.margins(0, 0.01)
    ax.grid(which='major', axis='x', linestyle='-')
    ax.set_axisbelow(True)
    ax.text(0, 1.12, f'Hours spent on CartONG-HCR Missing Maps projects (total : {total_hour:.0f}h)',
            transform=ax.transAxes, size=24, weight=600, ha='left')
    ax.text(1, 0, 'by CartONG credit @pratapvardhan, @jburnmurdoch', transform=ax.transAxes, ha='right',
            color='#777777', bbox=dict(facecolor='white', alpha=0.8, edgecolor='white'))
    plt.box(False)
    
draw_barchart3(350)

In [None]:
fig, ax = plt.subplots(figsize=(16, 9))
draw_barchart3(150)

In [None]:
directory = 'HCR3'
os.makedirs(directory, exist_ok=True)
# One PNG per day, starting at the first day with a positive mapping value
start_day = min(df_map4[df_map4['value'] > 0]['Day_of_year'])
for i in range(start_day, 366):
    # draw_barchart3() draws on the global `ax`: rebind it to a new figure each time
    fig, ax = plt.subplots(figsize=(16, 9))
    draw_barchart3(int(i))
    plt.savefig(f'{directory}/{i}.png')
    plt.close()
    print(i)

In [None]:
mapathons = pd.read_csv(os.path.join('..', 'data', 'Mapathons_2020_02_08.csv'))
mapathons.head()

In [None]:
# All the events of 05/04/2019 are shown as a single 'Nuit de la Géo' entry with a city count
is_nuit_geo = mapathons['Date'] == '05/04/2019'
mapathons.loc[is_nuit_geo, 'Place'] = 'Nuit de la Géo'
nb_city_nuit_geo = is_nuit_geo.sum()
mapathons.loc[is_nuit_geo, 'City'] = f'{nb_city_nuit_geo} villes'
# Keep the UNHCR mapathons only; .copy() so the columns added below are written to an
# independent frame instead of a slice of `mapathons`
mapathons2 = mapathons.loc[mapathons['Requesting NGO (project)'] == 'UNHCR', ['Date', 'City', 'Place']].copy()
mapathons2['Date'] = pd.to_datetime(mapathons2['Date'], format='%d/%m/%Y')
# Vectorised day of the year (replaces a row-by-row iterrows() loop)
mapathons2['Day_of_year'] = mapathons2['Date'].dt.dayofyear
print(f'{len(mapathons2)} lines')
mapathons2.head()

In [None]:
mapathons2

In [None]:
fig, ax = plt.subplots(figsize=(16, 9))
def draw_barchart4(day_of_year):
    """Draw the top projects of one day on the global ``ax`` and name that day's mapathon.

    Wide bar = mapping + validation ``value`` in the project's colour, thin
    yellow/green bar = split by type. When ``mapathons2`` has an entry for
    ``day_of_year``, its place and city are written under the date.

    Args:
        day_of_year: day to plot; only rows of ``df_map4`` / ``df_val4`` whose
            ``Day_of_year`` equals it are used.
    """
    top_nb = 10
    df_map_day = df_map4[df_map4['Day_of_year'].eq(day_of_year)]
    df_val_day = df_val4[df_val4['Day_of_year'].eq(day_of_year)]
    # Align mapping and validation rows on (name, day) and add their values
    df_total = (df_val_day.set_index(['name', 'Day_of_year']) + df_map_day.set_index(['name', 'Day_of_year']))['value'].reset_index()
    df_total = df_total.sort_values(by='value', ascending=True).tail(top_nb)
    # Put the per-type frames in the same order as the ranked totals
    df_map_day = df_map_day.set_index('name').loc[df_total['name']].reset_index()
    df_val_day = df_val_day.set_index('name').loc[df_total['name']].reset_index()
    # Total shown in the title: sum over the displayed projects only
    total_hour = df_total['value'].sum()
    ax.clear()
    # One slot per selected project; a hard-coded 10 makes barh() fail as soon as
    # fewer than `top_nb` projects are available for the day.
    positions = np.arange(len(df_total))
    ax.barh(positions + .1, df_total['value'], height=.8, color=[normal_colors[x] for x in df_total['name']],
            edgecolor=[dark_colors[x] for x in df_total['name']], linewidth=3)
    ax.barh(positions - .2, df_map_day['value'], height=.3, color='y')
    ax.barh(positions - .2, df_val_day['value'], height=.3, left=df_map_day['value'].values, color='g')
    # Longest bar, computed once instead of on every loop iteration
    max_value = df_total['value'].max()
    dx = max_value / 200
    for i, (value, name) in enumerate(zip(df_total['value'], df_total['name'])):
        # Short bars only get the project number: the full label would not fit
        text = names[int(name)] + ' #' + name if value > max_value / 10 else name
        ax.text(value-dx, i,      text,            size=14, weight=600, ha='right', va='bottom')
        ax.text(value+dx, i,      f'{value:.1f}',  size=14, ha='left',  va='center')
    # ... polished styles
    date = (datetime.datetime(2019, 1, 1) + datetime.timedelta(day_of_year - 1)).strftime('%B %d')
    ax.text(1, 0.23, date, transform=ax.transAxes, color='#777777', size=46, ha='right', weight=800)
    ax.text(0, 1.06, 'Time spend (hours) : mapping time in yellow, validation time in green',
            transform=ax.transAxes, size=12, color='#777777')
    ax.xaxis.set_major_formatter(ticker.StrMethodFormatter('{x:.0f}'))
    ax.xaxis.set_ticks_position('top')
    ax.tick_params(axis='x', colors='#777777', labelsize=12)
    ax.set_yticks([])
    ax.margins(0, 0.01)
    ax.grid(which='major', axis='x', linestyle='-')
    ax.set_axisbelow(True)
    ax.text(0, 1.12, f'Hours spent on CartONG-HCR Missing Maps projects (total : {total_hour:.0f}h)',
            transform=ax.transAxes, size=24, weight=600, ha='left')
    ax.text(1, 0, 'by CartONG credit @pratapvardhan, @jburnmurdoch', transform=ax.transAxes, ha='right',
            color='#777777', bbox=dict(facecolor='white', alpha=0.8, edgecolor='white'))
    if day_of_year in mapathons2['Day_of_year'].values:
        # Several mapathons on the same day: the last row is the one displayed
        mapathon_day = mapathons2[mapathons2['Day_of_year'] == day_of_year].iloc[-1]
        ax.text(1, 0.18, mapathon_day['Place'] + ', ' + mapathon_day['City'], transform=ax.transAxes,
                color='#777777', size=16, ha='right', weight=800)
    plt.box(False)
draw_barchart4(323)

In [None]:
fig, ax = plt.subplots(figsize=(16, 9))
draw_barchart4(95)

In [None]:
directory = 'HCR4'
os.makedirs(directory, exist_ok=True)
# One PNG per day, starting at the first day with a positive mapping value
start_day = min(df_map4[df_map4['value'] > 0]['Day_of_year'])
for i in range(start_day, 366):
    # draw_barchart4() draws on the global `ax`: rebind it to a new figure each time
    fig, ax = plt.subplots(figsize=(16, 9))
    draw_barchart4(int(i))
    plt.savefig(f'{directory}/{i}.png')
    plt.close()
    print(i)

# Merge of 2 templates

In [None]:
%run ../tasking_manager_stats/data_management
%run ../tasking_manager_stats/map_tools

In [None]:
# Load the raw statistics merged over several projects
merged_stats_path = os.path.join(get_data_dir(), 'merged_stats_2020_01_01.csv')
df = pd.read_csv(merged_stats_path, encoding='ISO-8859-1')
print(f'{len(df)} lines')
df.head()

In [None]:
# Year being animated; contributions from earlier years are assigned Day_of_year 0 further down
STAT_YEAR = 2019

### Mapping and validation

In [None]:
df.head()

In [None]:
# Total time spent per project, calendar day and contribution type
day_keys = ['Project', 'Year', 'Month', 'Day', 'Type']
df2 = df.groupby(day_keys).sum()['Duration'].reset_index()
print(f'{len(df2)} lines')
df2.head()

In [None]:
# Add a column for the day of the year.
# Everything logged before STAT_YEAR is folded into day 0; for the other rows the date is
# assembled from the Year / Month / Day columns (pd.datetime no longer exists in recent
# pandas, and the vectorised form avoids a row-by-row iterrows() loop).
df2['Day_of_year'] = 0
in_stat_year = df2['Year'] >= STAT_YEAR
dates = pd.to_datetime(
    df2.loc[in_stat_year, ['Year', 'Month', 'Day']]
       .rename(columns={'Year': 'year', 'Month': 'month', 'Day': 'day'}))
df2.loc[in_stat_year, 'Day_of_year'] = dates.dt.dayofyear
df2 = df2[['Project', 'Type', 'Day_of_year', 'Duration']]
print(f'{len(df2)} lines')
df2.head()

In [None]:
# Restrict to a number of days.
# MAX_DAY is also the number of day slots (0 .. MAX_DAY - 1) created per project and type
# when the zero-duration rows are added.
MAX_DAY = 366
df2 = df2[df2['Day_of_year'] <= MAX_DAY]

In [None]:
# Add a 0-duration row for every (project, day, type) so that each day exists for each project.
# All the rows are built first and appended with a single concat: calling pd.concat inside
# the double loop re-copies the whole frame for every added row.
zero_rows = pd.DataFrame(
    data=[(project, contribution_type, day, 0)
          for project in df2['Project'].unique()
          for day in range(0, MAX_DAY)
          for contribution_type in ('MAPPING', 'VALIDATION')],
    columns=['Project', 'Type', 'Day_of_year', 'Duration'])
df2 = pd.concat([df2, zero_rows], axis=0, ignore_index=True)
print(f'{len(df2)} lines')
df2.tail()

In [None]:
# Collapse the zero rows with the real ones: exactly one line per project, type and day
df3 = df2.groupby(['Project', 'Type', 'Day_of_year'], as_index=False).sum()
print(f'{len(df3)} lines')
df3.head()

In [None]:
# Remove the projects with no contribution during the year, if there are any.
# Day_of_year 0 holds everything logged before STAT_YEAR, so only days > 0 count.
# The total is computed per project directly: subtracting the day-0 rows from the
# per-project sum aligned one total against two rows (MAPPING and VALIDATION) and
# therefore never compared against their sum.
temp_df = df3[df3['Day_of_year'] > 0].groupby('Project')[['Duration']].sum()
inactive_projects = temp_df.index[temp_df['Duration'] == 0]
if len(inactive_projects) > 0:
    df3 = df3[~df3['Project'].isin(inactive_projects)]

In [None]:
# Sanity check: one line per project, per day slot and per type (mapping / validation)
expected_lines = len(df3['Project'].unique()) * MAX_DAY * 2
assert len(df3) == expected_lines
expected_lines

In [None]:
# Load the project -> country table and attach a region ('Group') to every country
df_countries = pd.read_csv(os.path.join(get_data_dir(), 'Mapathons_countries.csv'), encoding='ISO-8859-1')
# Country names are compared without spaces below
df_countries['Country'] = df_countries['Country'].map(lambda country: country.replace(' ', ''))
df_countries = df_countries.drop_duplicates()
country_groups = {
    # Africa :  http://www.actualite-ouest-africaine.org/content/fr/les-six-r%C3%A9gions-de-l%E2%80%99union-africaine
    'North Africa': ['Algérie'],
    'West Africa': ['BurkinaFaso', 'Niger', 'Sénégal', 'Mali', 'Nigeria'],
    'South Africa': ['Angola', 'AfriqueduSud', 'Zimbabwe', 'Malawi', 'Zambie', 'Mozambique'],
    'Est Africa': ['Kenya', 'Madagascar', 'Tanzanie', 'Ethiopie', 'Ouganda', 'Uganda', 'Soudan', 'SudSoudan'],
    'Central Africa': ['RDC', 'Cameroun', 'Tchad', 'Kalémie', 'Congo'],
    # Other continents
    'Asia': ['Népal', 'Birmanie', 'Bangladesh', 'Kirghizistan', 'Laos', 'Irak'],
    'South America': ['Guyane', 'Equateur'],
    'Central America': ['Haiti'],
}
for group_name, group_countries in country_groups.items():
    df_countries.loc[df_countries['Country'].isin(group_countries), 'Group'] = group_name
df_countries.head()

In [None]:
df_countries[pd.isnull(df_countries['Group'])]

In [None]:
len(df3)

In [None]:
# Projects without a country (normal to have 5848, it is MSF not CartONG)
all_projects = df3['Project'].unique()
known_projects = df_countries['Project'].unique()
idx = [project not in known_projects for project in all_projects]
all_projects[idx]

In [None]:
# Attach country and group to every line; projects absent from df_countries are dropped
# by the default inner join
df4 = df3.merge(df_countries, on='Project')
print(f'{len(df4)} lines')
df4.head()

In [None]:
# One frame per contribution type
df_map, df_val = (df4[df4['Type'] == contribution_type]
                  for contribution_type in ('MAPPING', 'VALIDATION'))

In [None]:
def create_df_with_cumsum(df):
    """Return a copy of ``df`` with a ``value`` column: the running total of ``Duration`` per project.

    The running total follows the row order of ``df`` inside each ``Project``, so the
    rows should already be in chronological order. ``df`` itself is not modified.

    Args:
        df: DataFrame with at least the columns ``Project`` and ``Duration``.

    Returns:
        A new DataFrame with the same rows and index as ``df`` plus a numeric
        ``value`` column.
    """
    res = df.copy()
    # groupby().cumsum() is aligned on the original index and yields a numeric column;
    # the former per-project loop filled an object-dtype column one project at a time.
    res['value'] = res.groupby('Project')['Duration'].cumsum()
    return res

In [None]:
# Add a cumsum column named value for mapping
df_map2 = create_df_with_cumsum(df_map)
print(f'{len(df_map2)} lines')
df_map2.head()

In [None]:
# Add a cumsum column named value for validation
df_val2 = create_df_with_cumsum(df_val)
print(f'{len(df_val2)} lines')
df_val2.head()

In [None]:
def reformat(df):
    """Return the plotting view of ``df``: selected columns, string names, hours.

    ``Project`` is renamed to ``name`` and cast to ``str``, index labels are cast to
    ``int``, ``value`` is divided by 3600, and only the columns ``name``,
    ``Day_of_year``, ``Type``, ``value``, ``Country`` and ``Group`` are kept.
    ``df`` itself is not modified.
    """
    kept_columns = ['name', 'Day_of_year', 'Type', 'value', 'Country', 'Group']
    renamed = df.rename(index=int, columns={'Project' : 'name'})
    selected = renamed[kept_columns]
    return selected.assign(name=selected['name'].map(str),
                           value=selected['value'] / 3600)

In [None]:
# Rename, reformat extract useful columns for mapping
df_map3 = reformat(df_map2)
print(f'{len(df_map3)} lines')
df_map3.head()

In [None]:
# Rename, reformat extract useful columns for validation
df_val3 = reformat(df_val2)
print(f'{len(df_val3)} lines')
df_val3.head()

### Filter on projects

In [None]:
# WARNING: projects is an array of project ids stored as strings (the 'name' column is cast
# to str in reformat), not as integers
projects = df_map3['name'].unique()
len(projects)

In [None]:
# Keep the mapping lines of the selected projects only
df_map4 = df_map3[df_map3['name'].isin(projects)]
print(f'{len(df_map4)} lines')
df_map4.head()

In [None]:
# Keep the validation lines of the selected projects only
df_val4 = df_val3[df_val3['name'].isin(projects)]
print(f'{len(df_val4)} lines')
df_val4.head()

### Mapathon data

In [None]:
nuit_geo_date = '05/04/2019'
mapathons = pd.read_csv(os.path.join('..', 'data', 'Mapathons_2020_02_08.csv'))
mapathons.head(2)

In [None]:
# Keep the mapathons of the studied year only.
# .copy() because the following cells add and overwrite columns on this frame; without it
# they would write to a slice of the frame read from the CSV.
mapathons = mapathons[mapathons['Year'] == STAT_YEAR].copy()

In [None]:
# Flag the mapathons whose 'Tasks' text mentions at least one of the selected projects
mapathons['Filter'] = False
for project in projects:
    mentions_project = mapathons['Tasks'].map(lambda tasks: project in str(tasks))
    mapathons['Filter'] = mapathons['Filter'] | mentions_project

In [None]:
# Aggregate Nuit de la Géo: every event of that date gets the same place and a city count
is_nuit_geo = mapathons['Date'] == nuit_geo_date
nb_city_nuit_geo = is_nuit_geo.sum()
mapathons.loc[is_nuit_geo, 'Place'] = 'Nuit de la Géo'
mapathons.loc[is_nuit_geo, 'City'] = f'{nb_city_nuit_geo} villes'

In [None]:
# Extract only the useful columns of the flagged mapathons.
# .copy() so the columns written below go to an independent frame, not to a slice of `mapathons`.
mapathons2 = mapathons.loc[mapathons['Filter'], ['Date', 'City', 'Place']].copy()
mapathons2['Date'] = pd.to_datetime(mapathons2['Date'], format='%d/%m/%Y')
# Vectorised day of the year (replaces a row-by-row iterrows() loop)
mapathons2['Day_of_year'] = mapathons2['Date'].dt.dayofyear
print(f'{len(mapathons2)} lines')
mapathons2.head()

### Project name

In [None]:
tmProjects = pd.read_csv(os.path.join('..', 'data', 'tmProjects.csv'))
tmProjects.head()

In [None]:
# Project number -> short name.
# Pairing the two columns directly does not rely on tmProjects having a 0..n-1 index,
# unlike a range(len()) loop using .loc[i, ...].
names = dict(zip(tmProjects['N° Projet'], tmProjects['Short Name']))
names

### Plot

In [None]:
# Bar colour per country group.
# Every value written to the 'Group' column of df_countries needs an entry here, because
# draw_barchart indexes this dict with colors[group]; 'South Africa' was missing.
colors = dict(zip(
    ['Central America', 'South America', 'Asia', 'Est Africa',
     'West Africa', 'Central Africa', 'North Africa', 'South Africa'],
    ['#adb0ff', '#ffb3ff', '#90d595', '#e48381',
     '#aafbff', '#f7bb5f', '#eafb50', '#c2a5cf']
))

In [None]:
# Scratch cell: build the ranked totals of a single day, to inspect the frame that
# draw_barchart computes internally
day_of_year = 323
top_nb = 10
df_map_day = df_map4[df_map4['Day_of_year'].eq(day_of_year)]
df_val_day = df_val4[df_val4['Day_of_year'].eq(day_of_year)]
# Mapping + validation values, aligned on (name, day)
df_total = (df_val_day.set_index(['name', 'Day_of_year']) + df_map_day.set_index(['name', 'Day_of_year']))['value'].reset_index()
# Bring back the country and group of each project
df_total = pd.merge(df_total, df_map_day[['name', 'Country', 'Group']], how='left', on='name')
# Ascending sort + tail: the top_nb largest totals, largest last
df_total = df_total.sort_values(by='value', ascending=True).tail(top_nb)

In [None]:
df_total

In [None]:
def draw_day_mapathon(day_of_year):
    """Write the place and city of the mapathon held on ``day_of_year`` on the global ``ax``.

    Nothing is drawn when ``mapathons2`` has no entry for that day; when it has
    several, the last one is used.
    """
    if day_of_year not in mapathons2['Day_of_year'].values:
        return
    same_day = mapathons2[mapathons2['Day_of_year'] == day_of_year]
    mapathon_day = same_day.iloc[-1]
    label = mapathon_day['Place'] + ', ' + mapathon_day['City']
    ax.text(1, 0.18, label, transform=ax.transAxes,
            color='#777777', size=16, ha='right', weight=800)

In [None]:
def draw_week_mapathon(day_of_year):
    """List on the global ``ax`` the mapathons of ``day_of_year`` and the six days before.

    One line per day that has an entry in ``mapathons2`` (its last entry), most
    recent day first, each new line written 0.04 lower in axes coordinates.
    """
    lines_written = 0
    for days_back in range(7):
        day = day_of_year - days_back
        if day not in mapathons2['Day_of_year'].values:
            continue
        mapathon_day = mapathons2[mapathons2['Day_of_year'] == day].iloc[-1]
        day_date = datetime.datetime(STAT_YEAR, 1, 1) + datetime.timedelta(day_of_year - 1 - days_back)
        label = day_date.strftime('%m-%d') + ' : ' + mapathon_day['Place'] + ', ' + mapathon_day['City']
        ax.text(1, 0.18 - 0.04 * lines_written, label,
                transform=ax.transAxes, color='#777777', size=16, ha='right', weight=800)
        lines_written += 1

In [None]:
fig, ax = plt.subplots(figsize=(16, 9))
def draw_barchart(day_of_year, draw_mapathon=draw_day_mapathon):
    """Draw the top projects of one day on the global ``ax``, coloured by country group.

    Wide bar = mapping + validation ``value`` in the colour of the project's group,
    thin yellow/green bar = split by type. Each bar is labelled with the project,
    its value and its country / group.

    Args:
        day_of_year: day to plot; only rows of ``df_map4`` / ``df_val4`` whose
            ``Day_of_year`` equals it are used.
        draw_mapathon: callable invoked with ``day_of_year`` once the chart is drawn,
            to add the mapathon caption.
    """
    top_nb = 10
    df_map_day = df_map4[df_map4['Day_of_year'].eq(day_of_year)]
    df_val_day = df_val4[df_val4['Day_of_year'].eq(day_of_year)]
    # Align mapping and validation rows on (name, day) and add their values
    df_total = (df_val_day.set_index(['name', 'Day_of_year']) + df_map_day.set_index(['name', 'Day_of_year']))['value'].reset_index()
    # Bring back the country and group of each project
    df_total = pd.merge(df_total, df_map_day[['name', 'Country', 'Group']], how='left', on='name')
    df_total = df_total.sort_values(by='value', ascending=True).tail(top_nb)
    # Put the per-type frames in the same order as the ranked totals
    df_map_day = df_map_day.set_index('name').loc[df_total['name']].reset_index()
    df_val_day = df_val_day.set_index('name').loc[df_total['name']].reset_index()
    # Total shown in the title: sum over the displayed projects only
    total_hour = df_total['value'].sum()
    ax.clear()
    # One slot per selected project; a hard-coded 10 makes barh() fail as soon as
    # fewer than `top_nb` projects are available for the day.
    positions = np.arange(len(df_total))
    ax.barh(positions + .1, df_total['value'], height=.8, color=[colors[x] for x in df_total['Group']], linewidth=3)
    ax.barh(positions - .2, df_map_day['value'], height=.3, color='y')
    ax.barh(positions - .2, df_val_day['value'], height=.3, left=df_map_day['value'].values, color='g')
    # Longest bar, computed once instead of on every loop iteration
    max_value = df_total['value'].max()
    dx = max_value / 200
    for i, (value, name, country, group) in enumerate(zip(df_total['value'], df_total['name'], df_total['Country'], df_total['Group'])):
        # Short bars only get the project number: the full label would not fit
        text = str(names[int(name)]) + ' #' + name if value > max_value / 10 else name
        ax.text(value-dx, i+.2,      text,            size=14, weight=600, ha='right', va='bottom')
        ax.text(value+dx, i,      f'{value:.1f}',  size=14, ha='left',  va='center')
        ax.text(value-dx, i, country + ' ( ' + group + ' )', size=10, color='#444444', ha='right', va='baseline')
    date = (datetime.datetime(STAT_YEAR, 1, 1) + datetime.timedelta(day_of_year - 1)).strftime('%B %d')
    ax.text(1, 0.23, date, transform=ax.transAxes, color='#777777', size=46, ha='right', weight=800)
    ax.text(0, 1.06, 'Time spend (hours) : mapping time in yellow, validation time in green',
            transform=ax.transAxes, size=12, color='#777777')
    ax.xaxis.set_major_formatter(ticker.StrMethodFormatter('{x:.0f}'))
    ax.xaxis.set_ticks_position('top')
    ax.tick_params(axis='x', colors='#777777', labelsize=12)
    ax.set_yticks([])
    ax.margins(0, 0.01)
    ax.grid(which='major', axis='x', linestyle='-')
    ax.set_axisbelow(True)
    ax.text(0, 1.12, f'Hours spent on CartONG-HCR Missing Maps projects (total : {total_hour:.0f}h)',
            transform=ax.transAxes, size=24, weight=600, ha='left')
    ax.text(1, 0, 'by CartONG credit @pratapvardhan, @jburnmurdoch', transform=ax.transAxes, ha='right',
            color='#777777', bbox=dict(facecolor='white', alpha=0.8, edgecolor='white'))
    draw_mapathon(day_of_year)
    plt.box(False)
draw_barchart(323)

In [None]:
fig, ax = plt.subplots(figsize=(16, 9))
draw_barchart(323, draw_week_mapathon)

In [None]:
directory = 'all'
os.makedirs(directory, exist_ok=True)
# Start at the first day with a positive mapping value, but never at day 0
# (day 0 gathers what was logged before the studied year)
first_day = max(1, min(df_map4[df_map4['value'] > 0]['Day_of_year']))
for i in range(first_day, 366):
    # draw_barchart() draws on the global `ax`: rebind it to a new figure each time
    fig, ax = plt.subplots(figsize=(16, 9))
    draw_barchart(int(i))
    plt.savefig(f'{directory}/{i}.png')
    plt.close()
    print(i)

In [None]:
directory = 'all_week'
os.makedirs(directory, exist_ok=True)
# Start at the first day with a positive mapping value, but never at day 0
first_day = max(1, min(df_map4[df_map4['value'] > 0]['Day_of_year']))
for i in np.arange(first_day, 366):
    # Keep one frame per week only: days 1, 8, 15, ...
    if i % 7 != 1:
        continue
    # draw_barchart() draws on the global `ax`: rebind it to a new figure each time
    fig, ax = plt.subplots(figsize=(16, 9))
    draw_barchart(int(i), draw_week_mapathon)
    plt.savefig(f'{directory}/{i}.png')
    plt.close()
    print(i)