In [None]:
import datetime
import numpy as np
import os
import pandas as pd
import plotly.express as px

In [None]:
# Relative path of the folder that holds the input CSV files read below
# and that receives the exported figure JSON files.
data_dir = '../data'

## Load data

In [None]:
# 'ANSI' is only a valid codec name on Windows (alias of the system code page).
# On other platforms fall back to pandas' default decoding, so the cell runs
# everywhere without commenting/uncommenting lines.
csv_encoding = 'ANSI' if os.name == 'nt' else None
df = pd.read_csv(os.path.join(data_dir, 'merged_stats.csv'), encoding=csv_encoding)
df.head()

## Filter on CartONG projects


<font color='red'>WARNING: File to update</font>

The next section can be run without it, but then we count data from projects where CartONG intervenes only in a mapathon.

In [None]:
# Read the 'N° Projet' column of the organisation sheet
# (header=1: the column names are on the second row of the file).
raw_projects = pd.read_csv(os.path.join(data_dir, 'Orga_Missing_Maps_CartONG_2022_06_05.csv'), header=1)['N° Projet']

# Keep only the cells that convert to an integer project number.
projects = []
for raw_value in raw_projects:
    try:
        projects.append(int(raw_value))
    except (TypeError, ValueError, OverflowError):
        # Empty (NaN) or free-text cells are skipped on purpose; any other
        # kind of error is no longer silently swallowed.
        continue

# Restrict the statistics to the listed projects.
df = df[df['Project'].isin(projects)]

In [None]:
# Keep only the rows whose Year is strictly greater than 2017
df = df.loc[df["Year"].gt(2017)]

## New contributors by day

In [None]:
date_cols = ["Year", "Month", "Day"]

# df is a filtered slice at this point; work on an explicit copy so that
# adding a column does not trigger a SettingWithCopyWarning.
df = df.copy()

# pd.to_datetime assembles dates directly from year/month/day columns
# (column names are matched case-insensitively), which avoids building and
# re-parsing one string per row.
df['Date'] = pd.to_datetime(df[date_cols])
df.head()

In [None]:
# For each year, count cumulatively how many authors made their first
# contribution of that year on or before each date.
min_year = df["Year"].min()
yearly_counts = []  # one frame per year, concatenated once after the loop
for year in sorted(df["Year"].unique()):
    df_year = df.loc[df["Year"] == year].copy()

    # Shift the dates back to the first year so that all years share the same
    # x axis (vectorised: no per-row apply).
    df_year["Fake_Date"] = df_year["Date"] - pd.DateOffset(years=int(year - min_year))

    # First (shifted) contribution date of each author within the year
    author_first_date = df_year[["Author", "Fake_Date"]].groupby("Author").min()

    # Number of authors starting on each date, accumulated over the year
    contributors_first_date_year = author_first_date.reset_index().groupby('Fake_Date').count().cumsum()
    contributors_first_date_year["Year"] = year
    yearly_counts.append(contributors_first_date_year)

if yearly_counts:
    contributors_first_date = pd.concat(yearly_counts, axis=0).reset_index()
else:
    # No data at all: keep the columns the plotting cell expects.
    contributors_first_date = pd.DataFrame(columns=["Fake_Date", "Author", "Year"])

In [None]:
# One cumulative curve per year, exported as JSON and displayed inline.
fig = px.line(contributors_first_date, x="Fake_Date", y="Author", color="Year")
fig.update_layout(
    title={'text': 'Cumulative sum of total contributors in a year'},
    xaxis_tickformat='%d %B',
)
fig.update_xaxes(title_text='Date of first contribution')
fig.update_yaxes(title_text="Contributors number")

contributors_json = fig.to_json()
with open(f'{data_dir}/contributors.json', 'w') as json_file:
    json_file.write(contributors_json)
fig

## Time contribution

In [None]:
# Divisor applied to Duration: 3600 * 7 hours * 229 days, i.e. one ETP as
# labelled on the charts (presumably Duration is in seconds - TODO confirm).
SECONDS_PER_ETP = 3600 * 7 * 229


def _cumulative_duration_by_year(contributions, base_year):
    """Return per-year cumulative sums by date, with Duration expressed in ETP.

    Parameters
    ----------
    contributions : pd.DataFrame
        Rows to aggregate; must contain the "Year", "Date" and "Duration" columns.
    base_year : int
        Year onto which every date is shifted so that all years share one axis.

    Returns
    -------
    pd.DataFrame
        One row per (shifted) date and year, with the columns "Fake_Date",
        the cumulated numeric columns (including "Duration") and "Year".
    """
    yearly_frames = []  # concatenated once after the loop
    for year in sorted(contributions["Year"].unique()):
        df_year = contributions[contributions["Year"] == year].copy()

        # Shift the dates back to base_year (vectorised: no per-row apply).
        df_year["Fake_Date"] = df_year["Date"] - pd.DateOffset(years=int(year - base_year))

        # numeric_only=True: only numeric columns can be summed; without it
        # recent pandas versions try to sum the datetime and string columns too.
        cumulated = df_year.groupby('Fake_Date').sum(numeric_only=True).cumsum()
        cumulated["Year"] = year
        yearly_frames.append(cumulated)

    if not yearly_frames:
        # No matching rows: keep the columns the plotting cells expect.
        return pd.DataFrame(columns=["Fake_Date", "Duration", "Year"])

    result = pd.concat(yearly_frames, axis=0)
    result["Duration"] = result["Duration"] / SECONDS_PER_ETP
    return result.reset_index()


# The same base year is used for the three tables so that their axes line up.
min_year = df["Year"].min()
mapping_time = _cumulative_duration_by_year(df[df["Type"] == "MAPPING"], min_year)
valid_time = _cumulative_duration_by_year(df[df["Type"] == "VALIDATION"], min_year)
total_time = _cumulative_duration_by_year(df, min_year)

In [None]:
# Cumulative mapping time, one curve per year; exported as JSON and displayed.
fig = px.line(mapping_time, x="Fake_Date", y="Duration", color="Year")
fig.update_layout(
    title={'text': 'Cumulative sum of mapping time in ETP (229 days of 7 hours)'},
    xaxis_tickformat='%d %B',
)
fig.update_xaxes(title_text='Date')
fig.update_yaxes(title_text="Mapping time in ETP (229 days of 7 hours)")

mapping_json = fig.to_json()
with open(f'{data_dir}/mapping_time.json', 'w') as json_file:
    json_file.write(mapping_json)
fig

In [None]:
# Cumulative validation time, one curve per year; exported as JSON and displayed.
fig = px.line(valid_time, x="Fake_Date", y="Duration", color="Year")
fig.update_yaxes(title_text="Validation time in ETP (229 days of 7 hours)")
fig.update_xaxes(title_text='Date')
fig.update_layout(
    xaxis_tickformat='%d %B',
    title={'text': 'Cumulative sum of validation time in ETP (229 days of 7 hours)'},
)

with open(f'{data_dir}/validation_time.json', 'w') as out_file:
    serialized_figure = fig.to_json()
    out_file.write(serialized_figure)
fig

In [None]:
# Cumulative total contribution time, one curve per year; exported as JSON and displayed.
fig = px.line(total_time, x="Fake_Date", y="Duration", color="Year")

# Axis titles, chart title and day/month tick labels
fig.update_xaxes(title_text='Date')
fig.update_yaxes(title_text="Contribution time in ETP (229 days of 7 hours)")
fig.update_layout(
    title={'text': 'Cumulative sum of contribution time in ETP (229 days of 7 hours)'},
    xaxis_tickformat='%d %B',
)

total_json = fig.to_json()
with open(f'{data_dir}/total_time.json', 'w') as total_file:
    total_file.write(total_json)
fig