# Visualize Results

In [8]:
import math
import random

import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy.interpolate import splrep, splev
%matplotlib inline

## 1. Plot Contributors
Input: '/data2/zihe/data/OSS-census/contributor_by_win/*.csv' (copied to './contributor/data/')<br>
Output:
- All active contributor by gender by window: './contributor/all/'
- Core active contributor by gender by window: './contributor/core/'

In [9]:
def plot_contributors(lang):
    
    # Import data
    dat = pd.read_csv('/Users/katy/Desktop/oss-census-visualization/contributor/'+lang+'.csv', error_bad_lines=False, warn_bad_lines=False, index_col=False)
    max_win = 53
    dat = dat[dat['win']<=max_win]
    
    # Change window to date
    wins = dat["win"]
    new_wins = []
    for win in wins:
        time = 3 * win
        year = 2008 + math.floor(time/12)
        month = time - math.floor(time/12)*12
        if not month:
            month = 12
        if month == 3:
            new_wins.append("{}".format(year))
        else:
            new_wins.append("{}-{}".format(year,month))
    wins = new_wins
    
    # Plot information
    fig,ax2 = plt.subplots()
    title = lang
    if lang == "C#":
        title = "C\#"
    ax2.set_title("Active Contributors in " + r"$\bf{" + title + "}$" +" Ecosystems", fontsize=18)
    ax2.set_xlabel('Time (quarter)', fontsize=16)
    # ax.set_ylabel('Number of Contributors (thousand)', fontsize=16, labelpad=5.0)
    
    # Plot contributor number bar
    # female = (dat["female_all"] + dat["female_likely_all"]) / 1000
    # male = (dat["female_all"] + dat["female_likely_all"] + dat["male_all"] + dat["male_likely_all"]) / 1000
    # unknown = (dat["all_all"]) / 1000
    # ax.bar(wins, unknown, label = "All unknown", color = "blanchedalmond")
    # ax.bar(wins, male, label = "All men", color = "#f29d4b")
    # ax.bar(wins, female, label = "All women", color = "#de2d26")
    # plt.legend(loc=(0.02, 0.80),fontsize=12, frameon=False)

    # Plot ratio line for female in all contributor
    # ax2 = ax.twinx()
    male = (dat["male_all"] + dat["male_likely_all"]).replace(0, 1)
    ratio = (dat["female_all"] + dat["female_likely_all"]) / (dat["female_all"] + dat["female_likely_all"] + male)
    for win in dat["win"]:
        win = win - 1
        if dat["female_all"][win] + dat["female_likely_all"][win] + male[win] <= 30:
            ratio[win] = 0
    ratio2 = np.ma.masked_where(((dat["win"] >= 46)&(dat["win"] <= 48)), ratio)
    ax2.plot(wins, ratio2, color="darkblue",marker="o", markerfacecolor='white', markeredgecolor='darkblue', markeredgewidth=1, label="Among all")
    ax2.set_ylabel('Female Ratio = F/(F+M)', fontsize=16, labelpad=25.0)
    last_all = ratio[max_win-1]
    
    # Plot ratio line for female in core contributor
    male = (dat["male_core"] + dat["male_likely_core"]).replace(0, 1)
    ratio = (dat["female_core"] + dat["female_likely_core"]) / (dat["female_core"] + dat["female_likely_core"] + male)
    for win in dat["win"]:
        win = win - 1
        if dat["female_core"][win] + dat["female_likely_core"][win] + male[win] <= 30:
            ratio[win] = 0
    ratio2 = np.ma.masked_where(((dat["win"] >= 46)&(dat["win"] <= 48)), ratio)
    ax2.plot(wins, ratio2, color="darkblue",marker="s", markeredgecolor='darkblue', markeredgewidth=1, label="Among core")
    ax2.fill_between(wins, 0, 1, where = ((dat["win"] >= 46)&(dat["win"] <= 48)), color='#D3D3D3', alpha=0.5, transform=ax2.get_xaxis_transform())
    last_core = ratio[max_win-1]
    #plt.figtext(0.1, 0.02,"* Female ratio gap in the last window is " + (str(round((last_all-last_core)*100, 2))+"0")[0:4] + "%.")
    
    # Set x ticks
    # ax.set_xticks(np.arange(min(dat["win"])-1, max(dat["win"])+1, 4))
    ax2.set_xticks(np.arange(min(dat["win"])-1, max(dat["win"])+1, 4))
    
    # Set y ticks
    upper = math.ceil(max(dat["all_all"])/60000)*60
    # ax.set_yticks(np.arange(0, upper * 7 / 6, upper/6))
    # ax.set_ylim(ymin = 0, ymax = upper)
    ax2.set_yticks(np.arange(0, 0.12, 0.01))
    ax2.set_ylim(ymin = 0, ymax = 0.11)
    
    # No Frame
    # ax.spines['top'].set_visible(False)
    ax2.spines['top'].set_visible(False)
    # ax.spines['bottom'].set_visible(False)
    ax2.spines['bottom'].set_visible(True)
    # ax.spines['left'].set_visible(False)
    ax2.spines['left'].set_visible(True)
    # ax.spines['right'].set_visible(False)
    ax2.spines['right'].set_visible(False)
    # ax.grid(axis='y')
    # ax2.grid(axis='y')
    
    # Set Size
    plt.legend(loc=(0.05, 0.85),fontsize=12, frameon=True,edgecolor="white")
    plt.rcParams["figure.figsize"] = (10,7)
    
    # Save figure
    plt.savefig("/Users/katy/Desktop/oss-census-visualization/plots/contributor_line/"+lang+"_contributor.pdf", facecolor='white', transparent=False)
    plt.savefig("/Users/katy/Desktop/oss-census-visualization/plots/contributor_line/"+lang+"_contributor.png", facecolor='white', transparent=False)
    #plt.show()
    plt.clf()

#plot_contributors("HTML")

In [6]:
langs = ["Atom", "Bower", "Cargo", "Clojars", "CocoaPods", "CPAN", "CRAN", 
          "Go", "Hackage", "Hex", "Maven", "Meteor", "NPM", "NuGet", "Packagist", 
          "PlatformIO", "Pub", "Puppet", "Pypi", "Rubygems", "All"]
for lang in langs:
    plot_contributors(lang)



  dat = pd.read_csv('/Users/katy/Desktop/oss-census-visualization/contributor/'+lang+'.csv', error_bad_lines=False, warn_bad_lines=False, index_col=False)


  dat = pd.read_csv('/Users/katy/Desktop/oss-census-visualization/contributor/'+lang+'.csv', error_bad_lines=False, warn_bad_lines=False, index_col=False)


  dat = pd.read_csv('/Users/katy/Desktop/oss-census-visualization/contributor/'+lang+'.csv', error_bad_lines=False, warn_bad_lines=False, index_col=False)


  dat = pd.read_csv('/Users/katy/Desktop/oss-census-visualization/contributor/'+lang+'.csv', error_bad_lines=False, warn_bad_lines=False, index_col=False)


  dat = pd.read_csv('/Users/katy/Desktop/oss-census-visualization/contributor/'+lang+'.csv', error_bad_lines=False, warn_bad_lines=False, index_col=False)


  dat = pd.read_csv('/Users/katy/Desktop/oss-census-visualization/contributor/'+lang+'.csv', error_bad_lines=False, warn_bad_lines=False, index_col=False)


  dat = pd.read_csv('/Users/katy/Desktop/oss-census-visu

<Figure size 432x288 with 0 Axes>

<Figure size 720x504 with 0 Axes>

<Figure size 720x504 with 0 Axes>

<Figure size 720x504 with 0 Axes>

<Figure size 720x504 with 0 Axes>

<Figure size 720x504 with 0 Axes>

<Figure size 720x504 with 0 Axes>

<Figure size 720x504 with 0 Axes>

<Figure size 720x504 with 0 Axes>

<Figure size 720x504 with 0 Axes>

<Figure size 720x504 with 0 Axes>

<Figure size 720x504 with 0 Axes>

<Figure size 720x504 with 0 Axes>

<Figure size 720x504 with 0 Axes>

<Figure size 720x504 with 0 Axes>

<Figure size 720x504 with 0 Axes>

<Figure size 720x504 with 0 Axes>

<Figure size 720x504 with 0 Axes>

<Figure size 720x504 with 0 Axes>

<Figure size 720x504 with 0 Axes>

<Figure size 720x504 with 0 Axes>

## 2. Plot Commits
Input: '/data2/zihe/data/OSS-census/commit_by_win/*.csv' (copied to './commit/data/')<br>
Output: Commit count by gender by window: './commit/graph'

In [16]:
def plot_commits(lang):
    
    # Import data
    dat = pd.read_csv('/Users/katy/Desktop/oss-census-visualization/commit/'+lang+'.csv', error_bad_lines=False, warn_bad_lines=False, index_col=False)
    max_win = 53
    dat = dat[dat['win']<=max_win]
    
    # Change window to date
    wins = dat["win"]
    new_wins = []
    for win in wins:
        time = 3 * win
        year = 2008 + math.floor(time/12)
        month = time - math.floor(time/12)*12
        if not month:
            month = 12
        if month == 3:
            new_wins.append("{}".format(year))
        else:
            new_wins.append("{}-{}".format(year,month))
    wins = new_wins
    
    # Plot information
    fig,ax2 = plt.subplots()
    title = lang
    if lang == "C#":
        title = "C\#"
    ax2.set_title("Commits in " + r"$\bf{" + title + "}$" +" Public Projects", fontsize=18)
    ax2.set_xlabel('Time (quarter)', fontsize=16)
    # ax.set_ylabel('Commit Numbers (thousand)', fontsize=16, labelpad=5.0)
    
    # Plot commit number bar
    # female = dat["female_commit"] / 1000
    # male = (dat["female_commit"] + dat["male_commit"]) / 1000
    # unknown = dat["all_commit"] / 1000
    # ax.bar(wins, unknown, label = "unknown", color = "blanchedalmond")
    # ax.bar(wins, male, label = "male", color = "#f29d4b")
    # ax.bar(wins, female, label = "female", color = "#de2d26")
    # plt.legend(loc=(0.02, 0.80),fontsize=12, frameon=False)
    
    # Plot ratio line for female in all commits
    # ax2=ax.twinx()
    male = dat["male_commit"].replace(0, 1)
    ratio = dat["female_commit"] / (dat["female_commit"] + male)
    for win in dat["win"]:
        win = win - 1
        if dat["female_commit"][win] + male[win] <= 1000:
            ratio[win] = 0
    
    ratio2 = np.ma.masked_where(((dat["win"] >= 46)&(dat["win"] <= 48)), ratio)
    ax2.plot(wins, ratio2,color="darkblue",marker="o", markerfacecolor='white', markeredgecolor='darkblue', markeredgewidth=1, label="female ratio")
    ax2.fill_between(wins, 0, 1, where = ((dat["win"] >= 46)&(dat["win"] <= 48)), color='#D3D3D3', alpha=0.5, transform=ax2.get_xaxis_transform())
    ax2.set_ylabel('Female Commit Ratio = F/(F+M)', fontsize=16, labelpad=25.0).set_rotation(270)
    
    # Set x ticks
    # ax.set_xticks(np.arange(min(dat["win"])-1, max(dat["win"])+1, 4))
    ax2.set_xticks(np.arange(min(dat["win"])-1, max(dat["win"])+1, 4))
    
    # Set y ticks
    upper = math.ceil(max(dat["all_commit"])/600000)*600
    # ax.set_yticks(np.arange(0, upper *7/6, upper/6))
    # ax.set_ylim(ymin = 0, ymax = upper)
    ax2.set_yticks(np.arange(0, 0.36, 0.05))
    ax2.set_ylim(ymin = 0, ymax = 0.35)

    # No Frame
    # ax.spines['top'].set_visible(False)
    ax2.spines['top'].set_visible(False)
    # ax.spines['bottom'].set_visible(False)
    ax2.spines['bottom'].set_visible(True)
    # ax.spines['left'].set_visible(False)
    ax2.spines['left'].set_visible(True)
    # ax.spines['right'].set_visible(False)
    ax2.spines['right'].set_visible(False)
    #ax.grid(axis='y')
    #ax2.grid(axis='y')
    
    # Set Size
    plt.legend(loc=(0.05, 0.85),fontsize=12, frameon=True, edgecolor="white")
    plt.rcParams["figure.figsize"] = (10,7)
    
    # Save figure
    plt.savefig("/Users/katy/Desktop/oss-census-visualization/plots/commit_line/"+lang+"_commit.pdf", facecolor='white', transparent=False)
    plt.savefig("/Users/katy/Desktop/oss-census-visualization/plots/commit_line/"+lang+"_commit.png", facecolor='white', transparent=False)
    #plt.show()
    plt.clf()
    
#plot_commits("JavaScript")

In [17]:
langs = ["Atom", "Bower", "Cargo", "Clojars", "CocoaPods", "CPAN", "CRAN", 
          "Go", "Hackage", "Hex", "Maven", "Meteor", "NPM", "NuGet", "Packagist", 
          "PlatformIO", "Pub", "Puppet", "Pypi", "Rubygems", "All"]
for lang in langs:
    plot_commits(lang)



  dat = pd.read_csv('/Users/katy/Desktop/oss-census-visualization/commit/'+lang+'.csv', error_bad_lines=False, warn_bad_lines=False, index_col=False)


  dat = pd.read_csv('/Users/katy/Desktop/oss-census-visualization/commit/'+lang+'.csv', error_bad_lines=False, warn_bad_lines=False, index_col=False)


  dat = pd.read_csv('/Users/katy/Desktop/oss-census-visualization/commit/'+lang+'.csv', error_bad_lines=False, warn_bad_lines=False, index_col=False)


  dat = pd.read_csv('/Users/katy/Desktop/oss-census-visualization/commit/'+lang+'.csv', error_bad_lines=False, warn_bad_lines=False, index_col=False)


  dat = pd.read_csv('/Users/katy/Desktop/oss-census-visualization/commit/'+lang+'.csv', error_bad_lines=False, warn_bad_lines=False, index_col=False)


  dat = pd.read_csv('/Users/katy/Desktop/oss-census-visualization/commit/'+lang+'.csv', error_bad_lines=False, warn_bad_lines=False, index_col=False)


  dat = pd.read_csv('/Users/katy/Desktop/oss-census-visualization/commit/'+lang+'.csv'

<Figure size 720x504 with 0 Axes>

<Figure size 720x504 with 0 Axes>

<Figure size 720x504 with 0 Axes>

<Figure size 720x504 with 0 Axes>

<Figure size 720x504 with 0 Axes>

<Figure size 720x504 with 0 Axes>

<Figure size 720x504 with 0 Axes>

<Figure size 720x504 with 0 Axes>

<Figure size 720x504 with 0 Axes>

<Figure size 720x504 with 0 Axes>

<Figure size 720x504 with 0 Axes>

<Figure size 720x504 with 0 Axes>

<Figure size 720x504 with 0 Axes>

<Figure size 720x504 with 0 Axes>

<Figure size 720x504 with 0 Axes>

<Figure size 720x504 with 0 Axes>

<Figure size 720x504 with 0 Axes>

<Figure size 720x504 with 0 Axes>

<Figure size 720x504 with 0 Axes>

<Figure size 720x504 with 0 Axes>

<Figure size 720x504 with 0 Axes>