In [None]:
import itertools
import random
import json
import matplotlib.pyplot as plt
import os
import pandas as pd
from pathlib import Path 
import sys
# Globally silence pandas' chained-assignment (SettingWithCopy) warning.
# NOTE(review): this hides the warning for every cell in the notebook, so a
# genuine chained-assignment mistake will no longer be reported.
pd.options.mode.chained_assignment = None  # default='warn'

In [None]:
## Shared configuration for the cells that read the per-hole-size datasets
## (split by starting year) and plot them.

# Year labels, as strings, from 1990 through 2018 inclusive.
YEARS = list(map(str, range(1990, 2019)))

# Master collaboration table loaded from disk.
collaborations_df = pd.read_csv('myDATA/00-collaboration_df_with_starting_years.csv')

# Inclusive bounds of the hole sizes scanned by the plotting loops.
min_size, max_size = 0, 28

In [None]:
# Return a vector containing the cumulative number of authors for each year
def get_tot_auths_num(collaborations_df=None, years=None):
    """Return the running total of authors that have started by each year.

    Parameters
    ----------
    collaborations_df : pandas.DataFrame, optional
        Table with a ``start_year`` column; each row is counted as one author
        (assumes one row per author -- TODO confirm against the CSV).
        When omitted, the table is read from
        ``myDATA/00-collaboration_df_with_starting_years.csv``.
    years : sequence of str or int, optional
        Years to report, in the order they should be accumulated.
        Defaults to the strings "1990" .. "2018".

    Returns
    -------
    list of int
        ``result[i]`` is the sum, over ``years[0] .. years[i]``, of the number
        of rows whose ``start_year`` equals that year.  Rows whose
        ``start_year`` is not listed in ``years`` are never counted.
    """
    if collaborations_df is None:
        collaborations_df = pd.read_csv('myDATA/00-collaboration_df_with_starting_years.csv')
    if years is None:
        years = [str(year) for year in range(1990, 2019)]

    start_years = collaborations_df["start_year"]

    # Number of authors whose first year is exactly y, for every requested year.
    # The boolean mask is summed directly, so no filtered copy of the frame is built.
    num_new_auths_by_y = [int((start_years == int(y)).sum()) for y in years]

    # Running total: authors that started in this year or any earlier listed year.
    return list(itertools.accumulate(num_new_auths_by_y))
        
        
        
# Plot degree trajectories contained in a given dataframe (x axis stretched by # authors)
def plot_df(df, year, kind_of_authors, hole_length):
    """Plot, save and show the trajectories of the top-100 authors in ``df``.

    The 100 rows with the largest value in the "2018" column are drawn, one
    line per row (a scatter when ``year == "2018"``, where a row has a single
    point).  The x position of each year is the cumulative author count
    returned by ``get_tot_auths_num()``, so the axis is "stretched" by the
    number of authors; tick labels are the entries of the global ``YEARS``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an "ID" column and a "2018" column.  Every column other
        than "ID" is treated as one consecutive year, the first of them being
        ``year`` (positional assumption inherited from the data layout --
        TODO confirm against the per-year CSV files).
    year : str
        Starting year of the authors in ``df``; must be an element of ``YEARS``.
    kind_of_authors : str
        Free text inserted into the figure title.
    hole_length : int
        Hole size; used in the title, the output directory and the file name.

    Raises
    ------
    IndexError
        If ``df`` has rows and more value columns than there are years from
        ``year`` to the end of ``YEARS``.

    Side effects
    ------------
    Writes ``stretched_by_authors<year>_holeSize_<hole_length>.png`` into
    ``myDATA/10-splitted_by_year/<hole_length>_hole_size_splitted/trajectories_plt``
    (created if missing), shows the figure and then closes it.
    """
    tot_auths = get_tot_auths_num()
    top_df = df.nlargest(100, "2018")

    # Columns are mapped to years by position: first non-ID column <-> `year`.
    value_cols = [col for col in top_df.columns if col != "ID"]
    first_year_pos = YEARS.index(year)
    x = tot_auths[first_year_pos:first_year_pos + len(value_cols)]
    if len(top_df) > 0 and len(x) < len(value_cols):
        raise IndexError(
            "df has " + str(len(value_cols)) + " value columns but only "
            + str(len(x)) + " years are available from " + year
        )

    fig, ax = plt.subplots(figsize=(20, 8), dpi=80)
    ax.set_xlabel("years stretched by # authors")
    ax.set_ylabel("num_Collaborations")
    ax.set_title("# vertex trajectories for top 100 " + kind_of_authors + " authors who started in: " + year + " with hole of size " + str(hole_length) + " stretched by authors\n", fontdict={'fontsize': 12})

    labels = []
    # Walk the rows directly instead of re-filtering the frame once per ID.
    for author_id, final_value, y in zip(top_df["ID"], top_df["2018"].values, top_df[value_cols].values):
        labels.append("aut " + str(author_id) + " -> max " + str(final_value))
        if year == "2018":
            # A single point per author cannot be drawn as a line.
            ax.scatter(x, y)
        else:
            ax.plot(x, y)

    ax.legend(labels, loc='upper center', bbox_to_anchor=(0.5, -0.2),
              fancybox=True, shadow=True, ncol=5)

    # One tick per year, placed at that year's cumulative author count.
    ax.set_xticks(tot_auths)
    ax.set_xticklabels(YEARS, rotation='vertical')

    # Build the path from the hole_length argument (not a loop global) and make
    # sure the directory the figure is actually written to exists.
    out_dir = 'myDATA/10-splitted_by_year/' + str(hole_length) + '_hole_size_splitted/trajectories_plt'
    os.makedirs(out_dir, exist_ok=True)
    file = out_dir + '/stretched_by_authors' + year + '_holeSize_' + str(hole_length) + '.png'

    fig.savefig(file)
    plt.show()
    # Release the figure: this function is called once per (hole size, year).
    plt.close(fig)

# Plot the top-100 "active" author trajectories for every hole length and
# starting year for which a non-empty CSV exists on disk.
for size in range(min_size, max_size+1):
    print((' \033[1m' + "HOLE SIZE "+str(size) + '\033[0m ').center(120, '='))

    path = 'myDATA/10-splitted_by_year/'+str(size)+'_hole_size_splitted'
    if not os.path.exists(path):
        # No data directory for this hole size.
        continue

    for y in YEARS:
        file = path+'/'+y+'_collabs_by_starting_year.csv'
        if not os.path.exists(file):
            continue

        df_y = pd.read_csv(file)
        if len(df_y) == 0:
            # Nothing to plot for this starting year.
            continue

        plot_df(df_y, y, "active", size)

In [None]:
# Return a vector containing the total number of collaborations for each year
def get_tot_collabs_num(collaborations_df=None, years=None):
    """Return the column sum of the collaboration table for each year.

    Parameters
    ----------
    collaborations_df : pandas.DataFrame, optional
        Table with one column per year, named by the year as a string.
        When omitted, the table is read from
        ``myDATA/00-collaboration_df_with_starting_years.csv``.
    years : sequence of str, optional
        Column names to sum, in output order.  Defaults to "1990" .. "2018".

    Returns
    -------
    list
        ``result[i]`` is ``collaborations_df[years[i]].sum()``.  The values are
        per-year column sums; this function does not accumulate across years.
    """
    if collaborations_df is None:
        collaborations_df = pd.read_csv('myDATA/00-collaboration_df_with_starting_years.csv')
    if years is None:
        years = [str(year) for year in range(1990, 2019)]

    return [collaborations_df[y].sum() for y in years]
        
# Plot degree trajectories contained in a given dataframe (x axis stretched by # collaborations)
# NOTE(review): this redefines the plot_df declared in the previous cell; the
# name is kept so the loop below keeps working, but the two should eventually
# get distinct names or be merged into one parameterised function.
def plot_df(df, year, kind_of_authors, hole_length):
    """Plot, save and show the trajectories of the top-100 authors in ``df``.

    The 100 rows with the largest value in the "2018" column are drawn, one
    line per row (a scatter when ``year == "2018"``, where a row has a single
    point).  The x position of each year is the per-year collaboration total
    returned by ``get_tot_collabs_num()``, so the axis is "stretched" by the
    number of collaborations; tick labels are the entries of the global
    ``YEARS``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an "ID" column and a "2018" column.  Every column other
        than "ID" is treated as one consecutive year, the first of them being
        ``year`` (positional assumption inherited from the data layout --
        TODO confirm against the per-year CSV files).
    year : str
        Starting year of the authors in ``df``; must be an element of ``YEARS``.
    kind_of_authors : str
        Free text inserted into the figure title.
    hole_length : int
        Hole size; used in the title, the output directory and the file name.

    Raises
    ------
    IndexError
        If ``df`` has rows and more value columns than there are years from
        ``year`` to the end of ``YEARS``.

    Side effects
    ------------
    Writes ``stretched_by_collaborations<year>_holeSize_<hole_length>.png`` into
    ``myDATA/10-splitted_by_year/<hole_length>_hole_size_splitted/trajectories_plt``
    (created if missing), shows the figure and then closes it.
    """
    tot_collabs = get_tot_collabs_num()
    top_df = df.nlargest(100, "2018")

    # Columns are mapped to years by position: first non-ID column <-> `year`.
    value_cols = [col for col in top_df.columns if col != "ID"]
    first_year_pos = YEARS.index(year)
    x = tot_collabs[first_year_pos:first_year_pos + len(value_cols)]
    if len(top_df) > 0 and len(x) < len(value_cols):
        raise IndexError(
            "df has " + str(len(value_cols)) + " value columns but only "
            + str(len(x)) + " years are available from " + year
        )

    fig, ax = plt.subplots(figsize=(20, 8), dpi=80)
    ax.set_xlabel("year stretched by # collaborations")
    ax.set_ylabel("num_Collaborations")
    ax.set_title("# vertex trajectories for top 100 " + kind_of_authors + " authors who started in: " + year + " with hole of size " + str(hole_length) + " stretched by collaborations\n", fontdict={'fontsize': 12})

    labels = []
    # Walk the rows directly instead of re-filtering the frame once per ID.
    for author_id, final_value, y in zip(top_df["ID"], top_df["2018"].values, top_df[value_cols].values):
        labels.append("aut " + str(author_id) + " -> max " + str(final_value))
        if year == "2018":
            # A single point per author cannot be drawn as a line.
            ax.scatter(x, y)
        else:
            ax.plot(x, y)

    ax.legend(labels, loc='upper center', bbox_to_anchor=(0.5, -0.2),
              fancybox=True, shadow=True, ncol=5)

    # One tick per year, placed at that year's collaboration total.
    ax.set_xticks(tot_collabs)
    ax.set_xticklabels(YEARS, rotation='vertical')

    # Build the path from the hole_length argument (not a loop global) and make
    # sure the directory the figure is actually written to exists.
    out_dir = 'myDATA/10-splitted_by_year/' + str(hole_length) + '_hole_size_splitted/trajectories_plt'
    os.makedirs(out_dir, exist_ok=True)
    file = out_dir + '/stretched_by_collaborations' + year + '_holeSize_' + str(hole_length) + '.png'

    fig.savefig(file)
    plt.show()
    # Release the figure: this function is called once per (hole size, year).
    plt.close(fig)

# Plot the top-100 "active" author trajectories for every hole length and
# starting year for which a non-empty CSV exists on disk.
for size in range(min_size, max_size+1):
    print((' \033[1m' + "HOLE SIZE "+str(size) + '\033[0m ').center(120, '='))

    path = 'myDATA/10-splitted_by_year/'+str(size)+'_hole_size_splitted'
    if not os.path.exists(path):
        # No data directory for this hole size.
        continue

    for y in YEARS:
        file = path+'/'+y+'_collabs_by_starting_year.csv'
        if not os.path.exists(file):
            continue

        df_y = pd.read_csv(file)
        if len(df_y) == 0:
            # Nothing to plot for this starting year.
            continue

        plot_df(df_y, y, "active", size)