# **Importing Libraries**

**Here we import all relevant libraries for this Notebook, and set up the appropriate working directory**

In [3]:
api = input("Insert API key from Plotly: ")
user = input("Insert user name from Plotly: ")
# 2NexotbGiaTPZ5x7CNWa; mlcanales
%config InlineBackend.figure_format ='retina'
import pandas as pd
import os
import plotly
import chart_studio
import plotly.io as pio
import chart_studio.plotly as py 
import plotly.graph_objs as go
import matplotlib.pyplot as plt
import matplotlib.cm
from plotly.offline import plot
from glob import glob
import shutil
chart_studio.tools.set_credentials_file(username=user, api_key=api)


# To get all results printed: (or "last_expr" to only last expression from cell)
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

Insert API key from Plotly:  <redacted>
Insert user name from Plotly:  mlcanales


**Now we set up some options to better handle the Database**

In [3]:
# Display options: show every column, every row and the full content of each cell.
pd.set_option('display.max_columns', None)  # To show all columns on results
pd.set_option('display.max_rows', None)     # To show all rows on results
# To display all length of each cell. None is the supported "no limit" value; the former
# sentinel -1 is deprecated and rejected by recent pandas releases.
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    # Older pandas releases only accept an integer here and use -1 for "no limit".
    pd.set_option('display.max_colwidth', -1)

# **Setting Working Directory**

**Since the user might want to run different keyword searches, that will require different files to work on in the upcoming cleaning procedure, we create a unique set of folders for each search**

In [4]:
# We start from the Jupyter notebooks folder
# (the bare expression below displays the current directory in the cell output)
os.getcwd()
# Move back to main path: Bibliometric analysis
# NOTE(review): `%cd ..` is relative to the current directory, so re-running this cell
# moves one level further up each time; run it only once per kernel session.
%cd ..
# Absolute path of the main folder, stored with a trailing "/"
main_path = os.getcwd() + "/"

'/Users/macadmin/Google Drive/Shared Folders/GW-ABM — Review/Bibliometric analysis/Jupyter notebooks'

/Users/macadmin/Google Drive/Shared Folders/GW-ABM — Review/Bibliometric analysis


# **Output (Library)**

## **Compound Annual Growth Rate &#9745;**

**Here calculated for initial searches**

In [5]:
# First we read files (one database per keyword search, paths relative to the main folder)
GW_ABM = pd.read_csv("Keyword Searches/Main DB (ABM & GW)/Database/Main DB (ABM & GW).csv")
SH = pd.read_csv("Keyword Searches/SH/Database/SH.csv")
TC = pd.read_csv("Keyword Searches/TC/Database/TC.csv")
PM = pd.read_csv("Keyword Searches/PM/Database/PM.csv")
EM_2 = pd.read_csv("Keyword Searches/EM2/Database/EM2.csv")
Merged = pd.read_csv('Keyword Searches/Merged DB (GW-ABM,SH,PM,TC,EM)/Database/Merged DB (GW-ABM,SH,PM,TC,EM).csv')

# Annual Scientific Production: number of documents per value of the "PY" column
ASP_GW_ABM = pd.DataFrame({'GW_ABM Count' : GW_ABM.groupby( ["PY"] ).size()})
ASP_SH = pd.DataFrame({'SH Count' : SH.groupby( ["PY"] ).size()})
ASP_TC = pd.DataFrame({'TC Count' : TC.groupby( ["PY"] ).size()})
ASP_PM = pd.DataFrame({'PM Count' : PM.groupby( ["PY"] ).size()})
ASP_EM_2 = pd.DataFrame({'EM2 Count' : EM_2.groupby( ["PY"] ).size()})
ASP_Merged = pd.DataFrame({'Merged Count' : Merged.groupby( ["PY"] ).size()})

# Here we merge these dataframes. The years are sorted explicitly because both the
# cumulative sum and the "drop the last year" step below rely on chronological order,
# which sort=False does not guarantee when the frames cover different years.
frames = [ASP_Merged, ASP_GW_ABM, ASP_SH, ASP_TC, ASP_PM, ASP_EM_2]
concatenate = pd.concat(frames, sort = False, axis = 1).sort_index()
concatenate = concatenate.fillna(0)   # years without documents count as 0
concatenate = concatenate.cumsum()    # cumulative number of documents per year

# We drop the last year only.
# NOTE(review): the GW related computation drops the last two years; confirm that
# keeping one more year here is intended.
concatenate = concatenate.iloc[:-1]

# Computation of ACGR, from the first year with a non-zero cumulative count to the last year
var = ["GW_ABM Count", "SH Count", "TC Count", "PM Count", "EM2 Count", "Merged Count"]
for element in var:
    series = concatenate[element]
    nonzero = series[series != 0]
    if nonzero.empty:
        # No document at all for this search: the growth rate is undefined.
        print("Annual Compound Growth Rate for",element,"is: ", float("nan"))
        continue
    initial_val = nonzero.iloc[0]
    initial_t = nonzero.index[0]
    final_val = series.iloc[-1]
    final_t = series.index[-1]
    T = final_t-initial_t
    # NOTE(review): the exponent uses T+1 (number of years covered) rather than T
    # (number of elapsed years); kept unchanged so the reported figures do not move.
    print("Annual Compound Growth Rate for",element,"is: ", 100*((final_val/initial_val)**(1/(T+1))-1))

Annual Compound Growth Rate for GW_ABM Count is:  23.859250269349563
Annual Compound Growth Rate for SH Count is:  75.17416165749522
Annual Compound Growth Rate for TC Count is:  81.14473285278132
Annual Compound Growth Rate for PM Count is:  33.191024110484356
Annual Compound Growth Rate for EM2 Count is:  15.959361920386982
Annual Compound Growth Rate for Merged Count is:  17.73078275229041


**Here calculated for GW related searches**

In [35]:
# First we read the databases (one per GW related keyword search)
SH = pd.read_csv("Keyword Searches/SH & GW/Database/SH_GW_DB.csv")
TC = pd.read_csv("Keyword Searches/TC & GW/Database/TC_GW_DB.csv")
PM = pd.read_csv("Keyword Searches/PM & GW/Database/PM_GW_DB.csv")
EM = pd.read_csv("Keyword Searches/EM & GW/Database/EM_GW_DB.csv")
GW_ABM = pd.read_csv("Keyword Searches/Main DB (ABM & GW)/Database/Main DB (ABM & GW).csv")

# Annual Scientific Production: number of documents per value of the "PY" column
ASP_SH = pd.DataFrame({'ASP_SH' : SH.groupby( ["PY"] ).size()})
ASP_TC = pd.DataFrame({'ASP_TC' : TC.groupby( ["PY"] ).size()})
ASP_PM = pd.DataFrame({'ASP_PM' : PM.groupby( ["PY"] ).size()})
ASP_EM = pd.DataFrame({'ASP_EM' : EM.groupby( ["PY"] ).size()})
ASP_GW_ABM = pd.DataFrame({'ASP_GW_ABM' : GW_ABM.groupby( ["PY"] ).size()})

# Here we merge these dataframes, replace NaN values by 0 and obtain the cumulative
# number of documents per year. The years are sorted explicitly because the cumulative
# sum and the tail drop below rely on chronological order, which sort=False does not
# guarantee when the frames cover different years.
frames = [ASP_GW_ABM, ASP_SH, ASP_TC, ASP_PM, ASP_EM]
concatenate = pd.concat(frames, sort = False, axis = 1).sort_index()
concatenate = concatenate.fillna(0)
concatenate = concatenate.cumsum()

# We drop the last two years (2020 and 2019)
concatenate = concatenate.iloc[:-2]

# Computation of ACGR, from the first year with a non-zero cumulative count to the last year
var = ["ASP_GW_ABM", "ASP_SH", "ASP_TC", "ASP_PM", "ASP_EM"]
for element in var:
    series = concatenate[element]
    nonzero = series[series != 0]
    if nonzero.empty:
        # No document at all for this search: the growth rate is undefined.
        print("Annual Compound Growth Rate for",element,"is: ", float("nan"))
        continue
    initial_val = nonzero.iloc[0]
    initial_t = nonzero.index[0]
    final_val = series.iloc[-1]
    final_t = series.index[-1]
    T = final_t-initial_t
    # NOTE(review): the exponent uses T+1 (number of years covered) rather than T
    # (number of elapsed years); kept unchanged so the reported figures do not move.
    print("Annual Compound Growth Rate for",element,"is: ", 100*((final_val/initial_val)**(1/(T+1))-1))

Annual Compound Growth Rate for ASP_GW_ABM is:  23.685867209516044
Annual Compound Growth Rate for ASP_SH is:  71.14043696207197
Annual Compound Growth Rate for ASP_TC is:  25.99210498948732
Annual Compound Growth Rate for ASP_PM is:  24.962776727305425
Annual Compound Growth Rate for ASP_EM is:  14.817790280738619


## **Annual Scientific Production Original (Plotly) &#9745;**

**Here we first select Color Scale**

In [507]:
import matplotlib.pyplot as plt
import matplotlib.cm

# Available color scales: https://matplotlib.org/3.1.0/tutorials/colors/colormaps.html
#print(plt.colormaps())

# Good ones: hsv; tab20b; tab20c; twilight; twilight_shifted
# plt.get_cmap() is used because plt.cm.get_cmap() was deprecated and later removed from matplotlib.
cmap = plt.get_cmap('magma')
# Positions (between 0 and 1) at which each colormap is sampled
in_list = [0.1,0.3,0.5,0.7,0.8,0.9]
# Each sampled colour is converted to an 'rgb(r, g, b)' string with 0-255 integer channels
colour_list = ['rgb' + str(tuple(int(255*x) for x in cmap(position)[0:3])) for position in in_list]
colour_list

cmap2 = plt.get_cmap('viridis')
colour_list2 = ['rgb' + str(tuple(int(255*x) for x in cmap2(position)[0:3])) for position in in_list]
colour_list2

# Color blind friendly palette
color_blind_friendly_list = ["#E69F00", "#56B4E9", "#009E73", "#F0E442", "#D55E00", "#0072B2",  "#CC79A7"]

['rgb(20, 13, 53)',
 'rgb(99, 25, 127)',
 'rgb(182, 54, 121)',
 'rgb(246, 112, 91)',
 'rgb(253, 159, 108)',
 'rgb(253, 207, 146)']

['rgb(72, 35, 116)',
 'rgb(52, 94, 141)',
 'rgb(32, 144, 140)',
 'rgb(68, 190, 112)',
 'rgb(121, 209, 81)',
 'rgb(189, 222, 38)']

**Here we read files and erase years 2020 and 2019**

In [1]:
# Read the database of each keyword search (paths relative to the main folder)
GW_ABM = pd.read_csv("Keyword Searches/Main DB (ABM & GW)/Database/Main DB (ABM & GW).csv")
SH = pd.read_csv("Keyword Searches/SH/Database/SH.csv")
TC = pd.read_csv("Keyword Searches/TC/Database/TC.csv")
PM = pd.read_csv("Keyword Searches/PM/Database/PM.csv")
EM_2 = pd.read_csv("Keyword Searches/EM2/Database/EM2.csv")
Merged = pd.read_csv('Keyword Searches/Merged DB (GW-ABM,SH,PM,TC,EM)/Database/Merged DB (GW-ABM,SH,PM,TC,EM).csv')

# Annual Scientific Production: number of documents per value of the "PY" column
ASP_GW_ABM = pd.DataFrame({'GW_ABM Count' : GW_ABM.groupby( ["PY"] ).size()})
ASP_SH = pd.DataFrame({'SH Count' : SH.groupby( ["PY"] ).size()})
ASP_TC = pd.DataFrame({'TC Count' : TC.groupby( ["PY"] ).size()})
ASP_PM = pd.DataFrame({'PM Count' : PM.groupby( ["PY"] ).size()})
ASP_EM_2 = pd.DataFrame({'EM2 Count' : EM_2.groupby( ["PY"] ).size()})
ASP_Merged = pd.DataFrame({'Merged Count' : Merged.groupby( ["PY"] ).size()})

# One column per search, one row per year. The years are sorted explicitly because the
# tail drop below relies on chronological order, which sort=False does not guarantee
# when the frames cover different years.
frames = [ASP_GW_ABM, ASP_Merged, ASP_SH, ASP_TC, ASP_PM, ASP_EM_2]
concat = pd.concat(frames, sort = False, axis = 1).sort_index()
concat = concat.fillna(0)   # years without documents count as 0
# We drop years 2020 and 2019 (the last two rows) to avoid bias in the graph
concat = concat.iloc[:-2]
concat

FileNotFoundError: [Errno 2] File b'Keyword Searches/Main DB (ABM & GW)/Database/Main DB (ABM & GW).csv' does not exist: b'Keyword Searches/Main DB (ABM & GW)/Database/Main DB (ABM & GW).csv'

### **Final Figure**

In [512]:
# Color Selection
# plt.get_cmap() is used because plt.cm.get_cmap() was deprecated and later removed from matplotlib.
cmap = plt.get_cmap('tab20c')
# Positions (between 0 and 1) at which each colormap is sampled
in_list = [0.1,0.3,0.5,0.7,0.8,0.9]
# Each sampled colour is converted to an 'rgb(r, g, b)' string with 0-255 integer channels
colour_list = ['rgb' + str(tuple(int(255*x) for x in cmap(position)[0:3])) for position in in_list]
colour_list

cmap2 = plt.get_cmap('viridis')
colour_list2 = ['rgb' + str(tuple(int(255*x) for x in cmap2(position)[0:3])) for position in in_list]
colour_list2

['rgb(158, 202, 225)',
 'rgb(253, 174, 107)',
 'rgb(161, 217, 155)',
 'rgb(188, 189, 220)',
 'rgb(99, 99, 99)',
 'rgb(189, 189, 189)']

['rgb(72, 35, 116)',
 'rgb(52, 94, 141)',
 'rgb(32, 144, 140)',
 'rgb(68, 190, 112)',
 'rgb(121, 209, 81)',
 'rgb(189, 222, 38)']

In [513]:
%config InlineBackend.figure_format ='retina'

trace1 = go.Scatter(
                x = concat.index.values,
                y = concat["GW_ABM Count"],
                mode = 'lines',
                name = "(2) GW-ABM    ",
                #marker = dict(color = colour_list[0]),
                text = concat['GW_ABM Count'],
                line = dict(color=colour_list[3], width=2.5))
trace11 = go.Scatter(
                x = concat.index.values,
                y = concat["Merged Count"],
                mode = 'lines',
                name = "(1) Merged DB    ",
                #marker = dict(color = colour_list[0]),
                text = concat['Merged Count'],
                line = dict(color=colour_list2[0], width=1.5))
trace2 = go.Scatter(
                x = concat.index.values,
                y = concat["SH Count"],
                mode = 'lines',
                name = "(B) SH    ",
                #marker = dict(color = colour_list2[2]),
                text = concat['SH Count'],
                line = dict(color="rgb(44,160,44)", width=2.5, dash='dot'))
trace3 = go.Scatter(
                x = concat.index.values,
                y = concat["TC Count"],
                mode = "lines",
                name = "(D) TC",
                #marker = dict(color = colour_list2[3]),
                text = concat['TC Count'],
                line = dict(color="rgb(201,219,37)", width=2.5, dash='dot'))
trace4 = go.Scatter(
                x = concat.index.values,
                y = concat['PM Count'],
                mode = "lines",
                name = "(C) PM    ",
                #marker = dict(color = colour_list2[4]),
                text = concat['PM Count'],
                line = dict(color="rgb(214,39,40)", width=2.5, dash='dot'))
trace5 = go.Scatter(
                x = concat.index.values,
                y = concat['EM2 Count'],
                mode = "lines",
                name = '(A) EM    ',
                #marker = dict(color = colour_list2[5]),
                text = concat['EM2 Count'],
                line = dict(color="rgb(31,119,180)", width=2.5, dash='dot'))

data = [trace11, trace1, trace5, trace2, trace4, trace3]

layout = go.Layout(width= 1000, height= 600,
              xaxis= dict(title= '<b>Publication Year<b>',ticklen= 5,tick0=1975,tickvals=(1975,1980,1985,1990,1995,2000,2005,2010,2015,2018),zeroline=False,ticks = "inside", title_font=dict(size=18), showline=False, linewidth=1, linecolor='black'),
              yaxis= dict(title= '<b>Number of Documents<b>',tick0 = 0,dtick = 40,ticklen= 5,zeroline=False, ticks = "inside", title_font=dict(size=18),showline=False, linewidth=1, linecolor='black'),
              showlegend = True, legend=dict(orientation='h',yanchor='bottom',xanchor='center',y=1.002,x=0.5,font=dict(size=12)),
              annotations=[dict(x=0.5,y=1.12,align="right",valign="top",text='<b>Legend<b>',font=dict(size=15),showarrow=False,xref="paper",yref="paper",xanchor="center",yanchor="top")],
              template="seaborn")

fig = go.Figure(data = data, layout = layout)

# Merged = 1
fig.add_annotation(
    go.layout.Annotation(xref='paper',yref="paper",x=1.01,y=0.95,xanchor='left', yanchor='middle', text='<b>1</b> (18%)',font=dict(size=13,color=colour_list2[0]),showarrow=False))
# EM = 3
fig.add_annotation(
    go.layout.Annotation(xref='paper',yref="paper",x=1.01,y=0.45,xanchor='left', yanchor='middle', text='<b>A</b> (16%)',font=dict(size=13,color="rgb(31,119,180)"),showarrow=False))
# TC = 6
fig.add_annotation(
    go.layout.Annotation(xref='paper',yref="paper",x=1.01,y=0.17,xanchor='left', yanchor='middle', text='<b>D</b> (92%)',font=dict(size=13,color="rgb(201,219,37)"),showarrow=False))
# GWABM = 2
fig.add_annotation(
    go.layout.Annotation(xref='paper',yref="paper",x=1.01,y=0.073,xanchor='left', yanchor='middle', text='<b>2</b> (24%)',font=dict(size=13,color=colour_list[3]),showarrow=False))
# SH = 4
fig.add_annotation(
    go.layout.Annotation(xref='paper',yref="paper",x=1.01,y=0.29,xanchor='left', yanchor='middle', text='<b>B</b> (83%)',font=dict(size=13,color="rgb(44,160,44)"),showarrow=False))
# PM = 5
fig.add_annotation(
    go.layout.Annotation(xref='paper',yref="paper",x=1.01,y=0.21,xanchor='left', yanchor='middle', text='<b>C</b> (34%)',font=dict(size=13,color="rgb(214,39,40)"),showarrow=False))

#legend: ,bgcolor="white", bordercolor="Black", borderwidth=0.3

url = plot(fig, validate=False)
url
#"plotly", "plotly_white", "plotly_dark", "ggplot2", "seaborn", "simple_white", "none"

'temp-plot.html'

## **Annual Scientific Production related to GW (Plotly) &#9745;**

In [364]:
# First we read the databases (one per GW related keyword search)
SH = pd.read_csv("Keyword Searches/SH & GW/Database/SH_GW_DB.csv")
TC = pd.read_csv("Keyword Searches/TC & GW/Database/TC_GW_DB.csv")
PM = pd.read_csv("Keyword Searches/PM & GW/Database/PM_GW_DB.csv")
EM = pd.read_csv("Keyword Searches/EM & GW/Database/EM_GW_DB.csv")
GW_ABM = pd.read_csv("Keyword Searches/Main DB (ABM & GW)/Database/Main DB (ABM & GW).csv")

# Annual Scientific Production: number of documents per value of the "PY" column
ASP_SH = pd.DataFrame({'SH Count' : SH.groupby( ["PY"] ).size()})
ASP_TC = pd.DataFrame({'TC Count' : TC.groupby( ["PY"] ).size()})
ASP_PM = pd.DataFrame({'PM Count' : PM.groupby( ["PY"] ).size()})
ASP_EM = pd.DataFrame({'EM Count' : EM.groupby( ["PY"] ).size()})
ASP_GW_ABM = pd.DataFrame({'GW_ABM Count' : GW_ABM.groupby( ["PY"] ).size()})

# One column per search, one row per year. The years are sorted explicitly because the
# tail drop below relies on chronological order, which sort=False does not guarantee
# when the frames cover different years.
frames = [ASP_SH, ASP_TC, ASP_PM, ASP_EM, ASP_GW_ABM]
concat = pd.concat(frames, sort = False, axis = 1).sort_index()

concat = concat.fillna(0)   # years without documents count as 0
# We drop years 2020 and 2019 (the last two rows) to avoid bias in the graph
concat = concat.iloc[:-2]
concat

Unnamed: 0_level_0,SH Count,TC Count,PM Count,EM Count,GW_ABM Count
PY,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1996.0,0.0,0.0,0.0,1.0,0.0
1997.0,0.0,0.0,0.0,1.0,0.0
2003.0,0.0,0.0,0.0,0.0,1.0
2005.0,0.0,0.0,0.0,0.0,2.0
2007.0,0.0,0.0,2.0,0.0,2.0
2008.0,0.0,0.0,2.0,0.0,1.0
2009.0,0.0,0.0,1.0,0.0,2.0
2010.0,0.0,0.0,4.0,1.0,5.0
2011.0,0.0,0.0,3.0,3.0,0.0
2012.0,1.0,0.0,1.0,1.0,1.0


In [365]:
# Color Selection
# plt.get_cmap() is used because plt.cm.get_cmap() was deprecated and later removed from matplotlib.
cmap = plt.get_cmap('Accent')
# Positions (between 0 and 1) at which the colormap is sampled
in_list = [0.1,0.3,0.5,0.7,0.8,0.9]
# Each sampled colour is converted to an 'rgb(r, g, b)' string with 0-255 integer channels
colour_list = ['rgb' + str(tuple(int(255*x) for x in cmap(position)[0:3])) for position in in_list]
colour_list

['rgb(127, 201, 127)',
 'rgb(253, 192, 134)',
 'rgb(56, 108, 176)',
 'rgb(240, 2, 127)',
 'rgb(191, 91, 22)',
 'rgb(102, 102, 102)']

In [366]:
%config InlineBackend.figure_format ='retina'
years = list(range(1996, 2019))

trace1 = go.Bar(
                x = concat.index.values,
                y = concat["SH Count"],
                name = "(B) GW-SH",
                marker_color = colour_list[0],
                #width=0.3            
                text = concat['SH Count'])
                #textposition = "auto"

trace2 = go.Bar(
                x = concat.index.values,
                y = concat["TC Count"],
                name = "(D) GW-TC",
                marker_color = colour_list[1],
                text = concat['TC Count'])

trace3 = go.Bar(
                x = concat.index.values,
                y = concat['PM Count'],
                name = "(C) GW-PM   ",
                marker_color = colour_list[3],
                text = concat['PM Count'])


trace4 = go.Bar(
                x = concat.index.values,
                y = concat['EM Count'],
                name = '(A) GW-EM   ',
                marker_color = colour_list[2],
                text = concat['EM Count'])

trace5 = go.Scatter(
                x = concat.index.values,
                y = concat["GW_ABM Count"],
                mode = 'lines+markers',
                name = "(1) GW-ABM   ",
                text = concat['GW_ABM Count'],
                line = dict(color='rgb(72, 35, 116)', width=1.5),
                marker=dict(color='Purple',size=7,line=dict(color='MediumPurple',width=0.5)))

data = [trace5, trace4, trace1, trace3, trace2]

layout = go.Layout(width= 1000, height= 600,
              xaxis= dict(title= '<b>Publication Year<b>',ticklen= 5,tick0=1996,tickvals=years,zeroline=False,ticks = "inside", title_font=dict(size=18), showline=False, linewidth=1, linecolor='black', tickangle=-45, showgrid=False),
              yaxis= dict(title= '<b>Number of Documents<b>',tick0 = 0,dtick = 2,ticklen= 5,zeroline=False, ticks = "inside", title_font=dict(size=18),showline=False, linewidth=1, linecolor='black'),
              showlegend = True, legend=dict(traceorder='normal',orientation='h',yanchor='bottom',xanchor='center',y=1,x=0.5),
              annotations=[dict(x=0.5,y=1.12,align="right",valign="top",text='<b>Legend<b>',font=dict(size=15),showarrow=False,
              xref="paper",yref="paper",xanchor="center",yanchor="top")],template="seaborn",
              barmode='stack',bargap=0.1)
             
            
fig = go.Figure(data = data, layout = layout)

# EM = 3
fig.add_annotation(
    go.layout.Annotation(xref='paper',yref="paper",x=1.01,y=0.13,xanchor='left', yanchor='middle', text='<b>A</b> (15%)',font=dict(size=13,color=colour_list[2]),showarrow=False))
#fig.add_shape(
#        # Line Diagonal
#        go.layout.Shape(type="line",xref='paper',yref="paper",x0=0.945,x1=1.007,y1=0.13,y0=0.13,line=dict(color="rgb(31,119,180)",width=2)))
#

# TC = 6
fig.add_annotation(
    go.layout.Annotation(xref='paper',yref="paper",x=1.01,y=0.685,xanchor='left', yanchor='middle', text='<b>D</b> (26%)',font=dict(size=13,color=colour_list[1]),showarrow=False))

# GWABM = 2
fig.add_annotation(
    go.layout.Annotation(xref='paper',yref="paper",x=1.01,y=0.315,xanchor='left', yanchor='middle', text='<b>1</b> (24%)',font=dict(size=13,color='rgb(72, 35, 116)'),showarrow=False))

# SH = 4
fig.add_annotation(
    go.layout.Annotation(xref='paper',yref="paper",x=1.01,y=0.5,xanchor='left', yanchor='middle', text='<b>B</b> (71%)',font=dict(size=13,color=colour_list[0]),showarrow=False))

# PM = 5
fig.add_annotation(
    go.layout.Annotation(xref='paper',yref="paper",x=1.01,y=0.87,xanchor='left', yanchor='middle', text='<b>C</b> (25%)',font=dict(size=13,color=colour_list[3]),showarrow=False))



url = plot(fig, validate=False)
url
#"plotly", "plotly_white", "plotly_dark", "ggplot2", "seaborn", "simple_white", "none"

'temp-plot.html'

## **ASP - GW_ABM only**

In [517]:
# Load the GW-ABM database
GW_ABM = pd.read_csv("Keyword Searches/Main DB (ABM & GW)/Database/Main DB (ABM & GW).csv")

# Annual Scientific Production: number of documents per value of the "PY" column
ASP_GW_ABM = pd.DataFrame({'GW_ABM Count' : GW_ABM.groupby( ["PY"] ).size()})

# Same pipeline as for the multi-search figures, here with a single frame
frames = [ASP_GW_ABM]
concat = pd.concat(frames, sort = False, axis = 1).fillna(0)

# Only the last year is dropped here (a single row), to avoid bias in the graph
concat = concat.iloc[:-1]
concat

Unnamed: 0_level_0,GW_ABM Count
PY,Unnamed: 1_level_1
2003,1
2005,2
2007,2
2008,1
2009,2
2010,5
2012,1
2013,1
2014,1
2015,3


In [518]:
# Color Selection
# plt.get_cmap() is used because plt.cm.get_cmap() was deprecated and later removed from matplotlib.
cmap = plt.get_cmap('Accent')
# Positions (between 0 and 1) at which the colormap is sampled
in_list = [0.1,0.3,0.5,0.7,0.8,0.9]
# Each sampled colour is converted to an 'rgb(r, g, b)' string with 0-255 integer channels
colour_list = ['rgb' + str(tuple(int(255*x) for x in cmap(position)[0:3])) for position in in_list]
colour_list

['rgb(127, 201, 127)',
 'rgb(253, 192, 134)',
 'rgb(56, 108, 176)',
 'rgb(240, 2, 127)',
 'rgb(191, 91, 22)',
 'rgb(102, 102, 102)']

In [519]:
%config InlineBackend.figure_format ='retina'
years = list(range(2003, 2020))

trace1 = go.Bar(
                x = concat.index.values,
                y = concat["GW_ABM Count"],
                name = "GW-ABM Database (CAGR 24%)",
                marker_color = colour_list[2],
    marker_line_color='rgb(8,48,107)', marker_line_width=1.5, opacity=0.7,
                #width=0.3            
                text = concat['GW_ABM Count'])
                #textposition = "auto"
#'rgb(72, 35, 116)'


data = [trace1]

layout = go.Layout(width= 1000, height= 600,
              xaxis= dict(title= '<b>Publication Year<b>',ticklen= 5,tickvals=years,zeroline=False,ticks = "inside", title_font=dict(size=18), showline=False, linewidth=1, linecolor='black', tickangle=-45, showgrid=False),
              yaxis= dict(title= '<b>Number of Documents<b>',tick0 = 0,dtick = 1,ticklen= 5,zeroline=False, ticks = "inside", title_font=dict(size=18),showline=False, linewidth=1, linecolor='black'),
              showlegend = True, legend=dict(traceorder='normal',orientation='h',yanchor='bottom',xanchor='center',y=1,x=0.5),
              annotations=[dict(x=0.5,y=1.12,align="right",valign="top",text='<b>Legend<b>',font=dict(size=15),showarrow=False,
              xref="paper",yref="paper",xanchor="center",yanchor="top")],template="seaborn",
              barmode='stack',bargap=0.15)
             
            
fig = go.Figure(data = data, layout = layout)

# GWABM = 2
#fig.add_annotation(
#    go.layout.Annotation(xref='paper',yref="paper",x=1.01,y=0.315,xanchor='left', yanchor='middle', text='<b>1</b> (24%)',font=dict(size=13,color='rgb(72, 35, 116)'),showarrow=False))

url = plot(fig, validate=False)
url
#"plotly", "plotly_white", "plotly_dark", "ggplot2", "seaborn", "simple_white", "none"

'temp-plot.html'

### With Agriculture

In [520]:
# First we read the databases
GW_ABM = pd.read_csv("Keyword Searches/Main DB (ABM & GW)/Database/Main DB (ABM & GW).csv")

# Annual Scientific Production: number of documents per value of the "PY" column
ASP_GW_ABM = pd.DataFrame({'GW_ABM Count' : GW_ABM.groupby( ["PY"] ).size()})

frames = [ASP_GW_ABM]
concat = pd.concat(frames, sort = False, axis = 1)

concat = concat.fillna(0)
# Only the last year is dropped here (a single row), to avoid bias in the graph
concat.drop(concat.tail(1).index, inplace=True)

# Years matching the entries of topic_AG one to one. Defined in this cell so that it
# does not depend on a `years` variable left over from another cell.
years = list(range(2003, 2020))
# Number of documents on the agriculture topic per year, entered by hand
topic_AG = [1,0,1,0,0,0,1,0,0,0,0,1,3,2,2,1,6]
df_AG = pd.DataFrame(topic_AG, index=years,columns=['topic AG'])
df_AG
concat

# Documents on other topics = all GW-ABM documents minus the agriculture ones.
# The subtraction is aligned on the year index, so only the years present in concat are kept.
concat['new']=concat['GW_ABM Count']-df_AG['topic AG']
concat


Unnamed: 0,topic AG
2003,1
2004,0
2005,1
2006,0
2007,0
2008,0
2009,1
2010,0
2011,0
2012,0


Unnamed: 0_level_0,GW_ABM Count
PY,Unnamed: 1_level_1
2003,1
2005,2
2007,2
2008,1
2009,2
2010,5
2012,1
2013,1
2014,1
2015,3


Unnamed: 0_level_0,GW_ABM Count,new
PY,Unnamed: 1_level_1,Unnamed: 2_level_1
2003,1,0.0
2005,2,1.0
2007,2,2.0
2008,1,1.0
2009,2,1.0
2010,5,5.0
2012,1,1.0
2013,1,1.0
2014,1,0.0
2015,3,0.0


In [521]:
# Color Selection
# plt.get_cmap() is used because plt.cm.get_cmap() was deprecated and later removed from matplotlib.
cmap = plt.get_cmap('Accent')
# Positions (between 0 and 1) at which the colormap is sampled
in_list = [0.1,0.3,0.5,0.7,0.8,0.9]
# Each sampled colour is converted to an 'rgb(r, g, b)' string with 0-255 integer channels
colour_list = ['rgb' + str(tuple(int(255*x) for x in cmap(position)[0:3])) for position in in_list]
colour_list

['rgb(127, 201, 127)',
 'rgb(253, 192, 134)',
 'rgb(56, 108, 176)',
 'rgb(240, 2, 127)',
 'rgb(191, 91, 22)',
 'rgb(102, 102, 102)']

In [522]:
%config InlineBackend.figure_format ='retina'
years = list(range(2003, 2020))
topic_AG = [1,0,1,0,0,0,1,0,0,0,0,1,3,2,2,1,6]

trace1 = go.Bar(
                x = concat.index.values,
                y = concat["new"],
                name = "Other topics",
                marker_color = colour_list[2],
    marker_line_color='rgb(8,48,107)', marker_line_width=1.5, opacity=0.7,
                #width=0.3            
                text = concat['GW_ABM Count'])

trace2 = go.Bar(
                x = years,
                y = topic_AG,
                name = 'Agriculture',
                marker_color = 'rgb(68, 190, 112)',
    marker_line_color='rgb(72, 35, 116)', marker_line_width=0.1, opacity=0.95,
                text = topic_AG)

data = [trace1,trace2]

layout = go.Layout(width= 1000, height= 600,
              xaxis= dict(title= '<b>Publication Year<b>',ticklen= 5,tickvals=years,zeroline=False,ticks = "inside", title_font=dict(size=18), showline=False, linewidth=1, linecolor='black', tickangle=-45, showgrid=False),
              yaxis= dict(title= '<b>Number of Documents<b>',tick0 = 0,dtick = 1,ticklen= 5,zeroline=False, ticks = "inside", title_font=dict(size=18),showline=False, linewidth=1, linecolor='black'),
              showlegend = True, legend=dict(traceorder='normal',orientation='h',yanchor='bottom',xanchor='center',y=1,x=0.5),
              annotations=[dict(x=0.5,y=1.12,align="right",valign="top",text='<b>Legend<b>',font=dict(size=15),showarrow=False,
              xref="paper",yref="paper",xanchor="center",yanchor="top")],template="seaborn",
              barmode='stack',bargap=0.15)
             
            
fig = go.Figure(data = data, layout = layout)

url = plot(fig, validate=False)
url
#"plotly", "plotly_white", "plotly_dark", "ggplot2", "seaborn", "simple_white", "none"

'temp-plot.html'

### With distinction over Topics (AG, WS, OT)

In [74]:
# Number of documents per year for each topic, entered by hand
# (AG: agriculture, WS: water supply, OT: other)
topic_AG = [1,0,1,0,0,0,1,0,0,0,0,1,3,2,3,1,7]
topic_WS = [0,0,2,0,2,1,0,5,0,1,1,0,1,0,1,1,1]
topic_OT = [0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,2,1]

# Total number of documents in the "other" topic
suma = sum(topic_OT)
print(suma)

5


In [59]:
import matplotlib.pyplot as plt
import matplotlib.cm

# Available color scales: https://matplotlib.org/3.1.0/tutorials/colors/colormaps.html
#print(plt.colormaps())

# Good ones: hsv; tab20b; tab20c; twilight; twilight_shifted
# plt.get_cmap() is used because plt.cm.get_cmap() was deprecated and later removed from matplotlib.
cmap = plt.get_cmap('magma')
# Positions (between 0 and 1) at which each colormap is sampled
in_list = [0.1,0.3,0.5,0.7,0.8,0.9]
# Each sampled colour is converted to an 'rgb(r, g, b)' string with 0-255 integer channels
colour_list = ['rgb' + str(tuple(int(255*x) for x in cmap(position)[0:3])) for position in in_list]
colour_list

cmap2 = plt.get_cmap('viridis')
colour_list2 = ['rgb' + str(tuple(int(255*x) for x in cmap2(position)[0:3])) for position in in_list]
colour_list2

# Color blind friendly palette
color_blind_friendly_list = ["#E69F00", "#56B4E9", "#009E73", "#F0E442", "#D55E00", "#0072B2",  "#CC79A7"]

['rgb(20, 13, 53)',
 'rgb(99, 25, 127)',
 'rgb(182, 54, 121)',
 'rgb(246, 112, 91)',
 'rgb(253, 159, 108)',
 'rgb(253, 207, 146)']

['rgb(72, 35, 116)',
 'rgb(52, 94, 141)',
 'rgb(32, 144, 140)',
 'rgb(68, 190, 112)',
 'rgb(121, 209, 81)',
 'rgb(189, 222, 38)']

In [60]:
%config InlineBackend.figure_format ='retina'
years = list(range(2003, 2020))
topic_AG = [1,0,1,0,0,0,1,0,0,0,0,1,3,2,3,1,7]
topic_WS = [0,0,2,0,2,1,0,5,0,1,1,0,1,0,1,1,1]
topic_OT = [0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,2,1]

#trace1 = go.Bar(
#                x = concat.index.values,
#                y = concat["GW_ABM Count"],
#                name = "GW-ABM Database (CAGR 24%)",
#                marker_color = colour_list[2],
#    #marker_line_color='rgb(8,48,107)', marker_line_width=1.5, opacity=0.7,
#                #width=0.3            
#                text = concat['GW_ABM Count'])
#                #textposition = "auto"
#
trace2 = go.Bar(
                x = years,
                y = topic_AG,
                name = 'Agriculture',
                marker_color = colour_list2[3],
    marker_line_color=colour_list2[0], marker_line_width=0.1, opacity=0.95,
                text = topic_AG)

trace3 = go.Bar(
                x = years,
                y = topic_WS,
                name = 'Water Supply',
                marker_color = colour_list2[1],
    marker_line_color=colour_list2[0], marker_line_width=0.1, opacity=0.95,
                text = topic_WS)

trace4 = go.Bar(
                x = years,
                y = topic_OT,
                name = 'Other',
                marker_color = colour_list2[0],
    marker_line_color=colour_list2[0], marker_line_width=0.1, opacity=0.95,
                text = topic_OT)

#trace3 = go.Scatter(
#                x = concat.index.values,
#                y = concat["GW_ABM Count"],
#                mode = 'lines+markers',
#                name = "(1) GW-ABM   ",
#                text = concat['GW_ABM Count'],
#                line = dict(color='rgb(72, 35, 116)', width=1.5),
#                marker=dict(color='Purple',size=7,line=dict(color='MediumPurple',width=0.5)))
#

data = [trace2,trace3,trace4]

layout = go.Layout(width= 1000, height= 600,
              xaxis= dict(title= '<b>Publication Year<b>',ticklen= 5,tickvals=years,zeroline=False,ticks = "inside", title_font=dict(size=18), showline=False, linewidth=1, linecolor='black', tickangle=-45, showgrid=False),
              yaxis= dict(title= '<b>Number of Documents<b>',tick0 = 0,dtick = 1,ticklen= 5,zeroline=False, ticks = "inside", title_font=dict(size=18),showline=False, linewidth=1, linecolor='black'),
              showlegend = True, legend=dict(traceorder='normal',orientation='h',yanchor='bottom',xanchor='center',y=1,x=0.5),
              annotations=[dict(x=0.5,y=1.12,align="right",valign="top",text='<b>Legend<b>',font=dict(size=18),showarrow=False,
              xref="paper",yref="paper",xanchor="center",yanchor="top")],template="plotly",
              barmode='stack',bargap=0.15)
             
            
fig = go.Figure(data = data, layout = layout)

# GWABM = 2
#fig.add_annotation(
#    go.layout.Annotation(xref='paper',yref="paper",x=1.01,y=0.315,xanchor='left', yanchor='middle', text='<b>1</b> (24%)',font=dict(size=13,color='rgb(72, 35, 116)'),showarrow=False))

url = plot(fig, validate=False)
url
#"plotly", "plotly_white", "plotly_dark", "ggplot2", "seaborn", "simple_white", "none"

'temp-plot.html'

## **Sunburst/pie/bar plot (plotly)**

In [None]:
import plotly.graph_objects as go

# Number of colours available in colour_list
# NOTE(review): there are 8 labels below; confirm colour_list holds enough colours for all slices.
print(len(colour_list))

# One slice per search; values are entered by hand
labels = ["EM", "SH", "PM", "TC","EM+GW", "SH+GW", "PM+GW", "TC+GW"]
values=[786, 267, 349, 128, 27, 49, 29,2]

pie_trace = go.Pie(labels=labels, values=values)
fig = go.Figure(data=[pie_trace])

# Slice styling: colours from colour_list with a black outline, label shown on each slice
slice_marker = dict(colors=colour_list, line=dict(color='#000000', width=2))
fig.update_traces(
    hoverinfo='label+percent',
    textinfo='label+text',
    textfont_size=20,
    marker=slice_marker)

# Write the figure to an HTML file; plot() returns the path of that file
url = plot(fig, validate=False)
url

In [None]:
# Outer ring ("behind"): label of every sector
sunburst_labels = ["Total", "EM", "SH", "PM", "TC","EM+GW", "SH+GW", "PM+GW", "TC+GW"]
# Inner ring ("core"): parent of each sector, in the same order as the labels ("" marks the root)
sunburst_parents = ["","Total", "Total", "Total", "Total", "EM", "SH", "PM", "TC"]
# Value of each sector, entered by hand, in the same order as the labels
sunburst_values = [1524, 786, 267, 349, 128, 27, 49, 29,2]

sunburst_trace = go.Sunburst(
    labels=sunburst_labels,
    parents=sunburst_parents,
    values=sunburst_values)
fig = go.Figure(sunburst_trace)
# Remove all margins around the chart
fig.update_layout(margin = dict(t=0, l=0, r=0, b=0))

# Write the figure to an HTML file; plot() returns the path of that file
url = plot(fig, validate=False)
url

Official

In [523]:
# Colour selection: sample the 'Accent' colormap at fixed positions and keep
# each sample as a Plotly-style 'rgb(r, g, b)' string.
# plt.get_cmap is used rather than plt.cm.get_cmap because the latter was
# removed in Matplotlib 3.9, while plt.get_cmap works on old and new releases.
cmap = plt.get_cmap('Accent')
in_list = [0.1, 0.3, 0.5, 0.7, 0.8, 0.9]

# cmap(position) returns an RGBA tuple of floats; only the first three channels
# are kept, scaled to 0-255 and truncated to integers.
colour_list = [
    'rgb' + str(tuple(int(255 * channel) for channel in cmap(position)[0:3]))
    for position in in_list
]
colour_list

['rgb(127, 201, 127)',
 'rgb(253, 192, 134)',
 'rgb(56, 108, 176)',
 'rgb(240, 2, 127)',
 'rgb(191, 91, 22)',
 'rgb(102, 102, 102)']

In [524]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Two pies side by side: document counts of the original frameworks (left) and
# of the groundwater-related subsets (right).
# NOTE(review): `colour_list` comes from another cell and must already exist.
labels_frameworks = ["EM", "SH", "PM", "TC"]
labels_gw = ["GW-EM", "GW-SH", "GW-PM", "GW-TC"]
labels_corrected = ["GW-ABM"] + labels_gw

colors_gw = ['rgb(72, 35, 116)'] + [colour_list[i] for i in (2, 0, 3, 1)]
colors_frameworks = ["rgb(31,119,180)", "rgb(44,160,44)", "rgb(214,39,40)", "rgb(201,219,37)"]

# Pie traces need 'domain'-type subplot cells.
pie_specs = [[{'type': 'domain'}, {'type': 'domain'}]]
fig = make_subplots(rows=1, cols=2, specs=pie_specs)

frameworks_pie = go.Pie(
    labels=labels_frameworks,
    values=[786, 267, 349, 128],
    textinfo='value',
    textfont_size=45,
    marker_colors=colors_frameworks,
    opacity=0.87,
)
gw_pie = go.Pie(
    labels=labels_corrected,
    values=[40, 27, 49, 29, 2],
    textinfo='value',
    textfont_size=45,
    textposition='inside',
    marker_colors=colors_gw,
    opacity=0.9,
)
fig.add_trace(frameworks_pie, 1, 1)
fig.add_trace(gw_pie, 1, 2)

fig.update_traces(hoverinfo="label+name")

# Titles are drawn as annotations above each pie; the legend is hidden.
panel_titles = [
    dict(text='<b>Original Frameworks</b>', x=0.08, y=1.05, font_size=35, showarrow=False),
    dict(text='<b>GW Related</b>', x=0.87, y=1.05, font_size=35, showarrow=False),
]
fig.update_layout(showlegend=False, annotations=panel_titles)
fig.show()

url = plot(fig, validate=False)
url
# Options kept for reference:
#   scalegroup='one' in each trace scales the pies relative to each other
#   hole=0.2 in update_traces turns the pies into donuts

'temp-plot.html'

In [None]:
import plotly.graph_objects as go

# --- Figure 1: stacked bars per framework ----------------------------------
# NOTE(review): the y values and the trace names ('Montreal', 'Ottawa',
# 'Toronto') look like placeholder data rather than bibliometric counts.
x = ['EM', 'SH', 'PM', 'TC']
fig = go.Figure(go.Bar(x=x, y=[2, 5, 1, 9], name='Montreal'))
fig.add_trace(go.Bar(x=x, y=[1, 4, 9, 16], name='Ottawa'))
fig.add_trace(go.Bar(x=x, y=[6, 8, 4.5, 8], name='Toronto'))

fig.update_layout(barmode='stack', xaxis={'categoryorder': 'category ascending'})
fig.show()

# --- Figure 2: relative bars, one trace per framework -----------------------
# Each trace carries two values, so x holds exactly two positions. The earlier
# x = [1, 2, 3, 4] listed four positions for two values per trace.
# NOTE(review): the second trace starts at 286 here, while the pie/sunburst
# cells use 267 for the same framework -- confirm the correct count.
x = [1, 2]

fig = go.Figure()
fig.add_trace(go.Bar(x=x, y=[786, 27]))
fig.add_trace(go.Bar(x=x, y=[286, 49]))
fig.add_trace(go.Bar(x=x, y=[349, 29]))
fig.add_trace(go.Bar(x=x, y=[128, 2]))

fig.update_layout(barmode='relative', title_text='Relative Barmode')
fig.show()
url = plot(fig, validate=False)
url

In [40]:
# Colours for the framework charts: sample two colormaps at the same fixed
# positions and keep each sample as a Plotly-style 'rgb(r, g, b)' string.
# plt.get_cmap is used rather than plt.cm.get_cmap because the latter was
# removed in Matplotlib 3.9, while plt.get_cmap works on old and new releases.
in_list = [0.1, 0.3, 0.5, 0.7, 0.8, 0.9]

# cmap(position) returns an RGBA tuple of floats; only the first three channels
# are kept, scaled to 0-255 and truncated to integers.
cmap = plt.get_cmap('tab20c')
colour_list = [
    'rgb' + str(tuple(int(255 * channel) for channel in cmap(position)[0:3]))
    for position in in_list
]
colour_list

cmap2 = plt.get_cmap('viridis')
colour_list2 = [
    'rgb' + str(tuple(int(255 * channel) for channel in cmap2(position)[0:3]))
    for position in in_list
]
colour_list2

['rgb(158, 202, 225)',
 'rgb(253, 174, 107)',
 'rgb(161, 217, 155)',
 'rgb(188, 189, 220)',
 'rgb(99, 99, 99)',
 'rgb(189, 189, 189)']

['rgb(72, 35, 116)',
 'rgb(52, 94, 141)',
 'rgb(32, 144, 140)',
 'rgb(68, 190, 112)',
 'rgb(121, 209, 81)',
 'rgb(189, 222, 38)']

In [277]:
import plotly.graph_objects as go

# Grouped bar chart: total documents per framework next to the GW-related
# documents of that framework.
# NOTE(review): `colour_list` comes from another cell and must already exist.
frameworks = ['<b>EM</b>', '<b>SH</b>', '<b>PM</b>', '<b>TC</b>']
colors_gw_related = [colour_list[i] for i in (2, 0, 3, 1)]
colors_frameworks = ["rgb(31,119,180)", "rgb(44,160,44)", "rgb(214,39,40)", "rgb(201,219,37)"]

y1 = [786, 286, 349, 128]  # documents per framework
y2 = [27, 49, 29, 2]       # GW-related documents per framework

# Both traces show their values as bar labels and share the same outline style.
trace1 = go.Bar(
    x=frameworks,
    y=y1,
    text=y1,
    textposition='auto',
    name='<i>Frameworks</i>',
    marker=dict(color="rgb(31,119,180)", line=dict(color='rgb(8,48,107)', width=1)),
)

trace2 = go.Bar(
    x=frameworks,
    y=y2,
    text=y2,
    textposition='auto',
    opacity=0.7,
    name='<i>GW Related</i>',
    marker=dict(color="rgb(44,160,44)", line=dict(color='rgb(8,48,107)', width=1)),
)

data = [trace1, trace2]

# Horizontal legend centred above the plot; y axis fully hidden because the
# values are printed on the bars.
legend_style = dict(traceorder='normal', orientation='h',
                    yanchor='bottom', xanchor='center', y=1, x=0.5)
layout = go.Layout(
    width=500,
    height=500,
    barmode='group',
    bargap=0.15,
    bargroupgap=0.01,
    template="plotly_white",
    showlegend=True,
    legend=legend_style,
    yaxis=dict(showticklabels=False, showgrid=False, showline=False, zeroline=False),
    xaxis=dict(showgrid=False, tickfont=dict(family='Arial', color='black', size=15)),
)

fig = go.Figure(data=data, layout=layout)
fig.show()
url = plot(fig, validate=False)
url
# Available templates: "plotly", "plotly_white", "plotly_dark", "ggplot2", "seaborn", "simple_white", "none"
# Alternative per-bar colouring: marker_color=colors_gw_related in each trace
# Annotation: annotations=[dict(x=0.5,y=1.12,align="right",valign="top",text='<b>Legend<b>',font=dict(size=15),showarrow=False,
              #xref="paper",yref="paper",xanchor="center",yanchor="top")]
# Axis title: title= '<b>Framework<b>',title_font=dict(size=18)

'temp-plot.html'

In [276]:

# Grouped bars (seaborn template): documents per framework next to the
# GW-related documents, both coloured with the same per-framework palette.
# NOTE(review): `frameworks`, `y1`, `y2` and `colors_gw_related` are not
# defined in this cell; they must already exist in the kernel.
fig = go.Figure(data=[
    go.Bar(name='Frameworks', x=frameworks, y=[786, 286, 349, 128],
           text=y1, textposition='auto', marker_color=colors_gw_related),
    go.Bar(name='GW Related Frameworks', x=frameworks, y=[27, 49, 29, 2],
           text=y2, textposition='auto', marker_color=colors_gw_related, opacity=0.7),
])

# Bold markup now uses a proper closing tag (</b>); the previous strings
# repeated the opening tag (<b>...<b>) and left the markup unterminated.
legend_title = dict(x=0.5, y=1.12, align="right", valign="top",
                    text='<b>Legend</b>', font=dict(size=15), showarrow=False,
                    xref="paper", yref="paper", xanchor="center", yanchor="top")

# Change the bar mode and the overall layout
fig.update_layout(
    width=1000,
    height=600,
    barmode='group',
    bargap=0.2,
    bargroupgap=0.05,
    annotations=[legend_title],
    template="seaborn",
    showlegend=True,
    legend=dict(traceorder='normal', orientation='h',
                yanchor='bottom', xanchor='center', y=1, x=0.5),
    yaxis=dict(dtick=1000, showgrid=False, showline=False, zeroline=False),
    xaxis=dict(title='<b>Framework</b>', title_font=dict(size=18)),
)

fig.show()
url = plot(fig, validate=False)
url

'temp-plot.html'

## **Tree maps**

In [255]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import pandas as pd
labels_size = [20]

# Treemap nodes as (label, parent, value, colour) rows; an empty parent string
# places the node at the top level.
# NOTE(review): "Other" carries the value 4 and also has four children of
# value 1 each -- confirm how the parent value should relate to its children.
nodes = [
    ("Water Supply", "", 10, "royalblue"),
    ("Agriculture", "", 17, "green"),
    ("Other", "", 4, "lightgray"),
    ("Ecological Dynamics", "Other", 1, "purple"),
    ("Water Security", "Other", 1, "cyan"),
    ("Human Evolution", "Other", 1, "brown"),
    ("ATES Systems", "Other", 1, "yellow"),
]

treemap_trace = go.Treemap(
    labels=[node[0] for node in nodes],
    parents=[node[1] for node in nodes],
    values=[node[2] for node in nodes],   # refer to labels
    textinfo="label+value",
    outsidetextfont={"size": 20, "color": "darkblue"},
    marker=dict(line=dict(width=1, color='white'),
                colors=[node[3] for node in nodes]),
    pathbar={"visible": False},
    textposition='middle center',
    textfont=dict(),
)

fig = go.Figure(treemap_trace)
fig.update_layout(width=1200, height=600)
# Alternative palette: marker_colors = ["pink", "royalblue", "lightgray", "purple", "cyan", "lightgray", "lightblue"]

url = plot(fig, validate=False)
url

'temp-plot.html'

## **Sources Dynamics**

In [None]:
# Number of GW_ABM records per publication year (PY), as a one-column frame.
SD_GW_ABM = GW_ABM.groupby(["PY"]).size().to_frame('GW_ABM Count')

In [None]:
import pandas as pd

# Load the GW_ABM database and count records per (publication year, source).
GW_ABM = pd.read_csv("Cleaning Files GWABM/GWABM_DB.csv")
year_and_source = GW_ABM.loc[:, ['PY', 'SO']]
print(year_and_source)

SD = GW_ABM.groupby(by=['PY', 'SO'])
# Group sizes as a single-column DataFrame indexed by (PY, SO).
a = SD.size().to_frame()

## **Author keywords CDF**

**Here we read files**

In [None]:
# Load the keyword CDF table indexed by 'Year' and discard its first column.
KW = pd.read_csv("KW_CDF.csv", index_col='Year')
KW = KW.drop(columns=KW.columns[0])
KW.tail()
# Remove the trailing row twice.
# NOTE(review): the original comment said "Now we erase year 2020", yet two
# trailing rows are removed here -- confirm both drops are intended.
KW = KW.drop(index=KW.tail(1).index)
KW = KW.drop(index=KW.tail(1).index)
KW.tail()

In [None]:
# Load the keyword PDF table indexed by 'Year' and discard its first column.
KW = pd.read_csv("KW_PDF.csv", index_col='Year')
KW = KW.drop(columns=KW.columns[0])
KW.tail()
# Remove the trailing row (described in the original as erasing year 2020).
KW = KW.drop(index=KW.tail(1).index)
KW.tail()

## **Annual Scientific Production (Matplotlib)**

In [None]:
# Annual scientific production: read the final cleaned database of every search
# (produced in the Temporary Variables section) and count documents per year.
GW_ABM = pd.read_csv("Cleaning Files GWABM/GWABM_DB.csv")
SH = pd.read_csv("Cleaning Files SH/SH_DB.csv")
TC = pd.read_csv("Cleaning Files TC/TC_DB.csv")
PM = pd.read_csv("Cleaning Files PM/PM_DB.csv")
EM = pd.read_csv("Cleaning Files EM/EM_DB.csv")
EM_2 = pd.read_csv("Cleaning Files EM2/EM2_DB.csv")

for table_shape in (SH.shape, TC.shape, PM.shape, EM_2.shape):
    print(table_shape)

# One single-column frame per database, indexed by publication year (PY).
ASP_GW_ABM = GW_ABM.groupby(["PY"]).size().to_frame('GW_ABM Count')
ASP_SH = SH.groupby(["PY"]).size().to_frame('SH Count')
ASP_TC = TC.groupby(["PY"]).size().to_frame('TC Count')
ASP_PM = PM.groupby(["PY"]).size().to_frame('PM Count')
ASP_EM = EM.groupby(["PY"]).size().to_frame('EM Count')
ASP_EM_2 = EM_2.groupby(["PY"]).size().to_frame('EM2 Count')

# Align all counts on the year index; years missing from a database count as 0.
frames = [ASP_GW_ABM, ASP_SH, ASP_TC, ASP_PM, ASP_EM, ASP_EM_2]
concat = pd.concat(frames, axis=1, sort=False).fillna(0)
concat
# Drop the last row (year 2020 per the original note) to avoid bias.
concat = concat.drop(concat.tail(1).index)
concat

In [None]:
%matplotlib 
%matplotlib inline
# This turns on “inline plotting”, where plot graphics will appear in the notebook without "plt.show()". Besides, cells below the plotting won't affect the graph.

# A matplotlib plot consists of a figure object (like a canvas), an axes object (like a subplot or panel), and other objects (title, axis, colorbar...).
# Accordingly, a plot can be developed by creating or modifying the different objectects (Figure, axes,...).
import matplotlib.pyplot as plt

ax = plt.gca()

concat.plot(kind='line',y='GW_ABM Count',color='skyblue', ax = ax, linewidth = 2, linestyle = '-', label = 'GW_ABM Count', alpha = 1)
concat.plot(kind='line',y='SH Count',color='orange', ax = ax)
concat.plot(kind='line',y='TC Count',color='green', ax = ax)
concat.plot(kind='line',y='PM Count',color='red', ax = ax)
concat.plot(kind='line',y='EM Count',color='magenta', ax = ax)
concat.plot(kind='line',y='EM2 Count',color='brown', ax = ax)

plt.ylabel('Number of Documents')
plt.xlabel('Publication Year')

#legend = plt.gca().get_legend_handles_labels()
#plt.legend(reversed_handles,labels,loc='lower right')

#plt.show()


# Saving the Figure
#plt.savefig('Annual Scientific Production.png')
#plt.savefig('Annual Scientific Production.png', quality = 95, dpi=None, facecolor='w', edgecolor='w', orientation='portrait', papertype=None, format=None, transparent=False, bbox_inches=None, pad_inches=0.1, frameon=None, metadata=None)

# Re-opening the saved figure:
#from IPython.display import Image
#Image('Annual Scientific Production.png')

#ax = plt.gca()
#df.groupby('PY')['SO'].nunique().plot(kind='line', ax=ax)
#plt.show()

In [None]:
# Two stacked panels (two rows, one column) on a single figure.
fig = plt.figure(figsize=(12, 8))

# (subplot position, keyword arguments of the line drawn in that panel)
panels = [
    (211, dict(y='GW_ABM Count', color='skyblue', linewidth=2, linestyle='-',
               label='GW_ABM Count', alpha=1)),
    (212, dict(y='SH Count', color='orange')),
]
for position, line_kwargs in panels:
    ax = fig.add_subplot(position)
    concat.plot(kind='line', ax=ax, **line_kwargs)
    ax.set_title("Annual Scientific Production")
    ax.set_xlabel("Publication Year")

# Make sure the elements of the plot are arranged properly
plt.tight_layout()

In [None]:
# Compound Annual Growth Rate of the SH counts:
#   (end_value / start_value) ** (1 / (num_periods - 1)) - 1
# where the exponent's denominator is the span between the first and last year.
first_year, last_year = ASP_SH.index[0], ASP_SH.index[-1]
growth_ratio = ASP_SH.iloc[-1] / ASP_SH.iloc[0]
annual_rate = growth_ratio ** (1 / (last_year - first_year)) - 1
print("Annual Compound Growth Rate is: ", annual_rate)

## **Author Keywords CDF (Matplotlib)**

In [None]:
%matplotlib 
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd

KW = pd.read_csv("KW.csv", index_col = 0)
ax = plt.gca()
KW.plot(kind='line',y='PARTICIPATORY MODELING',color='skyblue', ax = ax, linewidth = 2, linestyle = '-', label = 'PARTICIPATORY MODELING', alpha = 1)
KW.plot(kind='line',y='TELE-COUPLING',color='red', ax = ax, linewidth = 2, linestyle = '-', label = 'TELE-COUPLING', alpha = 1)
KW.plot(kind='line',y='SOCIO-HYDROLOGY',color='orange', ax = ax, linewidth = 2, linestyle = '-', label = 'SOCIO-HYDROLOGY', alpha = 1)
KW.plot(kind='line',y='EXPLORATORY MODELING',color='green', ax = ax, linewidth = 2, linestyle = '-', label = 'EXPLORATORY MODELING', alpha = 1)

plt.ylabel('CDF of Author Keywords')
plt.xlabel('Year')


In [None]:
%matplotlib 
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd

KW = pd.read_csv("KW.csv", index_col = 0)
KW.head(2)

#ax = plt.gca()

KW.plot(x='Year', kind='line', figsize = (10,10))
#KW.plot(kind='line',y='PARTICIPATORY MODELING',color='skyblue', ax = ax, linewidth = 2, linestyle = '-', label = 'PARTICIPATORY MODELING', alpha = 1)
#KW.plot(kind='line',y='SH Count',color='orange', ax = ax)
#concat.plot(kind='line',y='TC Count',color='green', ax = ax)
#concat.plot(kind='line',y='PM Count',color='red', ax = ax)
#concat.plot(kind='line',y='EM Count',color='magenta', ax = ax)
#concat.plot(kind='line',y='EM2 Count',color='brown', ax = ax)
#
plt.ylabel('CDF of Author Keywords')
plt.xlabel('Year')

In [None]:
# df[]  -- unfinished indexing expression left over from scratch work. It is
# kept only as a comment: `df[]` is a SyntaxError and would stop "Run All".

In [9]:
# Show the (rows, columns) dimensions of the GW_ABM table currently in memory.
GW_ABM.shape

(39, 31)

In [66]:
# Index labels of the selected papers; for each one, show its title (TI) and
# its short reference (SR), then preview the table.
papers_list = [1, 2, 8, 10, 11, 14, 17, 18, 21, 22, 25, 31, 32, 33, 35]
titles, short_refs = GW_ABM['TI'], GW_ABM['SR']
for paper_row in papers_list:
    titles[paper_row]
    short_refs[paper_row]
GW_ABM.head()

'LARGESCALE WATER RESOURCES MANAGEMENT WITHIN THE FRAMEWORK OF GLOWADANUBE PART A THE GROUNDWATER MODEL'

'BARTHEL R, 2005, PHYS CHEM EARTH'

'LARGESCALE WATER RESOURCES MANAGEMENT WITHIN THE FRAMEWORK OF GLOWADANUBE THE WATER SUPPLY MODEL'

'NICKEL D, 2005, PHYS CHEM EARTH'

'ASSESSING GROUNDWATER POLICY WITH COUPLED ECONOMIC GROUNDWATER HYDROLOGIC MODELING'

'MULLIGAN K, 2014, WATER RESOUR RES'

'GLOBAL SENSITIVITY ANALYSIS FOR LARGESCALE SOCIOHYDROLOGICAL MODELS USING HADOOP'

'HU Y, 2015, ENVIRON MODELL SOFTW'

'AN AGENTBASED PLATFORM FOR SIMULATING COMPLEX HUMANAQUIFER INTERACTIONS IN MANAGED GROUNDWATER SYSTEMS'

'CASTILLA-RHO J, 2015, ENVIRON MODELL SOFTW'

'ON THE ROLE OF INDIVIDUALS IN MODELS OF COUPLED HUMAN AND NATURAL SYSTEMS LESSONS FROM A CASE STUDY IN THE REPUBLICAN RIVER BASIN'

'NOEL P, 2017, ENVIRON MODELL SOFTW'

'COMBINING HUMAN AND MACHINE INTELLIGENCE TO DERIVE AGENTS BEHAVIORAL RULES FOR GROUNDWATER IRRIGATION'

'HU Y, 2017, ADV WATER RESOUR'

'METHODS FOR PLANNING OF ATES SYSTEMS'

'BLOEMENDAL M, 2018, APPL ENERGY'

'ASSESSING THE EFFECTS OF WATER RESTRICTIONS ON SOCIOHYDROLOGIC RESILIENCE FOR SHARED GROUNDWATER SYSTEMS'

'AL-AMIN S, 2018, J HYDROL'

'A LINKED MODELLING FRAMEWORK TO EXPLORE INTERACTIONS AMONG CLIMATE SOIL WATER AND LAND USE DECISIONS IN THE ARGENTINE PAMPAS'

'GARCIA G, 2019, ENVIRON MODELL SOFTW'

'A COUPLED SIMULATION ARCHITECTURE FOR AGENTBASEDGEOHYDROLOGICAL MODELLING WITH NETLOGO AND MODFLOW'

'JAXA-ROZEN M, 2019, ENVIRON MODELL SOFTW'

'EFFECT OF HYDROGEOLOGIC AND CLIMATIC VARIABILITY ON PERFORMANCE OF A GROUNDWATER MARKET'

'KHAN H, 2019, WATER RESOUR RES'

'COMPARING THE ECONOMIC AND ENVIRONMENTAL EFFECTS OF DIFFERENT WATER MANAGEMENT SCHEMES USING A COUPLED AGENTHYDROLOGIC MODEL'

'LEI X, 2019, J WATER RESOUR PLANN MANAGE'

'SUSTAINABLE GROUNDWATER MANAGEMENT HOW LONG AND WHAT WILL IT TAKE'

'CASTILLA-RHO J, 2019, GLOBAL ENVIRON CHANGE'

'A COUPLED AGENTBASED RISKBASED OPTIMIZATION MODEL FOR INTEGRATED URBAN WATER MANAGEMENT'

'BAKHTIARI P, 2020, SUSTAINABLE CITIES SOC'

Unnamed: 0,AU,TI,SO,JI,AB,DE,ID,LA,DT,DT2,...,DB,AU_UN,AU1_UN,AU_UN_NR,SR_FULL,SR,CR_AU,CR_SO,AU_CO,AU1_CO
0,FEUILLETTE S;BOUSQUET F;LE G P,SINUSE A MULTIAGENT MODEL TO NEGOTIATE WATER D...,ENVIRONMENTAL MODELLING AND SOFTWARE,ENVIRON. MODELL. SOFTW.,MANY WATER TABLES ARE CURRENTLY OVEREXPLOITED ...,WATER TABLE OVEREXPLOITATION; IRRIGATION; MULT...,HOMO-ECONOMICUS; GROUNDWATER; SYSTEMS; COMMONS,ENGLISH,ARTICLE,ARTICLE,...,ISI,NOTREPORTED,NOTREPORTED,,"FEUILLETTE S, 2003, ENVIRON MODELL SOFTW","FEUILLETTE S, 2003, ENVIRON MODELL SOFTW",AKHY RB;ANTONA M;ARMSTRONG A;BALMANN A;BARRETE...,HOUILLE BLANCHE;LECT NOTES ARTIF INT;AGR ECOSY...,FRANCE;FRANCE;THAILAND,THAILAND
1,BARTHEL R;ROJANSCHI V;WOLF J;BRAUN J,LARGESCALE WATER RESOURCES MANAGEMENT WITHIN T...,PHYSICS AND CHEMISTRY OF THE EARTH,PHYS. CHEM. EARTH,"THE RESEARCH PROJECT GLOWA-DANUBE, FINANCED BY...",DANUBE; GROUNDWATER MODEL; MODFLOW; HYDROGEOLO...,,ENGLISH,"ARTICLE, PROCEEDINGS PAPER",ARTICLE,...,ISI,UNIV STUTTGART,UNIV STUTTGART,,"BARTHEL R, 2005, PHYS CHEM EARTH","BARTHEL R, 2005, PHYS CHEM EARTH",BOOCH GJ;HENNICKER R;KRAUS A;MAUSER W;MAUSER W...,UML USER HDB.;GLOWA GERMAN PROGRAM;002 GLOWA.;...,GERMANY,GERMANY
2,NICKEL D;BARTHEL R;BRAUN J,LARGESCALE WATER RESOURCES MANAGEMENT WITHIN T...,PHYSICS AND CHEMISTRY OF THE EARTH,PHYS. CHEM. EARTH,"THE RESEARCH PROJECT GLOWA-DANUBE, FINANCED BY...",DECISION SUPPORT SYSTEM; SUSTAINABLE WATER SUP...,,ENGLISH,"ARTICLE, PROCEEDINGS PAPER",ARTICLE,...,ISI,UNIV STUTTGART,UNIV STUTTGART,,"NICKEL D, 2005, PHYS CHEM EARTH","NICKEL D, 2005, PHYS CHEM EARTH",BMBF,AKT NACHH WETTB DTSC.,GERMANY,GERMANY
3,ZELLNER M,GENERATING POLICIES FOR SUSTAINABLE WATER USE ...,ENVIRONMENT AND PLANNING B: PLANNING AND DESIGN,ENVIRON. PLAN. B-PLAN. DES.,RAPIDLY DECLINING GROUNDWATER LEVELS SINCE THE...,,MANAGEMENT,ENGLISH,ARTICLE,ARTICLE,...,ISI,UNIV MICHIGAN,UNIV MICHIGAN,,"ZELLNER M, 2007, ENVIRON PLAN B-PLAN DES","ZELLNER M, 2007, ENVIRON PLAN B-PLAN DES",BECU N;BOGART W T;DEP ENV QUAL,ECOL MODEL;EC CITIES SUBURBS.;GREAT LAK MAP.,USA,USA
4,STRATON A;HECKBERT S;WARD J;SMAJGL A,EFFECTIVENESS OF A MARKETBASED INSTRUMENT FOR ...,WATER RESOURCES,WATER RESOUR.,THE SEASONAL NATURE OF AUSTRALIA'S TROPICAL RI...,,,ENGLISH,ARTICLE,ARTICLE,...,ISI,NOTREPORTED,NOTREPORTED,,"STRATON A, 2009, WATER RESOUR","STRATON A, 2009, WATER RESOUR",BEGG GW;BERNSTEIN PM;BLANCH S;BOUSQUET F;CAMER...,INVENTORY RISK ASSES.;RESOUR ENERGY ECON;AQUAT...,AUSTRALIA;AUSTRALIA;AUSTRALIA,AUSTRALIA


In [None]:
# From the authors column, these ones are in the middle
# (Free-text notes kept as comments: as bare lines they are not valid Python
# and would raise a SyntaxError when the notebook is run top to bottom.)
# BARTHEL R (1)
# NICKEL D; BARTHEL R (2)
# ZELLNER M (3)
# YANG Y (8)
# CAI X (10)
# CASTILLA-RHO J;MARIETHOZ G;ROJAS R;ANDERSEN (11)
# NIKOO M
# NOEL P;CAI X (14)
# CAI X (17)
# BLOEMENDAL M;JAXA-ROZEN M (18)
# LARSON K (21)
# RAJAGOPALAN B (22)
# JAXA-ROZEN M; BLOEMENDAL M (25)
# ZELLNER M (28)
# ZELLNER M (30)
# KHAN H (31)
# YANG Y (32)
# CASTILLA-RHO J;ROJAS R;ANDERSEN M;MARIETHOZ G (33)
# NIKOO M (35)
