In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns

# Default size (width, height) for figures that do not pass their own figsize.
plt.rcParams["figure.figsize"] = (10,6)
#plt.rcParams["xtick.labelsize"] = 7

Yearly Citations Figure

In [None]:
# Yearly citation totals: a 'Year' column plus one column per group type.
dfYC = pd.read_csv('../Data/YearlyCitations.csv')

# Reshape wide -> long so seaborn can colour the lines by 'Type'.
# Row order matches the previous hand-built concat: all Education rows,
# then Company, then Cooperation.
dfYCAll = dfYC.melt(
    id_vars='Year',
    value_vars=['Education', 'Company', 'Cooperation'],
    var_name='Type',
    value_name='Citations',
)

# Apply the seaborn theme BEFORE drawing. It used to be set after the plot,
# so on a fresh kernel this figure was rendered without the theme and only
# picked it up when the cell was re-run.
sns.set(font_scale=1.75)

# Yearly Citations Lineplot
ax = sns.lineplot(data=dfYCAll,
            x='Year',
            y='Citations',
            hue='Type',
            marker='o',
            linewidth = 3.5,
            palette=['tab:blue','tab:orange','tab:grey']
            )
plt.xlabel("")
plt.show()

Paper Count Figures

In [None]:
# Bar chart of 'Paper Count' per 'Company' from Fig7Edu.csv (blue bars).
df7Edu = pd.read_csv('../Data/Fig7Edu.csv')

fig7E = sns.barplot(x='Company', y='Paper Count', data=df7Edu, color='tab:blue')

# Tilt the category labels so long names do not overlap.
tick_labels = fig7E.get_xticklabels()
fig7E.set_xticklabels(tick_labels, rotation=40, ha="right")

# The x axis needs no title; the tick labels speak for themselves.
fig7E.set_xlabel("")
plt.show()


In [None]:
# Bar chart of 'Paper Count' per 'Company' from Fig7Com.csv (orange bars).
df7Com = pd.read_csv('../Data/Fig7Com.csv')

fig7C = sns.barplot(x='Company', y='Paper Count', data=df7Com, color='tab:orange')

# Tilt the category labels so long names do not overlap.
tick_labels = fig7C.get_xticklabels()
fig7C.set_xticklabels(tick_labels, rotation=40, ha="right")

# The x axis needs no title; the tick labels speak for themselves.
fig7C.set_xlabel("")
plt.show()


Citation Count Figures

In [None]:
# Bar chart of 'Citation Count' per 'Company' from Fig8Edu.csv (blue bars).
df8Edu = pd.read_csv('../Data/Fig8Edu.csv')

fig8E = sns.barplot(x='Company', y='Citation Count', data=df8Edu, color='tab:blue')

# Tilt the category labels so long names do not overlap.
tick_labels = fig8E.get_xticklabels()
fig8E.set_xticklabels(tick_labels, rotation=40, ha="right")

# Blank x title, human-readable y title.
fig8E.set_xlabel("")
fig8E.set_ylabel("Total Citations")
plt.show()

In [None]:
# Bar chart of 'Citation Count' per 'Company' from Fig8Com.csv (orange bars).
df8Com = pd.read_csv('../Data/Fig8Com.csv')

fig8C = sns.barplot(x='Company', y='Citation Count', data=df8Com, color='tab:orange')

# Tilt the category labels so long names do not overlap.
tick_labels = fig8C.get_xticklabels()
fig8C.set_xticklabels(tick_labels, rotation=40, ha="right")

# Blank x title, human-readable y title.
fig8C.set_xlabel("")
fig8C.set_ylabel("Total Citations")
plt.show()

Publication and Citation Shares

In [None]:
# Re-apply the seaborn theme with font_scale=2. This is global state: it
# affects every figure drawn after this cell until sns.set is called again.
sns.set(font_scale=2)

In [None]:
# Pie chart of the 'Papers' column, one slice per 'Company' row.
df9Com = pd.read_csv('../Data/Fig9Com.csv')

# Derive colours and explode offsets from the number of rows instead of
# hard-coding six entries: plt.pie raises if len(explode) != len(x).
n_slices = len(df9Com.index)
colors = sns.color_palette(n_colors=n_slices)
# Pull only the first slice out of the pie.
explode = ([0.1] + [0] * (n_slices - 1)) if n_slices else []

# Open a dedicated, slightly taller figure; the handle is not needed afterwards.
plt.figure(figsize =(10, 8))
fig91 = plt.pie(df9Com['Papers'],
                labels=df9Com['Company'],
                labeldistance=None,   # hide the labels on the wedges; they go in the legend
                startangle=90,
                pctdistance=0.8,
                colors=colors,
                autopct='%.0f%%',
                explode=explode
                )
plt.legend(bbox_to_anchor =(1,0.75), ncol = 1)
plt.show()

In [None]:
# Pie chart of the 'Citations' column, one slice per 'Company' row.
df9Com = pd.read_csv('../Data/Fig9Com.csv')

# Derive colours and explode offsets from the number of rows instead of
# hard-coding six entries: plt.pie raises if len(explode) != len(x).
n_slices = len(df9Com.index)
colors = sns.color_palette(n_colors=n_slices)
# Pull only the first slice out of the pie.
explode = ([0.1] + [0] * (n_slices - 1)) if n_slices else []

# Open a dedicated, slightly taller figure; the handle is not needed afterwards.
plt.figure(figsize =(10, 8))
fig92 = plt.pie(df9Com['Citations'],
                labels=df9Com['Company'],
                labeldistance=None,   # hide the labels on the wedges; they go in the legend
                startangle=90,
                pctdistance=0.8,
                colors=colors,
                autopct='%.0f%%',
                explode=explode
                )
plt.legend(bbox_to_anchor =(1,0.75), ncol = 1)
plt.show()

Mean citations per sample

In [None]:
# Grouped bar chart: 'Average Citations' per 'Sample', one bar colour per 'Type'.
df3 = pd.read_csv('../Data/Fig3.csv')

fig3 = sns.barplot(
    x='Sample',
    y='Average Citations',
    hue='Type',
    data=df3,
    palette=['tab:blue','tab:orange','tab:grey'],
)

# Shrink the legend text relative to the enlarged global font scale.
fig3.legend(prop={'size': 16})
fig3.set_xlabel("")
plt.show()


Boxplot

In [None]:
def _drop_above_iqr_limit(group, column='CitationCount', factor=1.5):
    """Return (rows of `group` not exceeding the cutoff, the cutoff).

    The cutoff is factor * IQR of `column`, computed on `group` itself.
    Rows whose value is NaN are kept, exactly as the previous drop-by-index
    implementation did.

    NOTE(review): the cutoff is 1.5 * IQR itself, not Q3 + 1.5 * IQR
    (Tukey's upper fence). Left unchanged here; confirm it is intended.
    """
    iqr = group[column].quantile(.75) - group[column].quantile(.25)
    limit = iqr * factor
    return group.loc[~(group[column] > limit)], limit


dfPapers = pd.read_csv('../Data/AllCit.tsv', sep='\t')
print(len(dfPapers.index))
dfPapers = dfPapers.sort_values('CitationCount', ascending=False)

# Remove all papers above the group-specific thresholds.
# The group order here fixes both the row order of `df` and the
# box/colour order in the plot below.
trimmed = []
for group_name in ['Education', 'Company', 'Cooperation']:
    kept, limit = _drop_above_iqr_limit(dfPapers[dfPapers['GroupType'] == group_name])
    print(limit)
    trimmed.append(kept)
dfEdu, dfCom, dfBoth = trimmed

# concat
df = pd.concat(trimmed, ignore_index=True)
print(len(df.index))

# Apply the theme BEFORE drawing so the figure looks the same on the first
# run and on every re-run (it used to be set after the plot was created).
sns.set(font_scale=1.75)

# Boxplot
#fig = plt.figure(figsize =(10, 8))
fig4 = sns.boxplot( x='GroupType', y='CitationCount',
            data=df, 
            showmeans=True, 
            meanprops={"marker": "+",
                       "markeredgecolor": "black",
                       "markersize": "10"},
            palette=['tab:blue','tab:orange','tab:grey']
            )
plt.xlabel("")
plt.ylabel("Citations")
plt.show()

Correlation Matrix

In [None]:
dfAllAlt = pd.read_csv('../Data/AllAlt.tsv', sep='\t', header=0)

# Select the metrics of interest and give them display names.
# rename() returns a new frame, which avoids the SettingWithCopyWarning
# raised by renaming a column slice in place.
df = dfAllAlt[['CitationCount','Score','Readers_count','Cited_by_accounts_count','GroupType']].rename(
    columns={
        'CitationCount': 'Citations',
        'Score': 'Altmetric Score',
        'Readers_count': 'Online Readers',
        'Cited_by_accounts_count': 'Online Mentions',
    }
)

# Correlate the numeric columns only. 'GroupType' holds text labels, and
# pandas >= 2.0 raises on non-numeric columns instead of silently dropping
# them as older versions did.
corr_df = df.select_dtypes(include='number').corr(method="pearson")

plt.figure(figsize =(10, 8))
plt.rc('font', size=12)  # global: also affects figures drawn later
fig10 = sns.heatmap(corr_df,annot=True)
fig10.figure.tight_layout()
plt.show()

Tag Matches

In [None]:
# Horizontal bar chart of 'Percent' per 'Subfield', largest value first.
df13 = pd.read_csv('../Data/Fig13.csv').sort_values('Percent', ascending=False)

# Eleven bar colours: all blue except the fourth entry, which is highlighted in red.
colors = ['tab:blue'] * 11
colors[3] = 'tab:red'

fig13 = sns.barplot(x='Percent', y='Subfield', data=df13, palette=colors)

fig13.set_ylabel("")
plt.show()

Scatterplot

In [None]:
dfAllAlt = pd.read_csv('../Data/AllAlt.tsv', sep='\t', header=0)

# Fixed group order and matching colours. Passing hue_order to every
# scatterplot call keeps a group's colour identical across the base layer and
# the zone overlays, even when a zone subset lacks a group or lists the groups
# in a different order.
GROUP_ORDER = ['Education', 'Company', 'Cooperation']
GROUP_COLORS = ['tab:blue', 'tab:orange', 'tab:grey']

# Zone borders, expressed on the same log scale as the plotted columns.
# NOTE(review): the columns hold log(x + offset) while these cut-offs are
# log(threshold) without the offset; confirm which raw values are intended.
CIT_HIGH = np.log(43)
CIT_LOW = np.log(3)
ALT_HIGH = np.log(9)
ALT_LOW = np.log(1.5)

# Apply a shifted logarithmic scale to both metrics (the positive offsets keep
# the log finite for zero values). assign() builds a new frame, which avoids
# the SettingWithCopyWarning caused by writing into a column slice.
df = (
    dfAllAlt[['CitationCount','Score','GroupType']]
    .assign(
        CitationCount=lambda d: np.log(d['CitationCount']+1),
        Score=lambda d: np.log(d['Score']+0.75),
    )
    .rename(columns={ 'Score':'Altmetric Score', 'CitationCount':'Citation Count', 'GroupType': 'Group Type'})
)


def _print_zone_shares(label, zone, population):
    """Print, per group in GROUP_ORDER, the fraction of `population` rows inside `zone`.

    A group with no rows in `population` yields nan instead of raising
    ZeroDivisionError.
    """
    shares = []
    for group in GROUP_ORDER:
        total = len(population[population['Group Type']==group].index)
        hits = len(zone[zone['Group Type']==group].index)
        shares.append(hits/total if total else float('nan'))
    print(label, *shares)


# Calculated in zones, to be able to isolate Exceptionals, Influencers and Scholars
df1 = df[(df['Citation Count']>=CIT_HIGH) & (df['Altmetric Score']>=ALT_HIGH)]
_print_zone_shares('Exceptionals: ', df1, df)
df2 = df[(df['Citation Count']<=CIT_LOW) & (df['Altmetric Score']>=ALT_HIGH)]
_print_zone_shares('Influencers: ', df2, df)
df3 = df[(df['Citation Count']>=CIT_HIGH) & (df['Altmetric Score']<=ALT_LOW)]
_print_zone_shares('Scholars: ', df3, df)

# Scatterplot with borders
sns.set(font_scale=2)
plt.figure(figsize =(10, 8))
plt.axvline(x=ALT_LOW, color='black', linestyle='dashed', linewidth=1.5, alpha=0.66)
plt.axvline(x=ALT_HIGH, color='black', linestyle='dashed', linewidth=1.5, alpha=0.66)
plt.axhline(y=CIT_HIGH, color='black', linestyle='dashed', linewidth=1.5, alpha=0.66)
# NOTE(review): this is the only border drawn without alpha=0.66; confirm it is deliberate.
plt.axhline(y=CIT_LOW, color='black', linestyle='dashed', linewidth=1.5)

# Base layer with the legend, then the three zones re-drawn on top of it.
sns.scatterplot(data=df, x="Altmetric Score", y="Citation Count",  hue="Group Type", hue_order=GROUP_ORDER, palette=GROUP_COLORS) #, alpha=0.5
for zone in (df1, df2, df3):
    sns.scatterplot(data=zone, x="Altmetric Score", y="Citation Count",  hue="Group Type", hue_order=GROUP_ORDER, legend=False, palette=GROUP_COLORS)

plt.legend(loc ='upper left', prop={'size': 17})
plt.ylabel("Citations")
plt.show()

# Calculate the thresholds from above using this line
#print(dfAllAlt['CitationCount'].describe(percentiles=[.2,.8,]), dfAllAlt['Score'].describe(percentiles=[.2,.8]))