# API Final Analysis
### Daina Bouquin, Daniel Chivvis

Scripts below were used to generate all .csv files in the API_RESULTS_040519/ folder

In [99]:
import pandas as pd
import numpy as np

In [100]:
API_results = pd.read_csv("API_CLEAN_INPUT_040519.csv") 

In [101]:
list(API_results.columns.values)

['Alias',
 'Software_Package',
 'Identifier',
 'Highlight',
 'Citation',
 'Bibcode',
 'Alternate_Bibcode',
 'BibGroup',
 'Publisher',
 'Article_ID',
 'DOI',
 'Pub_Year',
 'Author',
 'Title']

In [102]:
API_results.head(5)

Unnamed: 0,Alias,Software_Package,Identifier,Highlight,Citation,Bibcode,Alternate_Bibcode,BibGroup,Publisher,Article_ID,DOI,Pub_Year,Author,Title
0,AstroBlend,AstroBlend,0,of this article.)Fig.9Isocontours generated wi...,False,2016A&C....15...50N,2016arXiv160203178N,CfA,Astronomy and Computing,2016arXiv160203178N|2016A&C....15...50N|10.101...,10.1016/j.ascom.2016.02.002,2016,"Naiman, J. P.",AstroBlend: An astrophysical visualization pac...
1,AstroBlend,AstroBlend,0,"From that perspective, recent efforts like fre...",True,2017PASP..129e8012V,2016arXiv161103862V,,Publications of the Astronomical Society of th...,2016arXiv161103862V|2017PASP..129e8012V|10.108...,10.1088/1538-3873/129/975/058012,2017,"Vogt, Fr�d�ric P. A.",Linking the X3D Pathway to Integral Field Spec...
2,AstroBlend,AstroBlend,0,"-passing the X3D file format altogether), we r...",False,2016ApJ...818..115V,2015arXiv151002796V,,The Astrophysical Journal,2015arXiv151002796V|2016ApJ...818..115V|10.384...,10.3847/0004-637X/818/2/115,2016,"Vogt, Fr�d�ric P. A.",Advanced Data Visualization in Astrophysics: T...
3,AstroBlend,AstroBlend,0,AstroBlend6 6 <em>www.astroblend.com&#x2F;</em...,True,2017PASP..129e8010G,2016arXiv161106965G,,Publications of the Astronomical Society of th...,2016arXiv161106965G|2017PASP..129e8010G|10.108...,10.1088/1538-3873/129/975/058010,2017,"G�rate, Mat�as",Voxel Datacubes for 3D Visualization in Blender
4,AstroBlend,AstroBlend,0,on Big Data from Space (BiDS 2014)2014232235Na...,True,2017A&C....19...45P,2017arXiv170306651P,,Astronomy and Computing,2017arXiv170306651P|2017A&C....19...45P|10.101...,10.1016/j.ascom.2017.03.004,2017,"Punzo, D.",SlicerAstro: A 3-D interactive visual analytic...


In [103]:
# Convert highlight col to string

API_results['Highlight'] = API_results['Highlight'].astype('|S')\

In [104]:
API_results.to_csv("API_FINAL_ANALYSIS_040519.csv")

## Summary of Results

In [105]:
# How many papers did we find for each software package?
API_results.groupby('Software_Package')['Bibcode'].nunique()

Software_Package
AstroBlend         5
AstroPy          781
PlasmaPy           2
RADMC-3D         455
SAOImage DS9    1135
Spec2d           248
Stingray           6
TARDIS            28
WCS Tools        326
Name: Bibcode, dtype: int64

In [106]:
# Proportion of articles with a software alias in a citation
API_all_citations = API_results.loc[API_results['Citation'] == True]
API_cite_proportion = API_all_citations.groupby('Software_Package')['Bibcode'].nunique()/API_results.groupby('Software_Package')['Bibcode'].nunique()
API_cite_proportion

Software_Package
AstroBlend      0.600000
AstroPy         0.742638
PlasmaPy        1.000000
RADMC-3D        0.742857
SAOImage DS9    0.437885
Spec2d          0.516129
Stingray        0.500000
TARDIS          0.892857
WCS Tools       0.582822
Name: Bibcode, dtype: float64

In [107]:
# How many unique aliases were used in citations for each package?
print (API_all_citations.groupby('Software_Package')['Alias'].nunique())

Software_Package
AstroBlend       2
AstroPy          7
PlasmaPy         1
RADMC-3D        11
SAOImage DS9     9
Spec2d           8
Stingray         2
TARDIS           3
WCS Tools       10
Name: Alias, dtype: int64


In [108]:
# Which aliases were used in the papers with aliases in references? Write results to csv
API_cite_aliases = pd.DataFrame({'cite_count' : API_all_citations.groupby(['Software_Package', 'Alias', 'Identifier'])['Bibcode'].nunique()}).reset_index()
API_cite_aliases.to_csv("API_cite_aliases_040519.csv")
API_cite_aliases

Unnamed: 0,Software_Package,Alias,Identifier,cite_count
0,AstroBlend,AstroBlend,0,3
1,AstroBlend,astroblend.com,0,1
2,AstroPy,10.1051/0004-6361/201322068,1,3
3,AstroPy,AstroPy,0,576
4,AstroPy,Astropy Collaboration et al. (2018),0,64
5,AstroPy,arXiv:1307.6212,1,6
6,AstroPy,arXiv:1801.02634,1,12
7,AstroPy,astropy.org,0,41
8,AstroPy,github.com/astropy,0,7
9,PlasmaPy,PlasmaPy,0,2


In [109]:
# proportion of identifiers among software aliases in citations 
API_identifier_prop = API_results.loc[API_results['Identifier'] == 1]
API_identifier_prop = API_identifier_prop.groupby('Software_Package')['Bibcode'].nunique()/API_results.groupby('Software_Package')['Bibcode'].nunique()
API_identifier_prop

Software_Package
AstroBlend           NaN
AstroPy         0.089629
PlasmaPy        0.500000
RADMC-3D        0.028571
SAOImage DS9    0.006167
Spec2d          0.084677
Stingray             NaN
TARDIS               NaN
WCS Tools            NaN
Name: Bibcode, dtype: float64

In [111]:
# total mentions of software aliases over time
API_over_time = pd.DataFrame({'year_count' : API_results.groupby(["Software_Package", "Pub_Year"])['Bibcode'].nunique()}).reset_index()
# write results
API_over_time.to_csv("API_over_time_040519.csv")
# show results over time
API_over_time

Unnamed: 0,Software_Package,Pub_Year,year_count
0,AstroBlend,2016,2
1,AstroBlend,2017,3
2,AstroPy,1997,1
3,AstroPy,2000,2
4,AstroPy,2007,1
5,AstroPy,2008,1
6,AstroPy,2013,9
7,AstroPy,2014,29
8,AstroPy,2015,55
9,AstroPy,2016,112
