# API Final Analysis
### Daina Bouquin, Daniel Chivvis

Scripts below were used to generate all .csv files in the API_RESULTS_061019/ folder

In [1]:
import pandas as pd
import numpy as np

In [2]:
API_results = pd.read_csv("API_CLEAN_INPUT_061019.csv") 

In [3]:
list(API_results.columns.values)

['Alias',
 'Software_Package',
 'Identifier',
 'Highlight',
 'Citation',
 'Bibcode',
 'Alternate_Bibcode',
 'BibGroup',
 'Publisher',
 'Article_ID',
 'DOI',
 'Pub_Year',
 'Pub_Date',
 'Author',
 'Title']

In [4]:
API_results.head(5)

Unnamed: 0,Alias,Software_Package,Identifier,Highlight,Citation,Bibcode,Alternate_Bibcode,BibGroup,Publisher,Article_ID,DOI,Pub_Year,Pub_Date,Author,Title
0,astroblend.com,Astroblend,0,#x2014; www.<em>astroblend.com</em> #x2014; ha...,False,2016A&C....15...50N,2016arXiv160203178N,CfA,Astronomy and Computing,2016arXiv160203178N|2016A&C....15...50N|10.101...,10.1016/j.ascom.2016.02.002,2016,2016-04-00,"Naiman, J. P.",AstroBlend: An astrophysical visualization pac...
1,astroblend.com,Astroblend,0,AstroBlend6 6 <em>www.astroblend.com&#x2F;</em...,True,2017PASP..129e8010G,2016arXiv161106965G,,Publications of the Astronomical Society of th...,2016arXiv161106965G|2017PASP..129e8010G|10.108...,10.1088/1538-3873/129/975/058010,2017,2017-05-00,"Gárate, Matías",Voxel Datacubes for 3D Visualization in Blender
2,astroblend.com,Astroblend,0,"-passing the X3D file format altogether), we r...",False,2016ApJ...818..115V,2015arXiv151002796V,,The Astrophysical Journal,2015arXiv151002796V|2016ApJ...818..115V|10.384...,10.3847/0004-637X/818/2/115,2016,2016-02-00,"Vogt, Frédéric P. A.",Advanced Data Visualization in Astrophysics: T...
3,AstroBlend,Astroblend,0,of this article.)Fig.9Isocontours generated wi...,False,2016A&C....15...50N,2016arXiv160203178N,CfA,Astronomy and Computing,2016arXiv160203178N|2016A&C....15...50N|10.101...,10.1016/j.ascom.2016.02.002,2016,2016-04-00,"Naiman, J. P.",AstroBlend: An astrophysical visualization pac...
4,AstroBlend,Astroblend,0,"From that perspective, recent efforts like fre...",True,2017PASP..129e8012V,2016arXiv161103862V,,Publications of the Astronomical Society of th...,2016arXiv161103862V|2017PASP..129e8012V|10.108...,10.1088/1538-3873/129/975/058012,2017,2017-05-00,"Vogt, Frédéric P. A.",Linking the X3D Pathway to Integral Field Spec...


In [5]:
# Convert highlight col to string

API_results['Highlight'] = API_results['Highlight'].astype('|S')\

In [6]:
API_results.to_csv("API_FINAL_ANALYSIS_061019.csv")

## Summary of Results

In [7]:
# How many papers did we find for each software package?
API_results.groupby('Software_Package')['Bibcode'].nunique()

Software_Package
Astroblend         5
Astropy          437
RADMC-3D         404
SAOImage DS9    1080
Spec2d           328
Stringray        319
TARDIS           116
WCSTools         334
Name: Bibcode, dtype: int64

In [8]:
# Proportion of articles with a software alias in a citation
API_all_citations = API_results.loc[API_results['Citation'] == True]
API_cite_proportion = API_all_citations.groupby('Software_Package')['Bibcode'].nunique()/API_results.groupby('Software_Package')['Bibcode'].nunique()
API_cite_proportion

Software_Package
Astroblend      0.600000
Astropy         0.718535
RADMC-3D        0.742574
SAOImage DS9    0.435185
Spec2d          0.545732
Stringray       0.576803
TARDIS          0.543103
WCSTools        0.574850
Name: Bibcode, dtype: float64

In [9]:
# How many unique aliases were used in citations for each package?
print (API_all_citations.groupby('Software_Package')['Alias'].nunique())

Software_Package
Astroblend       2
Astropy         24
RADMC-3D        15
SAOImage DS9    17
Spec2d          13
Stringray        2
TARDIS           3
WCSTools        13
Name: Alias, dtype: int64


In [10]:
# Which aliases were used in the papers with aliases in references? Write results to csv
API_cite_aliases = pd.DataFrame({'cite_count' : API_all_citations.groupby(['Software_Package', 'Alias', 'Identifier'])['Bibcode'].nunique()}).reset_index()
API_cite_aliases.to_csv("API_cite_aliases_061019.csv")
API_cite_aliases

Unnamed: 0,Software_Package,Alias,Identifier,cite_count
0,Astroblend,AstroBlend,0,3
1,Astroblend,astroblend.com,0,1
2,Astropy,10.1051/0004-6361/201322068,1,1
3,Astropy,AstroPy,0,306
4,Astropy,Astropy Collaboration,0,223
5,Astropy,Astropy Collaboration 2013,0,160
6,Astropy,Astropy Collaboration 2018,0,4
7,Astropy,Astropy Collaboration et al. (2013),0,167
8,Astropy,Astropy Collaboration et al. (2018),0,5
9,Astropy,Astropy Collaboration et al. 2013,0,167


In [11]:
# proportion of identifiers among software aliases in citations 
API_identifier_prop = API_results.loc[API_results['Identifier'] == 1]
API_identifier_prop = API_identifier_prop.groupby('Software_Package')['Bibcode'].nunique()/API_results.groupby('Software_Package')['Bibcode'].nunique()
API_identifier_prop

Software_Package
Astroblend           NaN
Astropy         0.038902
RADMC-3D        0.022277
SAOImage DS9    0.005556
Spec2d          0.054878
Stringray            NaN
TARDIS               NaN
WCSTools        0.002994
Name: Bibcode, dtype: float64

In [12]:
# total mentions of software aliases over time
API_over_time = pd.DataFrame({'year_count' : API_results.groupby(["Software_Package", "Pub_Year"])['Bibcode'].nunique()}).reset_index()
# write results
API_over_time.to_csv("API_over_time_061019.csv")
# show results over time
API_over_time

Unnamed: 0,Software_Package,Pub_Year,year_count
0,Astroblend,2016,2
1,Astroblend,2017,3
2,Astropy,1997,1
3,Astropy,2000,2
4,Astropy,2007,1
5,Astropy,2013,9
6,Astropy,2014,29
7,Astropy,2015,57
8,Astropy,2016,113
9,Astropy,2017,158
