In [2]:
import re
import xml.etree.ElementTree as ET
from lxml import etree
import sys

import Converter

In [9]:
def construct_html_file(html_open, html_close, converted_article_text, xml):
    """Wrap the converted article in the HTML template and write it to disk.

    The result is written to ``<title>.html`` (a relative path, so it ends up
    in the current working directory), where ``<title>`` is the article title
    with characters that are invalid in file names removed.

    Args:
        html_open: Path of the template file with the opening HTML. The
            title placeholder text inside it is replaced by the title.
        html_close: Path of the template file with the closing HTML.
        converted_article_text: Converted article content that is placed
            between the two template parts.
        xml: Handed to ``Converter.find_article_metadata_bmgn``; the first
            element of its result is used as the article title.
    """
    title = Converter.find_article_metadata_bmgn(xml)[0]

    if title:
        # Drop every character that is not allowed in a file name:
        #   \ / : * ? " < > |
        # The backslash has to be doubled inside the character class;
        # a lone r'\/' only matches the forward slash.
        title = re.sub(r'[\\/:*?"<>|]', '', title)

    # Fall back when there is no title at all, or when nothing usable is
    # left after sanitizing (which would otherwise yield a bare '.html').
    if not title or not title.strip():
        title = 'default_title'

    file_name = title + '.html'

    with open(html_open, 'r', encoding='utf-8') as file:
        html_start = file.read()

    with open(html_close, 'r', encoding='utf-8') as file:
        html_end = file.read()

    # NOTE(review): the file-name-safe title is also what is substituted into
    # the HTML, so characters such as ':' or '?' disappear from the displayed
    # title as well -- confirm that this is intended.
    html_start = html_start.replace('[Hier moet de titel, deze tekst zou uniek moeten zijn, 030]', title)

    output = html_start + '\n' + converted_article_text + '\n' + html_end

    with open(file_name, 'w', encoding='utf-8') as file:
        file.write(output)

In [8]:
def main():
    """Command-line entry point: convert one article XML file to an HTML file.

    Positional command-line arguments (taken from ``sys.argv``):
        1. input file (required; a hint is printed and nothing is done
           when it is missing)
        2. XSLT style file (optional, default ``ConversionHTML.xslt``)
        3. reference style (optional): ``ref``, ``fn`` or ``jhok``. Any other
           value, or omitting it, selects automatic detection (``a``).
    """
    cli_args = sys.argv

    # The input file is mandatory.
    if len(cli_args) < 2:
        print('Please input the input file as the first command line variable')
        return
    input_file = cli_args[1]

    style_file = cli_args[2] if len(cli_args) > 2 else 'ConversionHTML.xslt'

    # 'a' means: detect the reference style automatically further down.
    reference_style = 'a'
    if len(cli_args) <= 3:
        print('Using automatic reference style detection')
    elif cli_args[3] in ('ref', 'fn', 'jhok'):
        reference_style = cli_args[3]
        print(f'Using specific reference style {reference_style}')
    else:
        print(f'{cli_args[3]} is not a supported value, using automatic instead')

    print(f'Input file: {input_file}')
    print(f'Style file: {style_file}')
    print(f'Reference style: {reference_style}')

    # Convert the article without its front matter; the title is generated
    # separately from the XML and prepended again at the very end.
    body_only = Converter.split_title_from_body(input_file)
    converted = Converter.apply_xslt(body_only, style_file)
    converted = Converter.add_tables(converted, input_file)

    title_html = Converter.gen_title_html(input_file)

    if reference_style == 'jhok':
        print('preprocessing file')
        Converter.JHOK_preprocess(input_file)
        # From here on 'output.xml' is used as the input file (presumably the
        # result of the preprocessing step -- confirm), while the reference
        # list is still taken from the original file.
        original_file, input_file = input_file, 'output.xml'

        converted = Converter.add_footnotes_bottom_html(converted, input_file)
        converted = Converter.add_fn(converted, input_file)
        converted = Converter.add_references_without_link(converted, original_file)

    if reference_style == 'a':
        # Bibliographic references take precedence over footnotes; the second
        # check only runs when the first one finds nothing.
        detection_order = (
            ('bibr', 'ref', 'detected ref'),
            ('fn', 'fn', 'detected fn'),
        )
        for ref_type, detected_style, message in detection_order:
            if Converter.contains_ref_type(input_file, 'xref', ref_type):
                reference_style = detected_style
                print(message)
                break

    if Converter.contains_tag(input_file, 'ref-list'):
        print('contains ref list')

    if reference_style == 'ref':
        converted = Converter.add_references_bottom_html(converted, input_file)
        converted = Converter.add_ref(converted)
    elif reference_style == 'fn':
        converted = Converter.add_footnotes_bottom_html(converted, input_file)
        converted = Converter.add_fn(converted, input_file)

    # Merge the generated title with the processed front-free content.
    construct_html_file(
        'html_open.html',
        'html_close.html',
        title_html + '\n' + converted,
        input_file,
    )

In [None]:
# Run the conversion only when this code is executed directly
# (``__name__`` is '__main__'), not when it is imported as a module.
if __name__ == '__main__':
    main()

In [3]:
Converter.compose_ref_dict('''<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.0 20120330//EN" "JATS-journalpublishing1.dtd">
<article article-type="research-article" xml:lang="EN" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">LIBER</journal-id>
<journal-title-group>
<journal-title>LIBER QUARTERLY</journal-title>
</journal-title-group>
<issn pub-type="epub">2213-056X</issn>
<publisher>
<publisher-name>openjournals.nl</publisher-name>
<publisher-loc>The Hague, The Netherlands</publisher-loc>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">lq.13361</article-id>
<article-id pub-id-type="doi">10.53377/lq.13361</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Article</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Monitoring Organisational Article Processing Charges (APCs) using Bibliographic Information Sources: Turku University Library Case</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<contrib-id contrib-id-type="orcid">https://orcid.org/0000-0002-7901-2789</contrib-id>
<name>
<surname>Linna</surname>
<given-names>Anna-Kaarina</given-names>
</name>
<xref ref-type="aff" rid="aff1"/>
<email>anna-kaarina.linna@tsv.fi</email>
</contrib>
<contrib contrib-type="author">
<contrib-id contrib-id-type="orcid">https://orcid.org/0000-0002-1374-2375</contrib-id>
<name>
<surname>Yl&#x00F6;nen</surname>
<given-names>Irene</given-names>
</name>
<xref ref-type="aff" rid="aff2"/>
<email>irene.ylonen@jyu.fi</email>
</contrib>
<contrib contrib-type="author">
<contrib-id contrib-id-type="orcid">https://orcid.org/0000-0002-7599-9870</contrib-id>
<name>
<surname>Salmi</surname>
<given-names>Anna</given-names>
</name>
<xref ref-type="aff" rid="aff2"/>
<email>anna.m.salmi@jyu.fi</email>
</contrib>
<aff id="aff1">University of Turku, Finland and Federation of Finnish Learned Societies, Turku, Helsinki, Finland</aff>
<aff id="aff2">Open Science Centre, University of Jyv&#x00E4;skyl&#x00E4;, Finland</aff>
</contrib-group>
<pub-date pub-type="epub">
<month>06</month>
<year>2023</year>
</pub-date>
<volume>33</volume>
<fpage>1</fpage>
<lpage>23</lpage>
<permissions>
<copyright-statement>Copyright 2023, The copyright of this article remains with the author</copyright-statement>
<copyright-year>2023</copyright-year>
<license license-type="open-access" xlink:href="http://creativecommons.org/licenses/by/4.0/">
<license-p>This is an open-access article distributed under the terms of the Creative Commons Attribution 4.0 International License (CC BY 4.0), which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited. See <uri xlink:href="http://creativecommons.org/licenses/by/4.0/">http://creativecommons.org/licenses/by/4.0/</uri>.</license-p>
</license>
</permissions>
<self-uri xlink:href="https://www.liberquarterly.eu/article/10.53377/lq.13361"/>
<abstract>
<p>As open access publishing has become more widespread and required by research funders and the research community, the management and monitoring of article processing charges (APCs) have emerged as an important task in research organisations around the world. Within this tendency, a question of the comprehensiveness of organisational APC monitoring has become relevant. This case study demonstrates how the comprehensiveness of in-house APC monitoring can be evaluated using international bibliographic information sources like Web of Science and Scopus, where it is possible to identify the corresponding author, as well as Unpaywall and DOAJ, which contain information about the open access statuses and APCs of articles. Based on study results, it can be assumed that the organisation&#x2019;s in-house bookkeeping has succeeded in registering 52 percent of APC invoices while 48 percent have not been identified. The results show that the number of unreported publications that have been openly published and whose corresponding author is affiliated with the university is almost equal to those registered in the university&#x2019;s institutional APC report. The study describes the stages of data collection and processing in order of implementation, which allows a similar review to be feasible in another organisation. At the end of the article, development proposals are presented for both the organisations&#x2019; in-house data collection and the content of publishers&#x2019; invoices.</p>
</abstract>
<kwd-group>
<kwd>open access publishing</kwd>
<kwd>open access fees</kwd>
<kwd>article processing charges</kwd>
<kwd>open access policy</kwd>
</kwd-group>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1. Introduction</title>
<p>Over the last few years, development in open access publishing has increasingly led to Gold Open Access based on article-specific publication fees. In many countries, the development has been accelerated by the transition to nationwide transformative open access agreements. Many universities have also created centralised funds to pay article processing charges (APCs). Because of the above, the monitoring of article payments has become pivotal. To be able to outline the total cost of scientific publication, it should be clear what is being paid and to whom. A better and wider knowledge base on the cost of publishing has been identified as an important goal, especially in scientific libraries (see e.g. <xref ref-type="bibr" rid="r8">Lawson, 2015</xref>; <xref ref-type="bibr" rid="r14">Pinfield et al., 2016</xref>). However, estimating the total cost has proven difficult.</p>
<p>In Finland, 12 universities (86 percent of all universities) followed APCs in 2019 (<xref ref-type="bibr" rid="r17">Vil&#x00E9;n et al., 2021</xref>). In recent years, the motivation of Finnish universities to monitor the costs of open access publications has been partly influenced by a national policy. One main goal of the policy is to make the total cost of scholarly publishing transparent and publicly available (<xref ref-type="bibr" rid="r12">Open Science Coordination in Finland, Federation of Finnish Learned Societies, 2020</xref>).</p>
<p>As the first university library in Finland, Turku University Library started the systematic collection of APCs in co-operation with the university&#x2019;s financial services in the spring of 2017. Monitoring was organised on two levels: the processing of purchase invoices centrally in the financial services and a more detailed analysis of payments in the library. It is noteworthy that the model is not based on a centralised fund, but invoices are paid from unit-specific budgets. Payments made with credit cards were retroactively included in the data collection.</p>
<p>At the University of Turku (UTU), invoice processing is centralised in the financial services. Financial secretaries identify the APC payments from amongst all invoices received by the university and allocate them to the correct general ledger account. Before the organisation-level monitoring started, the library arranged a training session for financial secretaries on open access publishing and APCs to help them identify the correct invoices.</p>
<p>The library receives a compilation of the APC invoices based on the data of the APC general ledger account as an Excel file three times per year. The original invoices are available as PDF files in the local accounting system where an information specialist has a user account. However, library access is limited to the specific general ledger account only. With authorised access to the APC account, it is possible to view the original invoices more closely and identify the publication, Digital Object Identifier (DOI), journal title, publisher and whether the publication channel is full open access or hybrid. After identification, the information specialist creates an Excel table where the identifying data of the publication and publication channel are recorded. If the fee is not APC but instead, for example, a submission fee, it will be deleted from the report.</p>
<p>In addition, the library checks whether and under what conditions the publisher allows the publication to be self-archived, as well as whether the information about the publication has been deposited in the university&#x2019;s current research information system UTUCRIS. If not, the publication and APC information are recorded to UTUCRIS, and the full article is self-archived and made openly available via the institutional repository. Finally, the compilation of the APC data is submitted to the international Open APC service (University of Turku APC data available at <ext-link ext-link-type="uri" xlink:href="https://treemaps.openapc.net/apcdata/turku-u/">https://treemaps.openapc.net/apcdata/turku-u/</ext-link>).</p>
<p>Open APC is a service managed by the Bielefeld University Library. Open APC releases data sets on fees paid for open access journal articles by universities and research institutions (<xref ref-type="bibr" rid="r13">Pieper &#x0026; Broschinski, 2018</xref>). In the future, the separate data transfer from Turku University Library to Open APC will become unnecessary because APC data can be transferred from UTUCRIS to the national VIRTA publication service<xref ref-type="fn" rid="fn1"><sup>1</sup></xref> automatically. After this, VIRTA delivers the data received from the Finnish organisations centrally to Open APC.</p>
<sec id="s1a">
<title>1.1. Research Questions</title>
<p>Recently, the library began planning a study to examine the comprehensiveness and quality of the current APC data collection. One premise was to harvest APC information from international bibliographic information sources and compare it with in-house APC bookkeeping to find out how many paid articles were actually discerned by the organisation&#x2019;s two-stage monitoring model. The years 2017 and 2018 were selected for the analysis for two reasons: the collection of APCs had just started and the transformative agreements, where APCs are often included in the subscription price, were not yet concluded.</p>
<p>In this study, we aim to answer the following questions:</p>
<list list-type="bullet">
<list-item><p>Is it possible to identify articles that may have been published with an article processing charge (APC) by using international bibliographic sources&#x003F;</p></list-item>
<list-item><p>How does the information retrieved from external sources compare with the organisation&#x2019;s own bookkeeping&#x003F;</p></list-item>
<list-item><p>Do articles not identified by in-house reports share common characteristics (e.g. a publisher)&#x003F;</p></list-item>
</list>
</sec>
</sec>
<sec id="s2">
<title>2. Literature Review</title>
<p>There have been a few attempts to study the monitoring of article fees by research organisations. The latest review can be found from <xref ref-type="bibr" rid="r2">Andre Bruns and Niels Taubert (2021)</xref>. They have reviewed Gold Open Access payment data compiled from German universities into the Open APC data collection. In their study, <xref ref-type="bibr" rid="r2">Bruns and Taubert (2021)</xref> found two significant problems in tracking publication fees. They call them blind spots. Comprehensive data are not available for organisations, as they do not have a centralised fund to facilitate the monitoring of payments. In these organisations, fees are paid in different units, and collecting centralised data has not been simple. There may also be a complex set of funding sources. In addition, even if an organisation has a centralised fund, monitoring payments can still be challenging, as payments are made by different entities of a research organisation and processed in a variety of ways.</p>
<p>To look at the blind spots they perceived, <xref ref-type="bibr" rid="r2">Bruns and Taubert (2021)</xref> used, in their own words, an indirect approach to identify APC-specific publications. They used bibliometric data sources (Web of Science and SSN-Gold-OA-list) for the identification instead of relying on data collected by organisations. As a result, for a total of more than 40 German universities, the authors concluded that the data reported in the Open APC database was notably incomplete. In universities that transferred data to Open APC, more than half of the estimated costs were not covered. However, the reasons for the incompleteness of the data were beyond the scope of their study.</p>
<p>A local knowledge gap in monitoring publication fees was also noticed by <xref ref-type="bibr" rid="r3">Stephen Cramond et al. (2019)</xref> a couple of years earlier. In 2018, the Council of Australian University Librarians (CAUL) carried out a project to study the financial impact of APC payments on universities in Australia and New Zealand. One of the main results of the pilot project was how little is actually known about the nature and amounts of publication fees in universities. An important finding was the variance of different sources of funding such as research funders, endowment funds, departmental funds, transfers from research partners or researchers&#x2019; personal funds. They also found that only a few universities had a centralised APC fund, and only some of the universities collected data on publication fees at all.</p>
<p><xref ref-type="bibr" rid="r3">Cramond et al. (2019)</xref> reported that, due to the limitations of centrally reported figures, the Australian project decided to look at established APC payments in an indirect way. They evaluated the volumes of open access articles and potential APC payments on the basis of publication data in the Web of Science and Scopus and compared the data obtained with data from Unpaywall. The methodology used was the same as in this article.</p>
<p>APC payments that are paid from different funds and from different sources outside the centralised organisational fund are often referred to as &#x2018;APCs in the wild&#x2019; (see e.g. <xref ref-type="bibr" rid="r1">Andrew, 2016</xref>; <xref ref-type="bibr" rid="r11">Monaghan et al., 2020</xref>). <xref ref-type="bibr" rid="r1">Theo Andrew (2016)</xref> noted in a review of the University of Edinburgh&#x2019;s &#x2018;APCs in the wild&#x2019; that there was a significant amount of APC fees, of which the university was unaware. He estimated that &#x2018;APCs in the wild&#x2019; could account for up to 20 percent more in the total cost of publication than previously thought.</p>
<p>In Sweden, the monitoring of APC payments in research organisations has also been examined (<xref ref-type="bibr" rid="r7">Gustafsson et al., 2017</xref>; <xref ref-type="bibr" rid="r10">Lov&#x00E9;n, 2019</xref>). Stockholm University Library and Gothenburg University Library have been tracking open access publishing costs since 2016. Sweden has a national centralised service run by the National Library of Sweden to collect APC fees from universities, called Open APC Sweden<xref ref-type="fn" rid="fn2"><sup>2</sup></xref> (<xref ref-type="bibr" rid="r7">Gustafsson et al., 2017</xref>).</p>
<p>In her article, <xref ref-type="bibr" rid="r10">Lisa Lov&#x00E9;n (2019)</xref> has described the process of collecting fees at Stockholm University. The process within the university works similar as at the University of Turku. However, Stockholm University seems to have a more transparent accounting system, in which all payments are accessible for all authorised users. This makes it much easier to track invoices from the system if they were not initially recorded for the correct APC account. In her review, <xref ref-type="bibr" rid="r10">Lov&#x00E9;n (2019)</xref> has looked more closely at the publishers to whom the largest amount of APC fees is paid, as well as the sources of funding.</p>
<p>As of 2018, Stockholm University Library managed a significant centralised fund for APC payments (<xref ref-type="bibr" rid="r10">Lov&#x00E9;n, 2019</xref>). At that time, <xref ref-type="bibr" rid="r10">Lov&#x00E9;n (2019)</xref> noted that Sweden decided to withdraw from the agreement with Elsevier, and the saved funds were used to finance article fees for full open access journals. Many benefits were identified by the centralised fund, including easier tracking of article fees, greater understanding of the open access publishing landscape and clear benefits for researchers who no longer had to worry about paying article fees themselves.</p>
<p>In a report from the University of Gothenburg, <xref ref-type="bibr" rid="r7">Gustafsson et al. (2017)</xref> noted how difficult it was to find out the exact amount of APC payments. In Gothenburg, publication fees for 2016 were analysed. Due to the monitoring problems, two different methods were used in this analysis to identify the payments. First, the authors reviewed the organisation&#x2019;s invoices in the financial management system, and second, they requested information directly from the publisher. Excluding Elsevier, data were obtained from several significant publishers. Combining these methods, the aim was to estimate the minimum APC costs for the University of Gothenburg in 2016. The authors concluded that at least SEK 5.2 million (approximately EUR 490,000) was paid in 2016 for the openness of articles.</p>
<p>According to the <xref ref-type="bibr" rid="r7">Gustafsson et al. (2017)</xref> report, APC payments had been credited with a total of 18 different accounts in the university&#x2019;s financial management system. APCs paid with a credit card were also a problem in terms of tracking. Additionally, there were major shortcomings in the publishers&#x2019; invoices from the perspective of identifying the article.</p>
<p>When it comes to tracking APC fees at the organisational level utilising financial management systems, all the articles and reports above (<xref ref-type="bibr" rid="r2">Bruns &#x0026; Taubert, 2021</xref>; <xref ref-type="bibr" rid="r3">Cramond et al., 2019</xref>; <xref ref-type="bibr" rid="r7">Gustafsson et al., 2017</xref>; <xref ref-type="bibr" rid="r10">Lov&#x00E9;n, 2019</xref>) recognise the great difficulty of collecting data. To improve the quality of the data collected by organisations, further consideration needs to be given to the reasons why the coverage of the data is largely poor. To get to the root causes, it is necessary to continue to observe the data collected by organisations and compare it with data from external sources.</p>
</sec>
<sec id="s3">
<title>3. Methodology</title>
<p>The objective of this study was to identify APC-liable publications from bibliographic information sources and to compare the results with centrally collected APC data from the University of Turku. The initial assumption was that the university&#x2019;s own bookkeeping has managed to collect APCs comprehensively. The analysis is based on comparison of in-house APC reports and harvested University of Turku-affiliated open access articles from the bibliographic information sources Web of Science and Scopus. Unpaywall and DOAJ are used for identifying the open access statuses of articles more closely and to exclude articles from so-called Diamond publication channels, which do not charge APCs. The analysis focuses on open access articles whose corresponding authors are affiliated with the University of Turku or Turku University Hospital and which are not reported in internal APC bookkeeping at the university or hospital.</p>
<p>Publications were included in the analysis based on the institutional affiliation of the corresponding author (Scopus) or the reprint author (Web of Science) to the University of Turku or the Turku University Hospital. Although there is disciplinary variation in determining the roles and responsibilities among authors, including the role of the corresponding or reprint author, they are generally likely to be the principal authors and therefore most likely to be responsible for paying APCs (<xref ref-type="bibr" rid="r3">Cramond et al., 2019</xref>; <xref ref-type="bibr" rid="r6">Gonz&#x00E1;lez-Alcaide et al., 2017</xref>). The analysis in this study is based on this assumption. As the reprint or corresponding author needs to remain locatable for requests as far to the future as possible, the role is typically assigned to an author with a stable position in the organisation (<xref ref-type="bibr" rid="r19">Weiss, 2012</xref>).</p>
<p>However, even though the corresponding author is usually the person responsible for assuring the publication costs, the APC invoice may be distributed to a co-author in another institution or split by several authors (<xref ref-type="bibr" rid="r10">Lov&#x00E9;n, 2019</xref>). For this study, it was reasonable to assume that the corresponding or reprint author was most likely to be a person responsible for submitting the APC fee, even though there could be no absolute certainty about it. This is one of the limitations of the study.</p>
<sec id="s3a">
<title>3.1. Data Collection from in-House Bookkeeping</title>
<p>For this study, a separate Excel spreadsheet of all identified APCs during 2017 and 2018 was received from bookkeeping. In total, the data contained information on 233 individual articles whose payment year was either 2017 or 2018. When the APC data were compared to publications reported by the university to the Ministry of Education and Culture,<xref ref-type="fn" rid="fn3"><sup>3</sup></xref> it was found that the year of payment is not necessarily the same as the publication year. Two of the articles had been published in 2016, 17 in 2019 and two in 2020, which were excluded from the analysis to ensure a consistent comparison based on publication years. Since the Unpaywall identification is based on DOIs, 12 articles without DOIs need to be removed from the analysis.</p>
<p>To supplement the university&#x2019;s in-house data, similar information on APCs paid by the Turku University Hospital was also received. The hospital&#x2019;s publications are recorded in the university&#x2019;s current research information system (UTUCRIS) and reported to the Ministry of Education and Culture as outputs of the University of Turku. The information on APCs paid by the hospital was therefore natural to include in the analysis. The hospital data were interpreted by a librarian trainee preparing his bachelor&#x2019;s thesis on APC collection.<xref ref-type="fn" rid="fn4"><sup>4</sup></xref> From these data, a total of 65 articles with paid APCs were identified. A total of 265 publications from the organisation&#x2019;s own bookkeeping ended up in the analysis (<xref ref-type="table" rid="tb001">Table 1</xref>).</p>
<table-wrap id="tb001">
<label>Table 1:</label>
<caption><p>Total number of publications in University of Turku and Turku University Hospital APC bookkeeping by payment year.</p></caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top"/>
<th align="left" valign="top">University of Turku</th>
<th align="left" valign="top">Turku University Hospital</th>
<th align="left" valign="top">Total</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top">Payment year 2017</td>
<td align="left" valign="top">78</td>
<td align="left" valign="top">26</td>
<td align="left" valign="top">104</td>
</tr>
<tr>
<td align="left" valign="top">Payment year 2018</td>
<td align="left" valign="top">122</td>
<td align="left" valign="top">39</td>
<td align="left" valign="top">161</td>
</tr>
<tr>
<td align="left" valign="top">Total number of publications</td>
<td align="left" valign="top">200</td>
<td align="left" valign="top">65</td>
<td align="left" valign="top">265</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3b">
<title>3.2. Data Collection from External Sources</title>
<p>The following external sources were used to collect and analyse the article metadata:</p>
<list list-type="bullet">
<list-item><p>Web of Science</p></list-item>
<list-item><p>Scopus</p></list-item>
<list-item><p>Unpaywall</p></list-item>
<list-item><p>DOAJ</p></list-item>
</list>
<p>The search from Web of Science and Scopus was conducted in April 2020. The filtered results of the search were submitted to Unpaywall in the same month. The bibliographic knowledge base of the analysis was created. At the final stage, in the autumn of 2021, the DOAJ metadata were integrated into the data set to exclude Diamond open access journals from the analysis.</p>
<sec id="s3b1">
<title>3.2.1. Web of Science and Scopus</title>
<p>The first phase of the project consisted of gleaning the metadata of the journal articles that were published in 2017 and 2018 and were affiliated with the University of Turku or Turku University Hospital from 2017 to 2018. The decision to use and combine data from both Web of Science Core Collection and Scopus was based on the observation, made over time by the publication information reporting team at the Turku University Library, that a consistent number of university-affiliated publications were indexed in Scopus as online first (early access) versions before becoming available in Web of Science. Moreover, there is variation between the two databases in terms of disciplinary coverage, e.g. in social sciences and humanities that are prevalent at the University of Turku (for a comparison of document overlap between Web of Science and Scopus, see <xref ref-type="bibr" rid="r18">Visser et al., 2021</xref>).</p>
<p>The Web of Science results were used as the baseline, and the result lists of the two databases were matched based on DOIs. Duplicates that were found in both Web of Science and Scopus were excluded from the Scopus data. This choice was made due to the rich open-access-related metadata available in the former. In addition, author affiliation information was found to be particularly clear in the Web of Science metadata since it included the name and address of the reprint author plus a separate metadata field for the corresponding author.</p>
<p>Article types selected for the study were original refereed articles and review articles. These two articles types were selected because they were assumed to be particularly APC liable. Both Web of Science and Scopus index the name and affiliation of the corresponding author or authors as well as information on the open availability of the articles. The information on corresponding authors was found to be crucial, as it is not available in any other internal or external sources.</p>
<p>Next, the article entries were filtered based on whether or not they had a clearly indicated University of Turku or Turku University Hospital affiliation in the corresponding author field. Publications where the corresponding author had two or more affiliations were excluded from the analysis because it was not possible to unambiguously identify which organisation had been paid the possible APC payment. In addition, if the corresponding author had not provided the University of Turku or Turku University Hospital email address, it was assumed that the author&#x2019;s primary affiliation was neither the university nor the hospital.</p>
</sec>
<sec id="s3b2">
<title>3.2.2. Unpaywall</title>
<p>The second phase of the project consisted of Unpaywall analysis for those articles where corresponding authors were affiliated with the university or the hospital.</p>
<p>Unpaywall is a non-profit service run by OurResearch that curates a list of almost 20 million freely available scholarly articles, and it is commonly used by browser plug-ins (<xref ref-type="bibr" rid="r4">Else, 2018</xref>). In this study, however, the Unpaywall Simple Query tool was used for receiving a structured open access report. The report was created in two batches because only 1,000 DOIs were possible to submit at once. To ensure accurate matching with open access status data, articles without a DOI were excluded in this phase. Finally, the received Excel sheets were combined and copied to the analysis.</p>
</sec>
<sec id="s3b3">
<title>3.2.3. DOAJ</title>
<p>Although DOAJ information is aggregated in Unpaywall, information was also needed on whether or not the publication channel charges APCs from authors. Unpaywall does not include information on APCs, but DOAJ does. In order to exclude the Diamond open access journals, i.e. journals that do not collect APCs from authors or readers, the original Web of Science and Scopus data enriched with the Unpaywall results were compared with DOAJ information. DOAJ journal-level metadata were downloaded in .csv file format and imported into the analysis Excel. The comparison was made by ISSN IDs.</p>
</sec>
</sec>
</sec>
<sec id="s4">
<title>4. Analysis and Results</title>
<sec id="s4a">
<title>4.1. Data Analysis</title>
<p>The analysis focuses on open access publications whose corresponding author is affiliated with the University of Turku or Turku University Hospital and which are not reported in the university&#x2019;s or hospital&#x2019;s in-house APC bookkeeping.</p>
<p>To perform the analysis, all collected data were compiled into one Excel file and compared using Excel&#x2019;s VLOOKUP syntax based on the articles&#x2019; permanent identifiers (DOIs, ISSNs). After comparison, the analysis data sheet contained 1,897 rows of articles and a 39-column information matrix compiling the in-house bookkeeping, Web of Science, Scopus, Unpaywall, DOAJ and university publication data reports from 2017 and 2018. For the final analysis, the following indicators were selected for further examination.</p>
<array>
<tbody>
<tr>
<td align="left" valign="top">1)</td>
<td align="left" valign="top">In-house bookkeeping</td>
<td align="left" valign="top">DOI, APC payment year</td>
</tr>
<tr>
<td align="left" valign="top">2)</td>
<td align="left" valign="top">University data warehouse</td>
<td align="left" valign="top">Publication year</td>
</tr>
<tr>
<td align="left" valign="top">3)</td>
<td align="left" valign="top">Web of Science</td>
<td align="left" valign="top">Reprint author, email, open access information</td>
</tr>
<tr>
<td align="left" valign="top">4)</td>
<td align="left" valign="top">Scopus</td>
<td align="left" valign="top">Correspondence address, affiliation, access type</td>
</tr>
<tr>
<td align="left" valign="top">5)</td>
<td align="left" valign="top">Unpaywall</td>
<td align="left" valign="top">best_oa_evidence, best_oa_host, oa_status, best_oa_license, journal_name, journal_issn_l, journal_issns, publisher</td>
</tr>
<tr>
<td align="left" valign="top">6)</td>
<td align="left" valign="top">DOAJ</td>
<td align="left" valign="top">APC payment</td>
</tr>
</tbody>
</array>
<p>In manual revision, a total of 199 publications were excluded because the corresponding author had not provided the university&#x2019;s or hospital&#x2019;s email addresses, or they had a double affiliation. After this procedure, 1,698 rows remained in the data. Next, the publications recognised in either the university&#x2019;s or hospital&#x2019;s APC bookkeeping were removed from the analysis. After that, the data set contained 1,433 articles. Finally, the openly available articles were extracted from the data using Unpaywall open access statuses.</p>
<p>Unpaywall uses four different open access statuses: Green, Gold, Hybrid and Bronze, which are commonly used in open access contexts (<xref ref-type="bibr" rid="r16">Priem, 2021</xref>). It is also widely agreed that publications identified as Bronze open access cannot be defined as genuinely open access because there is no information about their long-term availability and license (<xref ref-type="bibr" rid="r15">Piwowar et al., 2018</xref>). In total, the analysis data contained 298 publications that were indexed as Gold or Hybrid status in Unpaywall. A closer examination of these publications showed that they included both Creative Commons licensed publications and publications that were published in DOAJ-indexed journals without a license. Finally, based on the information in DOAJ, journals that do not charge an APC were removed from the analysis.</p>
</sec>
<sec id="s4b">
<title>4.2. Results</title>
<p>The purpose of this study was to survey how comprehensively the organisation&#x2019;s own accounting reaches the publications where the APC has been paid. Based on the data analysis from different bibliographic and open access sources, we identified 242 articles (<xref ref-type="fig" rid="fg001">Figure 1</xref>) that have been published openly available in Gold Open Access or Hybrid journals and are mostly licensed under Creative Commons licenses.</p>
<fig id="fg001">
<label>Fig. 1:</label>
<caption><p>The share of identified licensed open access articles that were not reported in organisation APC reports and that were published in journals that charge an APC.</p></caption>
<graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="figures/LIBER_2023_33_Linna_fig1.jpg"/>
</fig>
<p>One of the goals of this analysis was to determine whether unidentified article processing fees would have a connecting factor, such as a specific publisher.</p>
<p>After analysing both internal and external data, no significant differences between publishers were found. As expected, a significant number of articles had been published in Springer, Wiley and Elsevier journals (see <xref ref-type="fig" rid="fg002">Figure 2</xref>). Interestingly, articles from these publishers came out on top in both analyses. Some of their articles were well captured in the university&#x2019;s bookkeeping (<xref ref-type="fig" rid="fg003">Figure 3</xref>), but plenty of these publishers&#x2019; articles have also gone unnoticed in the university&#x2019;s monitoring (<xref ref-type="fig" rid="fg002">Figure 2</xref>). The reason for this is difficult to assess. In-house follow-up at the University of Turku is based on observations and interpretations of the content of invoices by individual financial secretaries.</p>
<fig id="fg002">
<label>Fig. 2:</label>
<caption><p>Distribution of unreported open access articles (Gold and Hybrid) by publishers based on the number of their articles. Publishers with four or fewer articles are combined with &#x2018;Other publishers&#x2019;.</p></caption>
<graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="figures/LIBER_2023_33_Linna_fig2.jpg"/>
</fig>
<fig id="fg003">
<label>Fig. 3:</label>
<caption><p>Distribution of in-house APC reports by publishers based on the number of their articles. Publishers with four or fewer articles are combined with &#x2018;Other publishers&#x2019;.</p></caption>
<graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="figures/LIBER_2023_33_Linna_fig3.jpg"/>
</fig>
<p>Most clearly, the in-house follow-up was unable to report the articles of MDPI and American Chemical Society (see <xref ref-type="fig" rid="fg004">Figure 4</xref>). This would indicate that the publishers in question may have room for improvement in the information recorded in the invoices.</p>
<fig id="fg004">
<label>Fig. 4:</label>
<caption><p>Comparison of publishers of reported and unreported publications based on article volumes. Publishers with four or fewer articles are combined with &#x2018;Other publishers&#x2019;.</p></caption>
<graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="figures/LIBER_2023_33_Linna_fig4.jpg"/>
</fig>
<p>Notably, this final comparison (<xref ref-type="fig" rid="fg005">Figure 5</xref>) shows that almost as many unreported as reported open access articles can be identified.</p>
<fig id="fg005">
<label>Fig. 5:</label>
<caption><p>The share of articles reported in university or hospital APC bookkeeping and identified open access articles from bibliographic information sources.</p></caption>
<graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="figures/LIBER_2023_33_Linna_fig5.jpg"/>
</fig>
<p>Based on the analysis, it can be assumed that the organisation&#x2019;s bookkeeping has succeeded in registering 52 percent of APC invoices, while 48 percent (242 articles) have not been identified (<xref ref-type="fig" rid="fg005">Figure 5</xref>). When focusing on the annual level, a total of 99 articles have been found in 2017 and 166 in 2018 (<xref ref-type="fig" rid="fg006">Figure 6</xref>). Considering that only two years of statistics are under review, the proportion of identified payments has clearly increased.</p>
<fig id="fg006">
<label>Fig. 6:</label>
<caption><p>The comparison of reported and unreported APCs by publication year.</p></caption>
<graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="figures/LIBER_2023_33_Linna_fig6.jpg"/>
</fig>
</sec>
</sec>
<sec id="s5">
<title>5. Discussion and Conclusions</title>
<p>The first research question of this study was to find out whether it is possible to obtain information on paid APCs by using external information sources. Based on the analysis, and paying attention to certain limitations, it seems possible to harvest and identify APCs from external sources. External data sources have also been utilised previously, for example at Stockholm University where the Web of Science is used to complete the annual institutional APC report by identifying open access articles in which University of Stockholm researchers are identified as corresponding authors (<xref ref-type="bibr" rid="r10">Lov&#x00E9;n, 2019</xref>).</p>
<p>However, a few limitations must be remembered when using bibliographic information sources. Payments cannot be clearly defined from them. A particular problem is caused by the roles of the corresponding author and publication fee payer. In this study, it has been assumed that the corresponding author is also the payer of the article fee, but in fact, this is not always the case. The paying party cannot be found clearly from any available source other than the organisation&#x2019;s own monitoring. In addition, when using external sources, it is impossible to find out the payment year of the publication fee. The publication fee may have been paid in a different year than the publication has been published.</p>
<p>The email address used by the author can also cause problems. If the researcher is not using the organisation&#x2019;s email domain when submitting the article to the journal, it can be challenging to identify the affiliation to a certain organisation. Consequently, articles of this type may be completely excluded from the review, as decided in this study.</p>
<p>Using external sources to monitor publication fees is possible but rather laborious and contains the above-mentioned limitations. The most ideal situation would be if the payments could be identified within the paying organisation. Although the internal monitoring of organisations does not seem very accurate in light of this and previous research, it is possible to develop it.</p>
<p>Although in this study we did not focus on the reasons why the organisation&#x2019;s own monitoring is so inaccurate, some possible causes can be presented. Currently in Finland, the organisations&#x2019; own monitoring is largely based on the manual work of the financial secretaries. They are usually not familiar with open access terminology. If the APC is not clearly mentioned in the invoice, it is possible that it is not registered. Publication fees paid by credit card are particularly difficult because there is very little information on the credit company&#x2019;s invoice.</p>
<p>The second research question concerned how information retrieved from other information sources compares with the organisation&#x2019;s internal accounting. Based on information received from Web of Science, Scopus, Unpaywall and DOAJ, it can be assumed that the University of Turku has paid APCs for more than 500 articles published in 2017&#x2013;2018 (see <xref ref-type="fig" rid="fg005">Figure 5</xref>). Most of these articles were indexed in Web of Science (444 articles, 86 percent), and only 34 (7 percent) articles were published in journals that were indexed only in Scopus. In this context, it is important to note that Web of Science was assessed as the primary data source, so if the same article was found in both databases, only Web of Science data were taken for analysis. The university&#x2019;s own bookkeeping also contained 29 articles that were not indexed by either Web of Science or Scopus.</p>
<p>The third and final research question concerned whether open access APC-liable publications identified from external bibliographic sources share some common characteristics (e.g. a publisher). A significant number of articles (51 percent) reported in the organisation&#x2019;s own bookkeeping had been published in Springer, Wiley and Elsevier journals (see <xref ref-type="fig" rid="fg003">Figure 3</xref>). When comparing the same distribution with publishers of non-reported open access articles, one can observe that, although Springer is the largest of these, MDPI rises second (<xref ref-type="fig" rid="fg002">Figure 2</xref>). Both statistics also highlight the large deviation of articles among different publishers, which suggests that in 2017&#x2013;2018, open access publishing was not centralised for journals of large commercial scientific publishers. In fact, the data from DOAJ demonstrate that there were 32 articles that were published in open access journals that did not charge APCs, so they were removed from the final analysis for that reason.</p>
<p>The final result shows (<xref ref-type="fig" rid="fg005">Figure 5</xref>) that the University of Turku in-house bookkeeping has succeeded in registering 52 percent of APC invoices, while 48 percent have not been identified. The results show that the number of unreported publications that have been openly published and whose corresponding author is affiliated with the university is almost equal to those registered in the university&#x2019;s institutional APC report. Based on the result, it can be assumed that the APC monitoring for the years 2017&#x2013;2018 has not produced a particularly reliable result, and based on that, the APC statistics collected by the organisation should not be used exclusively as a basis for evaluating the total costs.</p>
<sec id="s5a">
<title>5.1. Recommendations for Organisations</title>
<p>To develop local monitoring, it is important to be able to identify payments as precisely as possible. Therefore, the practice mentioned by <xref ref-type="bibr" rid="r10">Lisa Lov&#x00E9;n (2019)</xref> in her article at Stockholm University would be applicable in other organisations as well. If invoice management were as open as possible and access to the system was also possible for library and experts in open science within the institution, it would improve the identification of publication fees. Lov&#x00E9;n mentions that, in Stockholm&#x2019;s invoice management system, it is possible to search by the name of the publisher, which enables finding invoices that were inadvertently credited to something other than the publication fee account. The more centralised the monitoring of payment information is and the more the expertise of information specialists can be utilized, the more accurate the result could be.</p>
<p>Organisations themselves can facilitate the tracking and identification of invoices by instructing researchers to provide invoicing information in the correct format in publishers&#x2019; systems. Additionally, by using different types of references that can be added to invoices, they can be more easily identified in the organisation&#x2019;s financial management.</p>
<p>Naturally, centralised funds facilitate the monitoring of publication fees. However, it must be remembered that they also need to be communicated clearly and sufficiently widely within the university, so that all individual researchers and financial secretaries would remember their existence when the payment of the publication fee becomes relevant to them. Especially in situations where the organisation is unable to cover all publication fees, but there are internal criteria for centralised payment of fees, this is particularly important.</p>
</sec>
<sec id="s5b">
<title>5.2. Recommendations for Publishers</title>
<p>Publishers would have an opportunity to clarify and facilitate the monitoring of publication fees. From the paying organisation&#x2019;s point of view, the most important issue would be the adequate information on the invoices received from the publishers.</p>
<p>The <xref ref-type="bibr" rid="r5">ESAC community (2017)</xref> has listed recommendations of the information an APC invoice should contain in machine-readable format:</p>
<list list-type="bullet">
<list-item><p><italic>Name and email address of the author who is affiliated to the paying institution (must be the corresponding author)</italic></p></list-item>
<list-item><p><italic>Complete statement of the author&#x2019;s affiliation to the paying institution (e.g. university, institute, department)</italic></p></list-item>
<list-item><p><italic>Funding organisation (research funder and grant ID)</italic></p></list-item>
<list-item><p><italic>Date of acceptance</italic></p></list-item>
<list-item><p><italic>Date of publication</italic></p></list-item>
<list-item><p><italic>Journal title</italic></p></list-item>
<list-item><p><italic>ISSN</italic></p></list-item>
<list-item><p><italic>Article title</italic></p></list-item>
<list-item><p><italic>Article type</italic></p></list-item>
<list-item><p><italic>DOI and link to the published article</italic></p></list-item>
<list-item><p><italic>Amount due</italic></p></list-item>
<list-item><p><italic>Discounts and discount group (if applicable)</italic></p></list-item>
<list-item><p><italic>CC license (<xref ref-type="bibr" rid="r5">ESAC community, 2017</xref>, <ext-link ext-link-type="uri" xlink:href="https://esac-initiative.org/about/oa-workflows/">https://esac-initiative.org/about/oa-workflows/</ext-link>)</italic></p></list-item>
</list>
<p>Currently, not all publishers&#x2019; invoices have sufficient information. In practice, the invoice may have contained so little information that it has not been possible to identify the paid article at all. The invoice with unclear or incomplete information can remain unnoticed by the financial services, and even if it is recognised, further processing in the library is laborious.</p>
</sec>
<sec id="s5c">
<title>5.3. Study Limitations</title>
<p>The study has concentrated on using and analysing information on international bibliographic sources. One can justifiably ask why the study did not make use of the lists of paid open access articles available from publishers. During the research, pre-compiled publisher-specific lists were centrally available only from large hybrid publishers, so getting them from smaller publishers would have been comparatively difficult. Furthermore, analysis based solely on data from Web of Science and Scopus by default excludes publications issued in national languages in Finnish publication channels. On the other hand, based on an extensive survey conducted in 2020, APCs were very rare in Finnish peer-reviewed publication channels; only 2 percent of 332 of them charged APCs at that time (<xref ref-type="bibr" rid="r9">Linna et al., 2020</xref>). The choice to use Web of Science and Scopus as sources was based on their comprehensiveness and ready availability at the time of the study.</p>
</sec>
<sec id="s5d">
<title>5.4. Availability of Data</title>
<p>The data can be accessed through Jyv&#x00E4;skyl&#x00E4; University Digital Repository at <ext-link ext-link-type="doi" xlink:href="10.17011/jyx/dataset/83986">https://doi.org/10.17011/jyx/dataset/83986</ext-link>.</p>
</sec>
</sec>
</body>
<back>
<ref-list>
<title>References</title>
<ref id="r1"><mixed-citation>Andrew, T. (2016). <italic>Improving estimates of the total cost of publication by recognising &#x2018;APCs paid in the wild&#x2019;</italic>. The Winnower. <ext-link ext-link-type="uri" xlink:href="https://citeseerx.ist.psu.edu/document?repid&#x003D;rep1&#x0026;type&#x003D;pdf&#x0026;doi&#x003D;9f4798017e3acfd62057a1bc0d3d03acaba47606">https://citeseerx.ist.psu.edu/document?repid&#x003D;rep1&#x0026;type&#x003D;pdf&#x0026;doi&#x003D;9f4798017e3acfd62057a1bc0d3d03acaba47606</ext-link></mixed-citation></ref>
<ref id="r2"><mixed-citation>Bruns, A., &#x0026; Taubert, N. (2021). Investigating the blind spot of a monitoring system for article processing charges. <italic>Publications, 9</italic>(3), 41. <ext-link ext-link-type="doi" xlink:href="10.3390/publications9030041">https://doi.org/10.3390/publications9030041</ext-link></mixed-citation></ref>
<ref id="r3"><mixed-citation>Cramond, S., Barnes, C., Lafferty, S., Barbour, V., Booth, D., Brown, K., Costello, D., Croker, K., O&#x2019;Connor, R., Rolf, H., Ruthven, T., &#x0026; Scholfield, S. (2019). Fair, affordable and open access to knowledge: The Caul collection and reporting of APC information project. <italic>Proceedings of the IATUL Conferences</italic>. Paper 2. <ext-link ext-link-type="uri" xlink:href="https://docs.lib.purdue.edu/iatul/2019/fair/2">https://docs.lib.purdue.edu/iatul/2019/fair/2</ext-link></mixed-citation></ref>
<ref id="r4"><mixed-citation>Else, H. (2018). How Unpaywall is transforming open science. <italic>Nature, 560</italic>, 290. <ext-link ext-link-type="doi" xlink:href="10.1038/d41586-018-05968-3">https://doi.org/10.1038/d41586-018-05968-3</ext-link></mixed-citation></ref>
<ref id="r5"><mixed-citation>ESAC community. (2017). ESAC workflow recommendations for transformative and open access agreements. <italic>ESAC Initiative</italic>. <ext-link ext-link-type="uri" xlink:href="https://esac-initiative.org/about/oa-workflows/">https://esac-initiative.org/about/oa-workflows/</ext-link></mixed-citation></ref>
<ref id="r6"><mixed-citation>Gonz&#x00E1;lez-Alcaide, G., Park, J., Huaman&#x00ED;, C., &#x0026; Ramos, J. M. (2017). Dominance and leadership in research activities: Collaboration between countries of differing human development is reflected through authorship order and designation as corresponding authors in scientific publications. <italic>PLoS One 12</italic>(8), Article e0182513. <ext-link ext-link-type="doi" xlink:href="10.1371/journal.pone.0182513">https://doi.org/10.1371/journal.pone.0182513</ext-link></mixed-citation></ref>
<ref id="r7"><mixed-citation>Gustafsson, K., Henning, K., J&#x00F6;nsson, P., Nilsson, R., Petersson, I., &#x0026; Rydholm, A. (2017). <italic>Kartl&#x00E4;ggning av OA-publicering och APC vid G&#x00F6;teborgs universitet</italic>. G&#x00F6;teborgs universitetsbibliotek. <ext-link ext-link-type="uri" xlink:href="https://gupea.ub.gu.se/handle/2077/54026">https://gupea.ub.gu.se/handle/2077/54026</ext-link></mixed-citation></ref>
<ref id="r8"><mixed-citation>Lawson, S. (2015). &#x2018;Total cost of ownership&#x2019; of scholarly communication: Managing subscription and APC payments together. <italic>Learned Publishing, 28</italic>(1), 9&#x2013;13. <ext-link ext-link-type="doi" xlink:href="10.1087/20150103">https://doi.org/10.1087/20150103</ext-link></mixed-citation></ref>
<ref id="r9"><mixed-citation>Linna, A.-K., Holopainen, M., Ikonen, A., &#x0026; Yl&#x00F6;nen, I. (2020). Kotimaiset tieteelliset julkaisusarjat ja avoimuus. <italic>Informaatiotutkimus, 39</italic>(4), 4&#x2013;32. <ext-link ext-link-type="doi" xlink:href="10.23978/inf.98656">https://doi.org/10.23978/inf.98656</ext-link></mixed-citation></ref>
<ref id="r10"><mixed-citation>Lov&#x00E9;n, L. (2019). Monitoring open access publishing costs at Stockholm University. <italic>Insights 32</italic>(1), 3. <ext-link ext-link-type="uri" xlink:href="http://doi.org/10.1629/uksg.451">http://doi.org/10.1629/uksg.451</ext-link></mixed-citation></ref>
<ref id="r11"><mixed-citation>Monaghan, J., Lucraft, M., Allin, K., van der Graaf, M., &#x0026; Clarke, T. (2020). <italic>&#x2018;APCs in the wild&#x2019;: Exploring funding streams for an accelerated transition to open access</italic>. SpringerNature. <ext-link ext-link-type="doi" xlink:href="10.6084/m9.figshare.11988123.v4">https://doi.org/10.6084/m9.figshare.11988123.v4</ext-link></mixed-citation></ref>
<ref id="r12"><mixed-citation>Open Science Coordination in Finland, Federation of Finnish Learned Societies. (2020). <italic>Open access to scholarly publications. National Policy and executive plan by the research community in Finland for 2020&#x2013;2025 (1)</italic> (Responsible Research Series 3:2019, 2<sup>nd</sup> volume). <ext-link ext-link-type="doi" xlink:href="10.23847/isbn.9789525995343">https://doi.org/10.23847/isbn.9789525995343</ext-link></mixed-citation></ref>
<ref id="r13"><mixed-citation>Pieper, D., &#x0026; Broschinski, C. (2018). Openapc: A contribution to a transparent and reproducible monitoring of fee-based open access publishing across institutions and nations. <italic>Insights, 31</italic>, 1&#x2013;39. <ext-link ext-link-type="uri" xlink:href="http://doi.org/10.1629/uksg.439">http://doi.org/10.1629/uksg.439</ext-link></mixed-citation></ref>
<ref id="r14"><mixed-citation>Pinfield, S., Salter, J., &#x0026; Bath, P. A. (2016). The &#x2018;total cost of publication&#x2019; in a hybrid open-access environment: Institutional approaches to funding journal article-processing charges in combination with subscriptions. <italic>Journal of the Association for Information Science and Technology, 67</italic>(7), 1751&#x2013;1766. <ext-link ext-link-type="doi" xlink:href="10.1002/asi.23446">https://doi.org/10.1002/asi.23446</ext-link></mixed-citation></ref>
<ref id="r15"><mixed-citation>Piwowar, H., Priem, J., Larivi&#x00E8;re, V., Alperin, J. P., Matthias, L., Norlander, B., Farley, A., West, J., &#x0026; Haustein, S. (2018). The state of OA: A large-scale analysis of the prevalence and impact of open access articles. <italic>PeerJ 6</italic>, Article e4375. <ext-link ext-link-type="doi" xlink:href="10.7717/peerj.4375">https://doi.org/10.7717/peerj.4375</ext-link></mixed-citation></ref>
<ref id="r16"><mixed-citation>Priem, J. (2021, December 1). What do the types of oa_status (green, gold, hybrid, and bronze) mean&#x003F; <italic>Unpaywall support portal</italic>. <ext-link ext-link-type="uri" xlink:href="https://support.unpaywall.org/support/solutions/articles/44001777288-what-do-the-types-of-oa-status-green-gold-hybrid-and-bronze-mean-">https://support.unpaywall.org/support/solutions/articles/44001777288-what-do-the-types-of-oa-status-green-gold-hybrid-and-bronze-mean-</ext-link></mixed-citation></ref>
<ref id="r17"><mixed-citation>Vil&#x00E9;n, T., Danielsson, M., Ikonen, A., Jussila, J., Laakkonen, A., Linna, A.-K., Merimaa, M., Suonp&#x00E4;&#x00E4;, S., Yl&#x00F6;nen, I., &#x0026; Xu, Q. (2021). <italic>Kirjoittajamaksut ja niiden seuranta: Havaintoja ja kehitysehdotuksia. FinELibin Open APC -projektin loppuraportti</italic> (Kansalliskirjaston raportteja ja selvityksi&#x00E4; 2/2021). Kansalliskirjasto Nationalbiblioteket. <ext-link ext-link-type="uri" xlink:href="http://urn.fi/URN:ISBN:978-951-51-7163-4">http://urn.fi/URN:ISBN:978-951-51-7163-4</ext-link></mixed-citation></ref>
<ref id="r18"><mixed-citation>Visser, M., van Eck, N. J., &#x0026; Waltman, L. (2021). Large-scale comparison of bibliographic data sources: Scopus, Web of Science, Dimensions, Crossref, and Microsoft Academic. <italic>Quantitative Science Studies, 2</italic>(1), 20&#x2013;41. <ext-link ext-link-type="doi" xlink:href="10.1162/qss_a_00112">https://doi.org/10.1162/qss_a_00112</ext-link></mixed-citation></ref>
<ref id="r19"><mixed-citation>Weiss, P. S. (2012). Who are corresponding authors&#x003F; <italic>ACS Nano, 6</italic>(4), 2861. <ext-link ext-link-type="doi" xlink:href="10.1021/nn301566x">https://doi.org/10.1021/nn301566x</ext-link></mixed-citation></ref>
</ref-list>
<fn-group>
<title>Notes</title>
<fn id="fn1"><p>VIRTA Publication Information Service, implemented in 2016, is a data hub that compiles bibliographic information of scientific publications from Finnish higher education institutions and research organisations using different local solutions for the publication data collection, such as Current Research Information Systems (CRIS). The Ministry of Education and Culture&#x2019;s annual publication data collection is carried out through VIRTA. Further information is available at <ext-link ext-link-type="uri" xlink:href="https://wiki.eduuni.fi/display/cscvirtajtp/VIRTA+in+English">https://wiki.eduuni.fi/display/cscvirtajtp/VIRTA+in+English</ext-link>.</p></fn>
<fn id="fn2"><p><ext-link ext-link-type="uri" xlink:href="https://www.kb.se/samverkan-och-utveckling/oppen-tillgang-och-bibsamkonsortiet/open-apc-sweden.html">https://www.kb.se/samverkan-och-utveckling/oppen-tillgang-och-bibsamkonsortiet/open-apc-sweden.html</ext-link></p></fn>
<fn id="fn3"><p>The Ministry of Education and Culture collects publication data from Finnish higher education institutions, state research institutions and university hospitals on a yearly basis to produce a knowledge base on the research activities and the social impact of the Finnish research system. For data collection, the publications are classified based on their format (publication type classification), scientific quality (Publication Forum) and contents (field of scientific classification).</p></fn>
<fn id="fn4"><p>Vuori, Petri (2021): Kirjoittajamaksutietojen ker&#x00E4;&#x00E4;minen ulkoisista l&#x00E4;hteist&#x00E4;. Bachelor&#x2019;s thesis of the Turku University of Applied Sciences, available at <ext-link ext-link-type="uri" xlink:href="https://urn.fi/URN:NBN:fi:amk-2021061816385">https://urn.fi/URN:NBN:fi:amk-2021061816385</ext-link>. Although Anna-Kaarina Linna acted as one of the supervisors of the thesis and the library produced Web of Science, Scopus and Unpaywall data for the work, the analysis in this article has been carried out separately.</p></fn>
</fn-group>
</back>
</article>

''')

{}