In [27]:
import os
import sys
# Parent directory of the current working directory.
nb_dir = os.path.split(os.getcwd())[0]
# Add it to the module search path (only once) so packages located one level
# above the notebook folder can be imported; the `util` import below
# presumably relies on this -- TODO confirm the project layout.
if nb_dir not in sys.path:
    sys.path.append(nb_dir)

import pandas as pd
import util.utilities as util
from pybtex.database import parse_file

In [28]:
def get_dict_bib(bib_data, filename=''):
    """
    Flatten parsed bibliography data into a dict of parallel column lists.

    Each entry is also echoed to stdout (running index, key, year, authors,
    title, abstract) followed by a '---' separator, and a summary line is
    printed once all entries have been read.

    Entries that lack a 'year', 'title' or 'abstract' field get None in that
    column, and entries without an 'author' role get an empty author list, so
    a single incomplete entry does not abort the whole read.

    @param bib_data: object exposing `entries` as a mapping of citation key to
        entry, where every entry has `fields` and `persons` mappings (for
        example the result of pybtex.database.parse_file).
    @param filename: name used only in the printed status messages.
    @return: dict with the keys 'key', 'year', 'list_authors', 'title' and
        'abstract', each holding a list with one item per entry read. If an
        unexpected error occurs it is printed and the entries collected so
        far are returned.
    """
    columns = ('key', 'year', 'list_authors', 'title', 'abstract')
    # Created outside the try block so the final return always has a value.
    my_data = {column: [] for column in columns}

    try:
        for i, (key, value) in enumerate(bib_data.entries.items(), start=1):
            fields = value.fields
            # Some entry types carry only editors, so the 'author' role may be absent.
            authors = value.persons.get('author')

            year = fields.get('year')
            list_authors = util.show_names(authors) if authors else []
            title = fields.get('title')
            abstract = fields.get('abstract')

            # Append all five columns together so the lists stay the same length.
            my_data['key'].append(key)
            my_data['year'].append(year)
            my_data['list_authors'].append(list_authors)
            my_data['title'].append(title)
            my_data['abstract'].append(abstract)

            print('{}, {}, {}, {}, {}, {}'.format(i, key, year, list_authors, title, abstract))
            print('---')
        print("Sucesso! Leitura de {} entradas no arquivo {}".format(len(my_data['key']), filename))
    except Exception as e:
        # Best-effort contract kept from the original: report and return what was read.
        print("Erro " + str(e) + " na leitura do arquivo " + filename)
    return my_data

In [29]:
# Parse the merged, cleaned thesis bibliography and flatten it into a dict of
# column lists. The commented-out call is an alternative input kept for reference.
#my_data = get_dict_bib(bib_data=parse_file('../../bib/bibliography.bib'))
my_data = get_dict_bib(bib_data=parse_file('../../bib/thesis/merged/mergedallpublishedclean.bib'), filename='mergedallpublishedclean.bib')

1, martini2014architecture, 2014, ['Antonio Martini', 'Jan Bosch', 'Michel Chaudron'], Architecture technical debt: Understanding causes and a qualitative model, None
---
2, kaufmann2015addressing, 2015, ['Robert Kaufmann', 'Sam Odeh'], Addressing efficiency interest in architectural technical debt-A measurement and visualization approach for embedded software, None
---
3, martini2015investigating, 2015, ['Antonio Martini', 'Jan Bosch', 'Michel Chaudron'], Investigating architectural technical debt accumulation and refactoring over time: A multiple-case study, None
---
4, vogel2015applicability, 2015, ['Birgit Vogel-Heuser', 'Susanne R{\\"o}sch'], Applicability of technical debt as a concept to understand obstacles for evolution of automated production systems, None
---
5, del2016identification, 2016, ['Paul Carpio'], Identification of architectural technical debt: An analysis based on naming patterns, None
---
6, dahl2017estimating, 2017, ['Gustav Dahl'], Estimating Architectural Tech

In [30]:
# Build a DataFrame from the bibliography data read above, with the columns
# in a fixed order.
df_bib = pd.DataFrame(data=my_data, columns=['key', 'year', 'list_authors', 'title', 'abstract'])

# Preview the first ten rows.
df_bib.head(10)

Unnamed: 0,key,year,list_authors,title,abstract
0,martini2014architecture,2014,"[Antonio Martini, Jan Bosch, Michel Chaudron]",Architecture technical debt: Understanding cau...,
1,kaufmann2015addressing,2015,"[Robert Kaufmann, Sam Odeh]",Addressing efficiency interest in architectura...,
2,martini2015investigating,2015,"[Antonio Martini, Jan Bosch, Michel Chaudron]",Investigating architectural technical debt acc...,
3,vogel2015applicability,2015,"[Birgit Vogel-Heuser, Susanne R{\""o}sch]",Applicability of technical debt as a concept t...,
4,del2016identification,2016,[Paul Carpio],Identification of architectural technical debt...,
5,dahl2017estimating,2017,[Gustav Dahl],Estimating Architectural Technical Debt: A des...,
6,aaramaa2017requirements,2017,"[Sanja Aaramaa, Sandun Dasanayake, Markku Oivo...",Requirements volatility in software architectu...,
7,martini2017revealing,2017,"[Antonio Martini, Jan Bosch]",Revealing social debt with the CAFFEA framewor...,
8,mendoza2017using,2017,[Paul Carpio],Using Naming Patterns for Identifying Architec...,
9,hsu2018investigating,2018,[Jen Hsu],Investigating the causes of software technical...,


In [18]:
# Sorted copy ordered by publication year (df_bib itself is left untouched).
# NOTE: `year` is stored as strings (object dtype), so this is a lexicographic
# sort; it matches chronological order only while every year has four digits.
df_bib_sort_by_year = df_bib.sort_values('year')

# Preview the ten oldest references.
df_bib_sort_by_year.head(10)

Unnamed: 0,key,year,list_authors,title,abstract
28,Dirac1953888,1953,[P.A.M. Dirac],The lorentz transformation and absolute time,
177,lawson:56,1956,[Ray Lawson],Implications of surface temperatures in the di...,
29,Feynman1963118,1963,"[R.P Feynman, F.L {Vernon Jr.}]",The theory of a general quantum system interac...,
51,brooks1974mythical,1974,[Frederick Brooks],The mythical man-month,
174,mccabe1976complexity,1976,[Thomas McCabe],A complexity measure,
136,koenker1982robust,1982,"[Roger Koenker, Gilbert Bassett Jr]",Robust tests for heteroscedasticity based on r...,
134,chatterjee1986influential,1986,"[Samprit Chatterjee, Ali Hadi]","Influential observations, high leverage points...",
99,standardieee:90,1990,"[Standard Committee, others]",IEEE Standard Glossary of Software Engineering...,
163,Cunningham:1992,1992,[Ward Cunningham],The WyCash Portfolio Management System,
54,perry1992foundations,1992,"[Dewayne Perry, Alexander Wolf]",Foundations for the study of software architec...,


In [25]:
# Sorted copy ordered alphabetically by title (df_bib itself is left untouched).
# NOTE: plain string ordering is case-sensitive, so titles whose compared
# letter is uppercase come before those where it is lowercase.
df_bib_sort_by_title = df_bib.sort_values('title')

# Preview the first ten titles.
df_bib_sort_by_title.head(10)

Unnamed: 0,key,year,list_authors,title,abstract
110,Kazman_2015,2015,"[Rick Kazman, Yuanfang Cai, Ran Mo, Qiong Feng...",A Case Study in Locating the Architectural Roo...,"Our recent research has shown that, in large-s..."
138,Li:2015,2015,"[Zengyang Li, Paris Avgeriou, Peng Liang]",A Systematic Mapping Study on Technical Debt a...,
144,lim2012balancing,2012,"[Erin Lim, Nitin Taksande, Carolyn Seaman]",A balancing act: what software practitioners h...,
174,mccabe1976complexity,1976,[Thomas McCabe],A complexity measure,
67,chidamber1994metrics,1994,"[Shyam Chidamber, Chris Kemerer]",A metrics suite for object oriented design,
146,guo2011portfolio,2011,"[Yuepu Guo, Carolyn Seaman]",A portfolio approach to technical debt management,
18,perez2019proposed,2019,"[Boris P{\'e}rez, Dar{\'\i}o Correal, Hern{\'a...",A proposed model-driven approach to manage arc...,
10,martini2018semi,2018,"[Antonio Martini, Erik Sikander, Niel Madlani]",A semi-automated framework for the identificat...,
20,perez2020semiautomatic,2020,[Boris P{\'e}rez],A semiautomatic approach to identify architect...,
172,elish2013suite,2013,"[Mahmoud Elish, Mojeeb Al-Rahman Al-Khiaty]",A suite of metrics for quantifying historical ...,


In [26]:
# Summarise the column dtypes and non-null counts of the title-sorted frame.
df_bib_sort_by_title.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 183 entries, 110 to 132
Data columns (total 5 columns):
key             183 non-null object
year            183 non-null object
list_authors    183 non-null object
title           183 non-null object
abstract        29 non-null object
dtypes: object(5)
memory usage: 8.6+ KB


In [37]:
# Export the bibliography table to a ';'-separated, UTF-8 encoded CSV file in
# the current working directory and report the outcome.
my_file = 'mergedallpubliblished.csv'
try:
    df_bib.to_csv(my_file, sep=';', encoding='utf-8')
    print(f'{my_file} Salvo com sucesso!')
except Exception as e:
    print(f'Erro ao salvar o arquivo {my_file}: {e}')

mergedallpubliblished.csv Salvo com sucesso!


In [34]:
# Display the bibliography DataFrame as the cell's rich output.
df_bib

Unnamed: 0,key,year,list_authors,title,abstract
0,martini2014architecture,2014,"[Antonio Martini, Jan Bosch, Michel Chaudron]",Architecture technical debt: Understanding cau...,
1,kaufmann2015addressing,2015,"[Robert Kaufmann, Sam Odeh]",Addressing efficiency interest in architectura...,
2,martini2015investigating,2015,"[Antonio Martini, Jan Bosch, Michel Chaudron]",Investigating architectural technical debt acc...,
3,vogel2015applicability,2015,"[Birgit Vogel-Heuser, Susanne R{\""o}sch]",Applicability of technical debt as a concept t...,
4,del2016identification,2016,[Paul Carpio],Identification of architectural technical debt...,
5,dahl2017estimating,2017,[Gustav Dahl],Estimating Architectural Technical Debt: A des...,
6,aaramaa2017requirements,2017,"[Sanja Aaramaa, Sandun Dasanayake, Markku Oivo...",Requirements volatility in software architectu...,
7,martini2017revealing,2017,"[Antonio Martini, Jan Bosch]",Revealing social debt with the CAFFEA framewor...,
8,mendoza2017using,2017,[Paul Carpio],Using Naming Patterns for Identifying Architec...,
9,hsu2018investigating,2018,[Jen Hsu],Investigating the causes of software technical...,
