In [21]:
import pandas as pd
import numpy as np

In [164]:
def collapse_ids(ids):
    """Render a collection of integer ids as a compact, bracketed range list.

    The ids are de-duplicated and sorted; every run of consecutive values is
    collapsed to ``first-last`` and the pieces are joined with ``', '``.

    Examples:
        collapse_ids([101, 1, 2, 3, 999, 5, 6, 99, 100]) -> '[1-3, 5-6, 99-101, 999]'
        collapse_ids([1, 3])                             -> '[1, 3]'
        collapse_ids([])                                 -> ''

    Args:
        ids: Any iterable of integers (list, tuple, set, NumPy array,
            generator, ...). It is not modified.

    Returns:
        The formatted string, or an empty string when there are no ids.
    """
    # De-duplicate first: a repeated id would otherwise break a run in two
    # and be listed twice (e.g. "[1, 1-3]" instead of "[1-3]").
    unique_ids = sorted(set(ids))
    if not unique_ids:
        return ''

    def render_run(first, last):
        # A run of length one is printed as a bare number.
        return str(first) if first == last else f"{first}-{last}"

    parts = []
    run_start = run_end = unique_ids[0]
    for current in unique_ids[1:]:
        if current == run_end + 1:
            # Still inside the current consecutive run; extend it.
            run_end = current
            continue
        # Gap found: close the finished run and start a new one.
        parts.append(render_run(run_start, run_end))
        run_start = run_end = current
    # Flush the run that was still open when the loop ended.
    parts.append(render_run(run_start, run_end))

    return '[' + ', '.join(parts) + ']'



# Smoke checks for collapse_ids: unsorted input with several runs, a single
# run, no runs at all, a two-element gap, and the empty case.
for sample_ids in (
    [101, 1, 2, 3, 999, 5, 6, 99, 100],
    [1, 2, 3],
    [1, 24, 35],
    [1, 3],
    [],
):
    print(collapse_ids(sample_ids))
            

[1-3, 5-6, 99-101, 999]
[1-3]
[1, 24, 35]
[1, 3]



In [217]:
# Source spreadsheet (browser view):
# https://docs.google.com/spreadsheets/d/1XN2hxenZtmYpYkplkH6CIW3iypAPI27j/edit?gid=774440903#gid=774440903
sheet_id = "1XN2hxenZtmYpYkplkH6CIW3iypAPI27j"
sheet_name = "Sheet1"

# Address that asks for the named sheet with CSV output (tqx=out:csv), so the
# result can be handed directly to a CSV reader.
url = (
    "https://docs.google.com/spreadsheets/d/"
    + sheet_id
    + "/gviz/tq?tqx=out:csv&sheet="
    + sheet_name
)


In [218]:
# Load the sheet and turn its tick marks into booleans: blank cells become
# False and cells containing exactly "x" become True.
pubs = (
    pd.read_csv(url)
    .fillna(False)
    .replace("x", True)
)
# Header cells may carry stray leading/trailing whitespace; normalise them so
# the column names used in later cells match.
pubs.columns = list(map(str.strip, pubs.columns))

In [219]:
# Number the publications by type first and by year second: the three flag
# columns are sorted descending (flagged rows come first), in the priority
# journal > conference > workshop, and 'Year' ascending breaks ties so that
# older entries get lower numbers within a type.
pubs = (
    pubs
    .sort_values(
        by=['Is journal', 'Is conference', 'Is workshop', 'Year'],
        ascending=[False, False, False, True],
    )
    .assign(
        # Consecutive 1-based ids following the sorted row order.
        ID=lambda frame: np.arange(1, len(frame) + 1),
        # Always-true flag so "all publications" can be handled like any
        # other filter column.
        Total=True,
    )
)

In [220]:
# Boolean columns used as row filters of the summary table (one table row
# per entry). 'Total' is the always-true helper column.
filter_cols = [
    'Total',
    'Sole author',
    'First author',
    'Most senior author',
    'With supervised doctoral student',
    'With supervised first or second cycle student',
    'With external collaboration partner',
    'With domestic academic partner',
    'With international collaboration partner',
    'Publications during the last 5 years',
]

# Boolean columns naming the publication type (one table column per entry).
data_cols = [
    'Is journal',
    'Is conference',
    'Is workshop',
    'Monograph',
    'Anthologies',
    'Articles in anthologies with peer-review',
    'Popular science works',
    'Study material',
    'Is other',
]

In [221]:
def format_cell(values):
    """Format one summary-table cell as ``"<count> <collapsed ids>"``.

    Args:
        values: Sized collection of publication ids (list or NumPy array).

    Returns:
        ``"0"`` when ``values`` is empty, otherwise the number of ids
        followed by a space and the output of ``collapse_ids(values)``,
        e.g. ``"3 [24-26]"``.
    """
    count = len(values)
    if count == 0:
        # collapse_ids returns '' for no ids; returning early avoids the
        # dangling space that "0 " + '' would leave in the cell.
        return "0"
    return f"{count} {collapse_ids(values)}"
    
    

# Build the summary table: one row per filter column, one column per
# publication type. Each cell lists the publications that are flagged in both.
result = []
for fc in filter_cols:
    in_filter = pubs[fc] == True
    result.append([
        format_cell(pubs.loc[in_filter & (pubs[dc] == True), 'ID'].values)
        for dc in data_cols
    ])

publications_list = pd.DataFrame(result, index=filter_cols, columns=data_cols)

# Publication types that are shown as one combined column.
merge_cols = ['Monograph', 'Anthologies', 'Articles in anthologies with peer-review', 'Popular science works']
if len(merge_cols) > 0:
    new_col = ', '.join(merge_cols)
    # A publication belongs to the combined column when it is flagged in any
    # of the merged types. Computing the cell from the data (instead of
    # writing a constant 0) keeps such publications visible should the sheet
    # ever contain one, and keeps the cell format equal to the other columns.
    in_merged = (pubs[merge_cols] == True).any(axis=1)
    merged_cells = [
        format_cell(pubs.loc[in_merged & (pubs[fc] == True), 'ID'].values)
        for fc in filter_cols
    ]
    # Drop before inserting: with a single entry in merge_cols the combined
    # name equals that entry, and dropping afterwards would remove the new
    # column again.
    publications_list = publications_list.drop(columns=merge_cols)
    publications_list[new_col] = merged_cells

publications_list




# result

Unnamed: 0,Is journal,Is conference,Is workshop,Study material,Is other,"Monograph, Anthologies, Articles in anthologies with peer-review, Popular science works"
Total,14 [1-14],9 [15-23],3 [24-26],1 [28],"4 [27, 29-31]",0
Sole author,0,0,2 [24-25],0,0,0
First author,"7 [4-8, 10, 12]","5 [15-17, 21-22]",3 [24-26],1 [28],1 [27],0
Most senior author,0,1 [22],1 [26],1 [28],0,0
With supervised doctoral student,0,0,0,0,1 [29],0
With supervised first or second cycle student,0,0,0,0,0,0
With external collaboration partner,3 [10-12],1 [23],0,1 [28],1 [29],0
With domestic academic partner,1 [11],0,1 [26],0,1 [30],0
With international collaboration partner,"8 [2-3, 6, 8-9, 11, 13-14]","4 [17-18, 20, 23]",0,0,"3 [27, 30-31]",0
Publications during the last 5 years,9 [6-14],5 [19-23],1 [26],1 [28],"4 [27, 29-31]",0


In [222]:
# Print the numbered reference list, grouped by publication type. Types
# without any publication are skipped entirely (no heading is printed).
for dc in data_cols:
    cat_pubs = pubs[pubs[dc] == True]
    if cat_pubs.empty:
        continue
    print('\n' + dc)
    # The loop variable is deliberately not called `id`: at notebook top level
    # that would rebind the builtin id() for every cell executed afterwards.
    for pub_id, ref in cat_pubs[['ID', 'Ref']].values:
        print(f"[{pub_id}]\t{ref}")


Is journal
[1]	Unterkalmsteiner, M., Gorschek, T., Feldt, R., & Klotins, E. (2015). Assessing requirements engineering and software test alignment—Five case studies. Journal of systems and software, 109, 62-77.
[2]	Unterkalmsteiner, M., Abrahamsson, P., Wang, X., Nguyen-Duc, A., Shah, S. Q., Bajwa, S. S., ... & Yague, A. (2016). Software startups–a research agenda. e-Informatica Software Engineering Journal, 10(1), 89-123.
[3]	Tripathi, N., Klotins, E., Prikladnicki, R., Oivo, M., Pompermaier, L. B., Kudakacheril, A. S., ... & Gorschek, T. (2018). An anatomy of requirements engineering in software startups using multi-vocal literature and case survey. Journal of Systems and Software, 146, 130-151.
[4]	Klotins, E., Unterkalmsteiner, M., & Gorschek, T. (2018). Software engineering antipatterns in start-ups. IEEE Software, 36(2), 118-126.
[5]	Klotins, E., Unterkalmsteiner, M., & Gorschek, T. (2018). Software-intensive product engineering in start-ups: a taxonomy. IEEE Software, 35(4), 44