In [2]:
import pandas as pd
# Load the raw export of candidate studies that the rest of the notebook filters.
collection = pd.read_csv('google_scholar_683_relevant_studies.csv')

## Input files this notebook can be pointed at (swap the path passed to read_csv above):

# google_scholar_683_relevant_studies.csv
# raw_data/backward_snowballing_iteration_1.csv
# raw_data/backward_snowballing_iteration_2.csv
# raw_data/forward_snowballing_iteration_1.csv

def add_criteria_columns(prefix, max_range, df=None):
    """Add boolean selection-criteria columns, all initialised to False.

    Creates the columns ``<prefix>1`` .. ``<prefix><max_range>`` (for example
    ``i1`` .. ``i6``) in place on the target frame. Existing columns with the
    same names are overwritten with False.

    Args:
        prefix: Column-name prefix, e.g. ``'i'`` or ``'e'``.
        max_range: Number of columns to create; values below 1 add nothing.
        df: DataFrame to modify. When omitted, the notebook-level ``collection``
            is used, so existing two-argument calls behave as before.

    Returns:
        None. The frame is modified in place.
    """
    # Resolve the global lazily so the function also works on any frame passed
    # explicitly, without requiring `collection` to exist.
    if df is None:
        df = collection
    for x in range(1, max_range + 1):
        df[prefix + str(x)] = False

# Create the selection-criteria flag columns: six inclusion (i*) and five exclusion (e*).
for criteria_prefix, criteria_count in (('i', 6), ('e', 5)):
    add_criteria_columns(criteria_prefix, criteria_count)

collection

Unnamed: 0,Key,Item Type,Publication Year,Author,Title,Publication Title,ISBN,ISSN,DOI,Url,...,i2,i3,i4,i5,i6,e1,e2,e3,e4,e5
0,SN9QLFVM,conferencePaper,2016,"Benkrid, Abdenour; Benallegue, Abdelaziz; Acho...",Robot's energy consumption based multi-robot e...,2016 IEEE International Conference on Robotics...,,,,,...,False,False,False,False,False,False,False,False,False,False
1,VCVUEFIU,journalArticle,2011,"Puig, Domenec; García, Miguel Angel; Wu, L.",A new global optimization strategy for coordin...,Robotics and Autonomous Systems,,,,,...,False,False,False,False,False,False,False,False,False,False
2,6NDLPMD6,conferencePaper,2009,"Karnouskos, Stamatis; Colombo, Armando Walter;...",Towards the energy efficient future factory,2009 7th IEEE International Conference on Indu...,,,,,...,False,False,False,False,False,False,False,False,False,False
3,N3WSCPHD,journalArticle,2017,"Carabin, Giovanni; Wehrle, Erich; Vidoni, Renato",A review on energy-saving optimization methods...,Robotics,,,,,...,False,False,False,False,False,False,False,False,False,False
4,TLMKA8PC,journalArticle,2013,"Pellicciari, Marcello; Berselli, Giovanni; Lea...",A method for reducing the energy consumption o...,Mechatronics,,,,,...,False,False,False,False,False,False,False,False,False,False
5,6SB6EC8L,journalArticle,2015,"Pellegrinelli, Stefania; Borgia, Stefano; Pedr...",Minimization of the energy consumption in moti...,Procedia Cirp,,,,,...,False,False,False,False,False,False,False,False,False,False
6,3Z9TY6HA,journalArticle,2015,"Brossog, Matthias; Bornschlegl, Martin; Franke...",Reducing the energy consumption of industrial ...,The International Journal of Advanced Manufact...,,,,,...,False,False,False,False,False,False,False,False,False,False
7,26ECU8KV,journalArticle,2019,"Gadaleta, Michele; Pellicciari, Marcello; Bers...",Optimization of the energy consumption of indu...,Robotics and Computer-Integrated Manufacturing,,,,,...,False,False,False,False,False,False,False,False,False,False
8,RUVJIB22,journalArticle,2016,"Bukata, Libor; Š\uucha, Přemysl; Hanzálek, Zde...",Energy optimization of robotic cells,IEEE Transactions on Industrial Informatics,,,,,...,False,False,False,False,False,False,False,False,False,False
9,47WMULEZ,conferencePaper,2004,"Mei, Yongguo; Lu, Yung-Hsiang; Hu, Y. Charlie;...",Energy-efficient motion planning for mobile ro...,IEEE International Conference on Robotics and ...,,,,,...,False,False,False,False,False,False,False,False,False,False


In [3]:
# Filter records based on types that are to be considered (peer reviewed by nature).
def filter_types(col):
    """Keep only records whose 'Item Type' is peer reviewed by nature.

    A record is kept when its 'Item Type' string contains one of the accepted
    type names. Every kept record gets ``i4`` set to True.

    Args:
        col: DataFrame with an 'Item Type' column.

    Returns:
        A new DataFrame holding the kept records in their original order, with
        the index renumbered from 0 and ``i4`` set to True. ``col`` itself is
        not modified, and column dtypes are preserved.
    """
    # Matched as substrings, like the original chained `in` checks; note that
    # "book" therefore also matches "bookSection".
    accepted_types = ("journalArticle", "conferencePaper", "bookSection", "book")

    def is_accepted(item_type):
        # A missing type (NaN/None) is not a string; reject it instead of
        # letting the `in` test raise TypeError.
        return isinstance(item_type, str) and any(
            accepted in item_type for accepted in accepted_types
        )

    # One boolean mask instead of appending row by row, which was quadratic and
    # degraded the column dtypes.
    keep = col['Item Type'].map(is_accepted).to_numpy(dtype=bool)
    return col.loc[keep].reset_index(drop=True).assign(i4=True)

# Filter duplicates but keep highest priority variant.
def filter_duplicates(col):
    """Drop records with a repeated 'Title', keeping the best variant of each.

    Variants are ranked by 'Item Type' (lower number = better):
    journalArticle, conferencePaper, book, bookSection, then any other type.
    On a tie the first occurrence wins.

    Args:
        col: DataFrame with 'Title' and 'Item Type' columns.

    Returns:
        A new DataFrame with one record per distinct title, in the original
        order of the surviving records and with the index renumbered from 0.
        Records with a missing title are always kept, because a missing title
        cannot identify a duplicate. ``col`` itself is not modified.
    """
    type_priority = {
        "journalArticle": 0,
        "conferencePaper": 1,
        "book": 2,
        "bookSection": 3,
    }
    # Unrecognised or missing types rank below every known type. The original
    # left them as strings, so comparing them to an int raised TypeError.
    unknown_priority = len(type_priority)

    kept_positions = []   # Row positions in `col`; None marks a superseded slot
    best_by_title = {}    # title -> (slot in kept_positions, priority)

    rows = zip(col['Title'].tolist(), col['Item Type'].tolist())
    for position, (title, item_type) in enumerate(rows):
        if isinstance(item_type, str):
            priority = type_priority.get(item_type, unknown_priority)
        else:
            priority = unknown_priority

        if pd.isna(title):
            kept_positions.append(position)
            continue

        stored = best_by_title.get(title)
        if stored is None:
            # First time this title is seen.
            best_by_title[title] = (len(kept_positions), priority)
            kept_positions.append(position)
        elif priority < stored[1]:
            # A better variant: retire the stored record and keep this one.
            # The original called df.drop() without using its result, so the
            # worse variant was never removed.
            kept_positions[stored[0]] = None
            best_by_title[title] = (len(kept_positions), priority)
            kept_positions.append(position)

    survivors = [position for position in kept_positions if position is not None]
    return col.iloc[survivors].reset_index(drop=True)

# Purely for testing, not used at all and should not be used on the collection.
# Only to get an indication of the amount of papers that are more explicit in the title.
def filter_titles(col):
    """Keep only records whose 'Title' contains one of a fixed set of keywords.

    Matching is a case-sensitive substring test, so "Efficiency" with a
    capital E does not match "efficiency".

    Args:
        col: DataFrame with a 'Title' column.

    Returns:
        A new DataFrame holding the matching records in their original order,
        with the index renumbered from 0. ``col`` itself is not modified.
    """
    keywords = (
        "efficiency",
        "consumption",
        "optimisation",
        "optimization",
        "modeling",
        "analysis",
    )

    def mentions_keyword(title):
        # A missing title (NaN/None) is not a string; treat it as a non-match
        # instead of letting the `in` test raise TypeError.
        return isinstance(title, str) and any(keyword in title for keyword in keywords)

    # One boolean mask instead of appending row by row, which was quadratic.
    keep = col['Title'].map(mentions_keyword).to_numpy(dtype=bool)
    return col.loc[keep].reset_index(drop=True)

In [4]:
# Keep only the records whose 'Item Type' is peer reviewed by nature (as discussed in
# email contact); filter_types also sets i4 = True on every record it keeps.
col_types = filter_types(collection)
col_types

Unnamed: 0,Key,Item Type,Publication Year,Author,Title,Publication Title,ISBN,ISSN,DOI,Url,...,i1,i2,i3,i4,i5,e1,e2,e3,e4,e5
0,6BH5WM7V,journalArticle,2017.0,"Du, Zhihui; He, Ligang; Chen, Yinong; Xiao, Yu...",Robot Cloud: Bridging the power of robotics an...,Future Generation Computer Systems,,,,,...,False,False,False,True,False,False,False,False,False,False
1,CAV3PKJB,journalArticle,2007.0,"Han, Jun; Asada, Akira; Ura, Tamaki; Yamauchi,...",Noncontact power supply for seafloor geodetic ...,Journal of marine science and technology,,,,,...,False,False,False,True,False,False,False,False,False,False
2,LCA5ZRZQ,conferencePaper,2010.0,"Wang, Binhai; Chen, Xiguang; Wang, Qian; Liu, ...",Power line inspection with a flying robot,2010 1st International Conference on Applied R...,,,,,...,False,False,False,True,False,False,False,False,False,False
3,8YQZGJKS,journalArticle,2005.0,"CAI, Gai-pin; HUANG, Zhi-qing",The Realization of the Industrial Revolving Tu...,Machine Tool & Hydraulics,,,,,...,False,False,False,True,False,False,False,False,False,False
4,GK66MU6T,conferencePaper,2006.0,"Mei, Yongguo; Lu, Yung-Hsiang; Lee, CS George;...",Energy-efficient mobile robot exploration,Proceedings 2006 IEEE International Conference...,,,,,...,False,False,False,True,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
610,XF5HJDYH,journalArticle,2018.0,"Abadi, Vida Shams Esfand; Rostami, Mostafa; Ra...",Walking Path Prevision of Biped Robot along wi...,Modares Mechanical Engineering,,,,,...,False,False,False,True,False,False,False,False,False,False
611,8CEZTE8L,journalArticle,2019.0,"Almécija Murciano, Francisco Javier",Power management strategies for a mobile robot,,,,,,...,False,False,False,True,False,False,False,False,False,False
612,GXWNQ2F3,journalArticle,2001.0,"Krstulović, Ante",Robot Energy Efficiency Through Redundancy,Strojarstvo,,,,,...,False,False,False,True,False,False,False,False,False,False
613,YUR8N5QG,journalArticle,2011.0,王文俊,高效率電源管理之智慧型僕役機器人-總計畫: 高效率電源管理之智慧型僕役機器人; Intell...,財團法人國家實驗研究院科技政策研究與資訊中心,,,,,...,False,False,False,True,False,False,False,False,False,False


In [5]:
# Filter duplicates and keep highest priority ones, export to CSV (final record set).
col_dups = filter_duplicates(col_types)
# NOTE(review): this file name lacks the "iteration_" segment used by the names listed
# below, and the first cell loads the Google Scholar export rather than a snowballing
# file. Confirm the output path matches the input being processed before running.
col_dups.to_csv("processed_data/forward_snowballing_1_filtered.csv")

# Presumably the output names for the snowballing inputs (TODO confirm):
# processed_data/backward_snowballing_iteration_1_filtered.csv
# processed_data/backward_snowballing_iteration_2_filtered.csv
# processed_data/forward_snowballing_iteration_1_filtered.csv

Unnamed: 0,Key,Item Type,Publication Year,Author,Title,Publication Title,ISBN,ISSN,DOI,Url,...,i1,i2,i3,i4,i5,e1,e2,e3,e4,e5
0,6BH5WM7V,journalArticle,2017.0,"Du, Zhihui; He, Ligang; Chen, Yinong; Xiao, Yu...",Robot Cloud: Bridging the power of robotics an...,Future Generation Computer Systems,,,,,...,False,False,False,True,False,False,False,False,False,False
1,CAV3PKJB,journalArticle,2007.0,"Han, Jun; Asada, Akira; Ura, Tamaki; Yamauchi,...",Noncontact power supply for seafloor geodetic ...,Journal of marine science and technology,,,,,...,False,False,False,True,False,False,False,False,False,False
2,LCA5ZRZQ,conferencePaper,2010.0,"Wang, Binhai; Chen, Xiguang; Wang, Qian; Liu, ...",Power line inspection with a flying robot,2010 1st International Conference on Applied R...,,,,,...,False,False,False,True,False,False,False,False,False,False
3,8YQZGJKS,journalArticle,2005.0,"CAI, Gai-pin; HUANG, Zhi-qing",The Realization of the Industrial Revolving Tu...,Machine Tool & Hydraulics,,,,,...,False,False,False,True,False,False,False,False,False,False
4,GK66MU6T,conferencePaper,2006.0,"Mei, Yongguo; Lu, Yung-Hsiang; Lee, CS George;...",Energy-efficient mobile robot exploration,Proceedings 2006 IEEE International Conference...,,,,,...,False,False,False,True,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
605,XF5HJDYH,journalArticle,2018.0,"Abadi, Vida Shams Esfand; Rostami, Mostafa; Ra...",Walking Path Prevision of Biped Robot along wi...,Modares Mechanical Engineering,,,,,...,False,False,False,True,False,False,False,False,False,False
606,8CEZTE8L,journalArticle,2019.0,"Almécija Murciano, Francisco Javier",Power management strategies for a mobile robot,,,,,,...,False,False,False,True,False,False,False,False,False,False
607,GXWNQ2F3,journalArticle,2001.0,"Krstulović, Ante",Robot Energy Efficiency Through Redundancy,Strojarstvo,,,,,...,False,False,False,True,False,False,False,False,False,False
608,YUR8N5QG,journalArticle,2011.0,王文俊,高效率電源管理之智慧型僕役機器人-總計畫: 高效率電源管理之智慧型僕役機器人; Intell...,財團法人國家實驗研究院科技政策研究與資訊中心,,,,,...,False,False,False,True,False,False,False,False,False,False
