In [1]:
import os
import re
import numpy
import pandas as pd
# import PyPDF2
import requests
from bs4 import BeautifulSoup
from googlesearch import search

In [2]:
# Configuration for the paper being processed: the IEEE Xplore search endpoint,
# the paper's file stem, and the reference-text / CSV locations derived from it
# (both resolved against the current working directory).
base_url = "https://ieeexplore.ieee.org/search/searchresult.jsp"
file = "Quantum_Machine_Learning_for_6G_Communication_Networks:_State-of-the-Art_and_Vision_for_the_Future"
path = "".join((os.getcwd(), "/references/", file, ".txt"))
csv_path = "".join((os.getcwd(), "/data/", file, ".csv"))

In [3]:
class ResearchPaperList:
    """Collects metadata for the references of one paper into a CSV table.

    The reference list is read from ``<cwd>/references/<title>.txt`` and the
    table from ``<cwd>/data/<title>.csv``.  Each parsed reference becomes one
    row with eight fields, in this order: title, author text, date, INSPEC
    accession number, DOI, citation count, publisher, url.

    ``csv_data`` and ``ref_data`` hold an empty string until the CSV / the
    reference file have been opened.
    """

    # Seconds to wait for a paper page before giving up on that reference.
    REQUEST_TIMEOUT = 30

    def __init__(self, url="", title=""):
        """Store the search URL and derive both file paths from ``title``."""
        self.base_url = url
        self.title = title
        self.path = os.getcwd() + "/references/" + title + ".txt"
        self.csv_path = os.getcwd() + "/data/" + title + ".csv"
        self.csv_data = ""
        self.ref_data = ""

    def open_csv(self):
        """Load ``self.csv_path`` into ``self.csv_data`` (first column is the index).

        Prints a message and leaves ``csv_data`` untouched when the file is missing.
        """
        if not os.path.exists(self.csv_path):
            print("csv file not exists")
            return
        self.csv_data = pd.read_csv(self.csv_path, index_col=0)

    def open_ref(self):
        """Open the reference text file of *this* instance for reading.

        Any previously opened handle is closed first.  Prints a message and
        does nothing when the file is missing.
        """
        if not os.path.exists(self.path):
            print("file not exists")
            return
        self.close_ref()
        # Use the instance path, not a module-level variable, so the object
        # works for any title it was configured with.
        self.ref_data = open(self.path, "r", encoding='utf-8')

    def print_ref(self, head_count=5000):
        """Print up to ``head_count`` lines from the reference file.

        Opens the file first if necessary.  Reading continues from the current
        file position, and a negative ``head_count`` prints until end of file.
        """
        if not self.ref_data:
            self.open_ref()
        if not self.ref_data:
            return
        while head_count != 0:
            line = self.ref_data.readline()
            if not line:
                break
            print(line)
            head_count -= 1

    def close_ref(self):
        """Close the reference file, if open, and reset ``ref_data``."""
        if self.ref_data:
            self.ref_data.close()
        # Reset to the "not opened" sentinel so later calls re-open the file
        # instead of reading from a closed handle.
        self.ref_data = ""

    def set_data(self, url="", title=""):
        """Re-target the instance at another paper and open its CSV and reference file."""
        self.base_url = url
        self.title = title
        self.path = os.getcwd() + "/references/" + title + ".txt"
        self.csv_path = os.getcwd() + "/data/" + title + ".csv"
        self.open_csv()
        self.open_ref()

    def check_index(self, line, index):
        """Return True when ``line`` starts with the reference marker ``[index]``.

        The closing bracket is required so that, e.g., ``[20]`` is not mistaken
        for reference 2.  An empty line simply yields False.
        """
        return line.startswith("[" + str(index) + "]")

    def get_paper_html(self, search_url):
        """Fetch ``search_url`` and return it parsed with BeautifulSoup.

        Returns -1 when the request fails or the status code is not 200; the
        ``get_*`` extractors treat that value as "no metadata found".
        """
        try:
            response = requests.get(search_url, timeout=self.REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            # One unreachable page must not abort a long scraping run.
            print("request failed ", search_url, " ", exc)
            return -1
        if response.status_code != 200:
            return -1
        return BeautifulSoup(response.text, 'html.parser')

    @staticmethod
    def _extract_field(html, key):
        """Return the text following ``"<key>":"`` up to the next quote, or None."""
        _, marker, rest = str(html).partition('"' + key + '":"')
        if not marker:
            return None
        return rest.split('"')[0]

    @staticmethod
    def _to_int(value):
        """Convert ``value`` to int, falling back to 0 for missing/non-numeric text."""
        try:
            return int(value)
        except (TypeError, ValueError):
            return 0

    def get_citations(self, html):
        """Return the page's ``citationCount`` as an int (0 if absent or invalid)."""
        return self._to_int(self._extract_field(html, "citationCount"))

    def get_INSPEC(self, html):
        """Return the page's ``accessionNumber`` as an int (0 if absent or invalid)."""
        return self._to_int(self._extract_field(html, "accessionNumber"))

    def get_DOI(self, html):
        """Return the page's ``doi`` value, or an empty string if absent."""
        return self._extract_field(html, "doi") or ""

    def get_date(self, html):
        """Return the page's ``dateOfInsertion`` value, or an empty string if absent."""
        return self._extract_field(html, "dateOfInsertion") or ""

    def handle_data(self, csv_data, current, index):
        """Parse one reference and store it as row ``index`` of ``self.csv_data``.

        ``current`` is the reference text without its ``[n] `` prefix.  The
        title is the part between the ‘‘ and ’’ quote pairs; text before it is
        stored as the author field.  References without a complete quoted
        title are skipped.  When the text after the title mentions "IEEE", a
        web search for the title is run and the first result URL containing
        "ieee" is fetched for date, INSPEC number, DOI and citation count.

        ``csv_data`` is accepted for interface compatibility but unused; the
        row is always written to ``self.csv_data``.
        """
        row = ["", "", "", 0, "", 0, "", ""]
        # Line breaks are dropped without inserting a space, so a title wrapped
        # across lines is joined directly.
        current = current.replace("\n", "")
        parse = re.split('‘‘|’’', current)
        # Author, title and trailing text are all needed below, so an unclosed
        # quote (two parts only) is skipped as well.
        if len(parse) < 3:
            return
        row[0] = parse[1][:-1]  # drop the last character of the quoted title (presumably a trailing comma — verify)
        row[1] = parse[0]
        if "IEEE" in parse[2]:
            row[6] = "IEEE"
            for search_url in search(row[0], tld="co.in", num=5, stop=5, pause=1):
                if 'ieee' in search_url:
                    row[7] = search_url
                    break
        if row[7]:
            html = self.get_paper_html(row[7])
            row[5] = self.get_citations(html)
            row[3] = self.get_INSPEC(html)
            row[4] = self.get_DOI(html)
            row[2] = self.get_date(html)
        print("idx ", index, " ", row)
        self.csv_data.loc[index] = row

    def extract_reference(self):
        """Walk the reference file and record every reference in ``csv_data``.

        References are expected to be numbered consecutively from 1, each
        starting a line with ``[n]``; lines in between belong to the current
        reference.  The final reference is processed when the end of the file
        is reached.  The file is closed afterwards, even on error.
        """
        if not self.ref_data:
            self.open_ref()
        if not self.ref_data:
            return
        if not isinstance(self.csv_data, pd.DataFrame):
            # Fail early instead of after the first web lookup.
            print("csv data not loaded")
            return
        index = 1
        current = ""
        try:
            for line in self.ref_data:
                if self.check_index(line, index + 1):
                    # The next marker ends the current reference: strip its
                    # "[n] " prefix and store it.
                    self.handle_data(self.csv_data, current[3 + len(str(index)):], index)
                    current = line
                    index += 1
                else:
                    current += line
            if current:
                # No marker follows the last reference, so flush it here.
                self.handle_data(self.csv_data, current[3 + len(str(index)):], index)
        finally:
            self.close_ref()

    def save_csv_data(self):
        """Write ``csv_data`` to this instance's ``csv_path`` (NaN for missing values)."""
        self.csv_data.to_csv(self.csv_path, sep=',', na_rep='NaN')
                
            

In [4]:
# Create the collector with default (empty) settings, then point it at the
# configured paper; set_data also opens the paper's CSV and reference file.
q1 = ResearchPaperList()
q1.set_data(url=base_url, title=file)


In [None]:
# Parse the reference file and add one row per reference to q1.csv_data.
# References whose text mentions "IEEE" trigger a web search and an HTTP fetch,
# so this cell needs network access.
q1.extract_reference()

idx  2   ['A survey of 5G network: Architecture andemerging technologies', 'A. Gupta and E. R. K. Jha, ', '07 August 2015', 15351996, '10.1109/ACCESS.2015.2461602', 1315, 'IEEE', 'https://ieeexplore.ieee.org/document/7169508']
idx  3   ['Towards massive connectivity support for scal-able mMTC communications in 5G networks', 'C. Bockelmann et al., ', '18 June 2018', 17829625, '10.1109/ACCESS.2018.2837382', 131, 'IEEE', 'http://ieeexplore.ieee.org/document/8360103/']
idx  4   ['Towards massive machinetype communications in ultra-dense cellular IoT networks: Currentissues and machine learning-assisted solutions', 'S. K. Sharma and X. Wang. (Aug. 2018). ', '', 0, '', 0, '', '']
idx  5   ['Open issues and beyond 5G', 'M. Chiani, E. Paolini, and F. Callegati, ', '', 0, '', 0, '', '']
idx  6   ['6G vision and requirements: Is there any needfor beyond 5G', 'K. David and H. Berndt, ', '30 August 2018', 18058905, '10.1109/MVT.2018.2848498', 250, 'IEEE', 'https://ieeexplore.ieee.org/document/8412

idx  45   ['5G: A tutorial overview of standards, trials, challenges,deployment, and practice', 'M. Shafi et al., ', '02 June 2017', 16915203, '10.1109/JSAC.2017.2692307', 1044, 'IEEE', 'https://ieeexplore.ieee.org/document/7894280']
idx  46   ['What will 5G be', 'J. G. Andrews et al., ', '22 July 2014', 14469237, '10.1109/JSAC.2014.2328098', 5490, 'IEEE', 'https://ieeexplore.ieee.org/document/6824752']
idx  47   ['Millimeter-wave communication with out-of-band information', 'N. Gonzalez-Prelcic, A. Ali, V. Va, and R. W. Heath, Jr., ', '13 December 2017', 17446971, '10.1109/MCOM.2017.1700207', 53, 'IEEE', 'http://ieeexplore.ieee.org/document/8198818']
idx  48   ['Energy and spectral effi-ciency of very large multiuser MIMO systems', 'H. Q. Ngo, E. G. Larsson, and T. L. Marzetta, ', '07 May 2013', 13487669, '10.1109/TCOMM.2013.020413.110848', 2011, 'IEEE', 'https://ieeexplore.ieee.org/document/6457363']
idx  49   ['Optimal cell load and throughput in green small cellnetworks with genera

idx  81   ['Selecting array con-figurations for MIMO systems: An evolutionary computation approach', 'P. D. Karamalis, N. D. Skentos, and A. G. Kanatas, ', '03 January 2005', 8258804, '10.1109/TWC.2004.837447', 30, 'IEEE', 'http://ieeexplore.ieee.org/document/1374907/']
idx  82   ['Power control of cellularradio systems via robust Smith prediction filter', 'B.-K. Lee, H.-W. Chen, and B.-S. Chen, ', '18 October 2004', 8155807, '10.1109/TWC.2004.834705', 39, 'IEEE', 'http://ieeexplore.ieee.org/abstract/document/1343917/']
idx  83   ['Joint receive antenna selection and symboldetection for MIMO systems: A heterogeneous genetic approach', 'H. Y. Lu and W. H. Fang, ', '13 February 2009', 10475211, '10.1109/LCOMM.2009.081036', 7, 'IEEE', 'http://ieeexplore.ieee.org/document/4783770']
idx  84   ['GAbased estimation of sparse MIMO channels with superimposed training', 'B. Mansoor, S. J. Nawaz, M. I. Tiwana, J. Ahmed, and A. Haseeb, ', '', 0, '', 0, '', '']
idx  85   ['Evolutionary algorithms f

idx  124   ['The private classical capacity and quantum capacity of aquantum channel', 'I. Devetak, ', '10 January 2005', 8267891, '10.1109/TIT.2004.839515', 479, 'IEEE', 'https://ieeexplore.ieee.org/iel5/18/30067/01377491.pdf']
idx  125   ['Identifying the informationgain of a quantum measurement', 'M. Berta, J. M. Renes, and M. M. Wilde, ', '11 August 2014', 14515021, '10.1109/ISIT.2014.6874849', 1, 'IEEE', 'http://ieeexplore.ieee.org/document/6874849/']
idx  126   ['Analysis of quantum key distribution basedsatellite communication', 'V. Sharma and S. Banerjee, ', '', 0, '', 0, '', '']
idx  127   ['Quantum key distribution technologies', 'K. Inoue, ', '', 0, '', 0, 'IEEE', '']
idx  128   ['Teleporting an unknown quantum state via dual clas-sical and einstein-podolsky-rosen channels', 'C. H. Bennett, G. Brassard, C. Crépeau, R. Jozsa, A. Peres, andW. K. Wootters, ', '', 0, '', 0, '', '']
idx  129   ['Communication via one- and two-particle operators on Einstein-Podolsky-Rosen states',

In [5]:
# Raise pandas' row display limit to 300 so the head(300) call below is not truncated.
pd.set_option('display.max_rows', 300)
q1.csv_data.head(300)

Unnamed: 0,title,author,date,INSPEC,DOI,citations,publisher,url
0,Quantum Machine Learning for 6G Communication ...,"Syed Junaid Nawaz, Shree Krishna Sharma, Shurj...",04 April 2019,18609246,10.1109/ACCESS.2019.2909490,170,IEEE,https://ieeexplore.ieee.org/document/8681450
2,A survey of 5G network: Architecture andemergi...,"A. Gupta and E. R. K. Jha,",07 August 2015,15351996,10.1109/ACCESS.2015.2461602,1308,IEEE,https://ieeexplore.ieee.org/document/7169508
3,Towards massive connectivity support for scal-...,"C. Bockelmann et al.,",18 June 2018,17829625,10.1109/ACCESS.2018.2837382,131,IEEE,http://ieeexplore.ieee.org/document/8360103/
4,Towards massive machinetype communications in ...,S. K. Sharma and X. Wang. (Aug. 2018).,,0,,0,,
5,Open issues and beyond 5G,"M. Chiani, E. Paolini, and F. Callegati,",,0,,0,,
6,6G vision and requirements: Is there any needf...,"K. David and H. Berndt,",30 August 2018,18058905,10.1109/MVT.2018.2848498,247,IEEE,https://ieeexplore.ieee.org/document/8412482
7,A speculative study on 6G,"F. Tariq, M. Khandaker, K.-K. Wong, M. Imran, ...",,0,,0,,
8,6G: The next frontier,"E. C. Strinati, S. Barbarossa, J. L. Gonzalez-...",,0,,0,,
9,"A vision of 6Gwireless systems: Applications, ...","W. Saad, M. Bennis, and M. Chen. (Mar. 2019).",,0,,0,,
11,Thirty years of machine learning: The road to ...,"J. Wang, C. Jiang, H. Zhang, Y. Ren, K.-C. Che...",,0,,0,,


In [6]:
# Write the collected table (q1.csv_data) out as a CSV file.
q1.save_csv_data()