In [1]:
# import internal packages
import semi_automated_literature_search as LS
import semi_automated_information_search as IS
import file_path_management
import public_library as PL
import automatic_filtering as AF

10.1016/j.neuroscience.2007.02.033


In the next cell, we present all parameters that might have an effect on the search results, including:<br>
1. searching keyword lexicon
2. on-topic keyword lexicon
3. academic databases
4. seed papers
5. connectome database
6. connectome database queries
7. ChatPDF queries for relatedness of topic

In [2]:
# parameters

# searching keyword lexicon
# alternative, currently unused form of the query:
# (macaque OR Macaque) AND (thalamocortical OR thalamocortical OR corticothalamic OR 'cortico-thalamic' OR thalamus OR cortex)
search_kws_lexicon = 'macaque AND (thalamus OR thalamocortical OR thalamo-cortical)'

# academic databases: search page, export options and recorded result count
#   Google Scholar     : 'https://scholar.google.com/'
#                        78300 results
#   Web of Science     : 'https://www.webofscience.com/wos/woscc/advanced-search'
#                        can be exported to excel file; 961 results
#   PubMed Central PMC : 'https://pubmed.ncbi.nlm.nih.gov/advanced/'
#                        can be exported to .csv file and abstract.txt file; 2448 results
#   Europe PMC         : 'https://europepmc.org/advancesearch'
#                        can be exported to .csv file or abstract and full open access file .xml; 5129 results
acad_dbs = [
    'Google Scholar',
    'Web of Science',
    'PubMed_Central_PMC',
    'Europe_PMC',
]

# first result page of the keyword search in each academic database,
# keyed by a short database tag
init_urls = dict(
    gs='https://scholar.google.com/scholar?start=0&q=macaque+thalamus+OR+thalamocortical+OR+thalamo-cortical&hl=en&as_sdt=1,5',
    # NOTE(review): this URL embeds what looks like a query/session identifier — confirm it is still valid before re-running
    wos='https://www.webofscience.com/wos/woscc/summary/79530a3c-47d5-4dd0-9b7d-b1d92fd11882-98d8472a/relevance/1',
    pubmed='https://pubmed.ncbi.nlm.nih.gov/?term=(((thalamus)%20OR%20(thalamocortical))%20OR%20(thalamo-cortical))%20AND%20(macaque)&sort=&page=1',
    eupmc='https://europepmc.org/search?query=%28%22macaque%22%20AND%20%28%22thalamus%22%20OR%20%22thalamocortical%22%20OR%20%22thalamo-cortical%22%29%20%29%20AND%20%28LANG%3A%22eng%22%20OR%20LANG%3A%22en%22%20OR%20LANG%3A%22us%22%29&page=1',
)

# on-topic keyword lexicon, assembled from themed groups (order is preserved)
on_topic_kws = (
    # thalamus <-> cortex pathway terms
    ['thalamocortical', 'thalamo-cortical', 'corticothalamic', 'cortico-thalamic']
    # tracing terms
    + ['tracing', 'tracer', 'tract tracing', 'tract-tracing',
       'axonal tracing', 'neural tracing', 'anatomical tracing', 'anatomical neural tracing']
    # connectivity terms
    + ['connection', 'projection', 'connectivity', 'connectome']
    # anatomical structure terms
    + ['thalamus', 'cortex', 'thalamic', 'cortical']
)

# seed papers specification (none listed yet)
seed_papers = list()

# connectome database and queries specification
# we search the CoCoMac; both the database name and its queries are still empty here
connec_db = ""
connec_db_quries = list()

# ChatGPT, queries for relatedness of topic
# Each entry is a yes/no question asked about a given text to judge whether it
# concerns connections between thalamus and cortex.
# The key term must be spelled "thalamocortical" so the question matches the
# terminology used in on-topic literature.
ChatGPT_related_queries = ['Does the given text include information of thalamocortical connection?', 
                           'Does the given text include information of connection between thalamus and cortex?']

# meta categories, keywords, and corresponding queries
# meta_categ lists the per-publication fields, grouped below by theme
meta_categ = (
    # identifiers and links
    ['DOI', 'Publication_link', 'pdf_link']
    # authorship and origin
    + ['Authors', 'Year', 'Country', 'Affiliation']
    # descriptive text
    + ['Title', 'Abstract', 'Keywords']
    # parcellation schemes
    + ['Thalamic_parcellation_scheme', 'Cortical_parcellation_scheme']
    # areas in focus
    + ['Thalamic_area_focused', 'Cortical_area_focused']
    # NOTE(review): 'Steriotactic_axis' is probably meant as "Stereotactic"; kept as-is because
    # the name may be used as a column key elsewhere — confirm before renaming
    + ['Steriotactic_axis', 'Type_of_data']
)
# keywords and queries per meta category (not filled in yet)
meta_categ_kws = list()
meta_categ_quries = list()

In [3]:
# all the file paths in file_path_management.py
# project_folder
# gs_poten_urls
# wos_poten_urls
# pubmed_pmc_poten_urls
# eupmc_poten_urls
# path_poten_csv
# path_related_urls
# path_related_csv
# pdf_folder_path
# seed_paper_urls
# connec_db_urls

In [4]:
# main program
if __name__ == "__main__":
    # Stage 1: collect all literature that might include data or information on
    # thalamocortical connections. Three collection methods are listed below;
    # only method 1 is currently enabled.

    # --- request settings passed to the search helper ---
    http_proxy = "http://103.148.39.50:83"
    https_proxy = "https://47.254.158.115:20201"
    proxy = dict(http=http_proxy, https=https_proxy)
    headers = {'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/601.3.9 (KHTML, like Gecko) Version/9.0.2 Safari/601.3.9'}
    # --- end of request settings ---

    # method 1: search academic databases using keywords
    LS.search_acad_dbs(acad_dbs, init_urls, headers, proxy)

    # method 2 (disabled): spanning citations of seed papers
    # NOTE(review): num_span_time is not defined in the cells visible here
    # LS.span_citations(seed_papers, num_span_time, headers, proxy)

    # method 3 (disabled): search existing connectome databases
    # LS.search_conne_db(connec_db, connec_db_quries)

Searching Google Scholar...
<!DOCTYPE html>
<html><head><title>Google Scholar</title><meta content="text/html;charset=utf-8" http-equiv="Content-Type"/><meta content="IE=Edge" http-equiv="X-UA-Compatible"/><meta content="always" name="referrer"/><meta content="width=device-width,initial-scale=1,minimum-scale=1,maximum-scale=2" name="viewport"/><meta content="telephone=no" name="format-detection"/><link href="/favicon.ico" rel="shortcut icon"/><style>html,body,form,table,div,h1,h2,h3,h4,h5,h6,img,ol,ul,li,button{margin:0;padding:0;border:0;}table{border-collapse:collapse;border-width:0;empty-cells:show;}html,body{height:100%}#gs_top{position:relative;box-sizing:border-box;min-height:100%;min-width:964px;-webkit-tap-highlight-color:rgba(0,0,0,0);}#gs_top>*:not(#x){-webkit-tap-highlight-color:rgba(204,204,204,.5);}.gs_el_ph #gs_top,.gs_el_ta #gs_top{min-width:320px;}#gs_top.gs_nscl{position:fixed;width:100%;}body,td,input,button{font-size:13px;font-family:Arial,sans-serif;line-height:1.24

In [5]:
if __name__ == "__main__":
    # merge all search results
    # NOTE(review): the saved output of this cell ends with
    # "ValueError: Excel file format cannot be determined, you must specify an engine manually."
    # — presumably raised while merge_search_results reads an exported spreadsheet; confirm
    # against its implementation (not visible in this notebook).
    LS.merge_search_results()
    
    # planned step (disabled): send the PDF of every potentially related publication to ChatPDF.com
    # and ask about its relatedness, then record the answer in list_of_potential_related_literature.csv as well
    # ChatPDF_relatedness(path_urls, chatpdf_related_queries)

    # at this point the list of potentially related literature and the relatedness information
    # are expected to be stored in the file "list_of_potential_related_literature.csv";
    # the next steps are an automatic filtering followed by a manual filtering of that list

    # automatic filtering (disabled)
    #auto_filter(path_potential)

    # manual filtering (disabled)
    # manual_filter(path_potential, path_related_urls)

0


ValueError: Excel file format cannot be determined, you must specify an engine manually.

In [None]:
# Disabled scratch cell: everything below is a single string literal, so none of it runs.
# The text inside requests a DOI link and an Elsevier link with redirects enabled and prints
# the redirect history and final URL, then fetches a Wiley page and prints its parsed HTML.
# NOTE(review): `requests` and `BeautifulSoup` are not imported in the cells visible here;
# import them before re-enabling this code.
'''
# test the redirect of the urls
headers = {'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/601.3.9 (KHTML, like Gecko) Version/9.0.2 Safari/601.3.9'} 

response_pdf = requests.get('https://doi.org/10.1016/j.neuron.2020.01.005', allow_redirects=True, headers = headers)
print(response_pdf.history)
print(response_pdf.url)
response_pdf_1 = requests.get('https://linkinghub.elsevier.com/retrieve/pii/S0896627320300052', allow_redirects=True, headers = headers)
print(response_pdf_1.history)
print(response_pdf_1.url)


response_pdf = requests.get('https://onlinelibrary.wiley.com/doi/10.1111/ejn.13910', headers = headers)
soup_pdf = BeautifulSoup(response_pdf.content,'lxml')
print(soup_pdf)
'''

This is the end of semi-automated literature search.

Now we have a list of actually related literature stored in list_of_related_literature.txt

Next step: we perform an information search on the list of related literature.
We now have a list of actually related literature and need to extract the required information from it. We intend to achieve this with a combination of automated searching and manual extraction.

In [None]:
# semi-automated information search

In [None]:
import re
# scratch check: report whether a DOI resolver link contains the '//doi.org/' marker
if 'https://doi.org/10.1016/0165-0173(96)00003-3'.find('//doi.org/') >= 0:
    print("yes")