In [1]:
import time

In [2]:
import random

In [3]:
import pandas as pd

In [4]:
from scholarly import scholarly
from scholarly._navigator import MaxTriesExceededException


In [5]:
# Function to search Google Scholar
def search_scholar(query, max_results=1000):
    print(f"Searching Google Scholar with query: {query}")
    search_query = scholarly.search_pubs(query)
    articles = []
    tries = 0
    max_tries = 5  # Maximum number of retries

    for i in range(max_results):
        try:
            article = next(search_query)
            articles.append(article)
            # Adding a delay between 5 and 10 seconds
            time.sleep(random.uniform(5, 10))
            tries = 0  # Reset tries after a successful fetch
        except StopIteration:
            break
        except MaxTriesExceededException:
            tries += 1
            if tries >= max_tries:
                print("Max tries exceeded. Stopping the search.")
                break
            wait_time = 60 * (2 ** (tries - 1))  # Exponential backoff
            print(f"Retrying in {wait_time} seconds... ({tries}/{max_tries})")
            time.sleep(wait_time)
        except Exception as e:
            print(f"An error occurred: {e}")
            break

    print(f"Found {len(articles)} articles")
    return articles

# Function to parse the article details and extract required information
def parse_article_details(article):
    print(f"Parsing details for article: {article['bib']['title']}")
    title = article['bib']['title']
    authors = ', '.join(article['bib']['author'])
    doi = article['bib'].get('doi', 'N/A')
    abstract = article['bib'].get('abstract', 'N/A')
    date = article['bib'].get('year', 'N/A')
    return {
        'Title': title,
        'Author': authors,
        'DOI': doi,
        'Abstract': abstract
    }

def extract_main_findings(abstract):
    sentences = abstract.split('.')
    keywords = ['results', 'findings', 'we found', 'our study shows', 'conclusion']
    for sentence in sentences:
        if any(keyword in sentence.lower() for keyword in keywords):
            return sentence.strip()
    return 'N/A'

# Main function to perform the search and generate the CSV file
def main():
    query = '((malaria OR falciparum) AND (kelch13 OR Pfkelch13 OR k13 OR PfK13)' #AND (Kenya OR Uganda OR Ethiopia OR Somalia OR Rwanda OR Tanzania OR Congo OR DRC OR Eritrea OR Sudan OR South Sudan OR Madagascar OR Mozambique OR Comoros OR Djibouti OR Burundi OR Malawi OR Zambia)) AND (("2020/01/01" [Date - Publication]: "2024/09/24" [Date - Publication]))'
    articles = search_scholar(query, max_results=1000)  # Reduce max_results to avoid rate limits
    #parsed_articles = []
    
    """for article in articles:
        article_details = parse_article_details(article)
        article_details1['Main Findings'] = extract_main_findings(article_details['Abstract'])
        parsed_articles.append(article_details)
    
    df = pd.DataFrame(articles)
    csv_filename = 'K13_review_google_scholar.csv'
    df.to_csv(csv_filename, index=False)
    print(f"Data saved to {csv_filename}")"""

if __name__ == '__main__':
    main()

Searching Google Scholar with query: ((malaria OR falciparum) AND (kelch13 OR Pfkelch13 OR k13 OR PfK13)
Retrying in 60 seconds... (1/5)
Retrying in 120 seconds... (2/5)
Retrying in 240 seconds... (3/5)
Retrying in 480 seconds... (4/5)
Max tries exceeded. Stopping the search.
Found 550 articles
