In [139]:
from serpapi import GoogleScholarSearch
import json
import csv
import os
from credentials import API_KEY

In [None]:
def append_to_json_file(file_path: str, data: "dict | list"):
    """Append ``data`` to the JSON array stored in ``file_path``.

    The file is expected to hold a JSON list. If it does not exist yet, a new
    list is started. The whole list is rewritten on every call.

    Args:
        file_path: Path of the JSON file to read and rewrite.
        data: Either a single record (``dict``), which is appended as one
            element, or an iterable of records, each of which is appended.

    Raises:
        json.JSONDecodeError: If the existing file is not valid JSON.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            existing_data = json.load(file)
    except FileNotFoundError:
        existing_data = []

    # list.extend() on a dict would add only its keys (strings) and silently
    # drop the values, so a single record must be appended as one element.
    if isinstance(data, dict):
        existing_data.append(data)
    else:
        existing_data.extend(data)

    with open(file_path, 'w', encoding='utf-8') as file:
        json.dump(existing_data, file, indent=4)

def query_google_scholar(query, api_key=None):
    """Run a Google Scholar search through SerpApi and save the responses.

    Each successful response dict is appended to
    ``google_scholar_results.json`` in the current working directory.

    Args:
        query: Search string sent as the ``q`` parameter.
        api_key: SerpApi key. When omitted, the ``API_KEY`` imported from the
            ``credentials`` module is used.
    """
    # Default to the project-level key so requests are never sent without one.
    if api_key is None:
        api_key = API_KEY

    # Set up SerpApi parameters
    params = {
        'q': query,
        'api_key': api_key,
        'as_ylo': 2019,  # Filter results from 2019 onwards
        'page_size': 20,  # Set the page size to 20
        'as_rr': 0,  # If 1, filter only review papers
    }

    # Perform the search
    search = GoogleScholarSearch(params)

    # Save all results to a JSON file
    save_path = 'google_scholar_results.json'

    while True:
        results = search.get_dict()

        # Null-safe lookup: a response without 'search_metadata' is reported
        # as an error instead of raising AttributeError on None.
        metadata = (results or {}).get('search_metadata') or {}
        if metadata.get('status') != 'Success':
            print(f"ERROR.{results}")
            break

        append_to_json_file(save_path, results)

        # NOTE(review): this loop only terminates if pagination() advances the
        # search to the next page and returns a falsy value when no pages are
        # left. Confirm against the installed serpapi version.
        if not search.pagination():
            break

    print(f"All results saved to {save_path}")
    # NOTE(review): confirm that num_pages() exists on the search object.
    if search.num_pages():
        print(f"All pages extracted: {search.num_pages()}")

# Example usage: run a single search. query_google_scholar() appends every
# successful response to google_scholar_results.json as a side effect.
query_string = 'multiple sclerosis segmentation deep learning'
query_google_scholar(query_string)

In [134]:
def _scholar_item_to_row(item):
    """Flatten one ``organic_results`` entry into a CSV row dict."""
    # `or {}` / `or []` also cover keys that are present but set to null.
    cited_by = (item.get('inline_links') or {}).get('cited_by') or {}
    publication_info = item.get('publication_info') or {}
    resources = item.get('resources') or []
    # Only the first attached resource is exported.
    first_resource = resources[0] if resources else {}

    return {
        'title': item.get('title'),
        'link': item.get('link'),
        'citation': cited_by.get('total'),
        'summary': publication_info.get('summary'),
        'resource_title': first_resource.get('title'),
        'file_format': first_resource.get('file_format'),
    }


def extract_and_append_to_csv(result_list, csv_filename='extracted_info.csv'):
    """Append selected fields of search responses to a CSV file.

    For every entry under ``organic_results`` in each response, one row is
    written with the columns ``title``, ``link``, ``citation``, ``summary``,
    ``resource_title`` and ``file_format``. Missing fields are written as
    empty cells.

    Args:
        result_list: A list of response dicts, or a single response dict.
        csv_filename: Target CSV path. It is opened in append mode; the header
            row is written only when the file is missing or empty.
    """
    fieldnames = ['title', 'link', 'citation', 'summary', 'resource_title', 'file_format']

    # Accept a single response as well as a list of responses.
    if isinstance(result_list, dict):
        result_list = [result_list]

    # An existing zero-byte file still needs a header, so checking existence
    # alone is not enough.
    needs_header = (
        not os.path.exists(csv_filename) or os.path.getsize(csv_filename) == 0
    )

    with open(csv_filename, 'a', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

        if needs_header:
            writer.writeheader()

        for result in result_list:
            # Build all rows for a response before writing, so a malformed
            # item does not leave that response half-written.
            rows = [
                _scholar_item_to_row(item)
                for item in result.get('organic_results', [])
            ]
            writer.writerows(rows)

In [138]:
# Load previously saved search responses and export their key fields to CSV.
# NOTE(review): query_google_scholar() writes google_scholar_results.json;
# confirm that articles_review.json is the intended input file.
# The encoding is given explicitly so the read does not depend on the
# platform default (the rest of this notebook reads and writes UTF-8).
with open("articles_review.json", "r", encoding="utf-8") as f:
    search_results = json.load(f)
extract_and_append_to_csv(search_results, "article_review.csv")