In [1]:
import os
import pandas as pd
import requests
from urllib.parse import urlparse

def _sanitize_title(title):
    """Return `title` reduced to letters, digits and spaces, with trailing spaces removed."""
    return "".join(c for c in title if c.isalpha() or c.isdigit() or c == ' ').rstrip()


def _download_pdf(url, file_path, timeout):
    """Download `url` to `file_path`; return True on success, False otherwise.

    The body is streamed into a temporary "<file_path>.part" file and only
    renamed to `file_path` once the transfer has completed, so an interrupted
    download never leaves a file that a later run would treat as finished.
    """
    part_path = file_path + '.part'
    try:
        # The context manager releases the connection even on early return.
        with requests.get(url, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                return False
            with open(part_path, 'wb') as out_file:
                for chunk in response.iter_content(chunk_size=65536):
                    out_file.write(chunk)
        os.replace(part_path, file_path)
        return True
    except Exception:
        # Deliberately broad: this is a best-effort batch job, and one bad
        # row (malformed URL, network error, unwritable file name) must not
        # abort the remaining downloads.
        try:
            if os.path.exists(part_path):
                os.remove(part_path)
        except OSError:
            pass
        return False


def downloadAllFilesPdf(folderLocation, timeout=30):
    """Download every paper listed in a CSV file as a PDF.

    The CSV must contain the columns 'FullTextURL' and 'Title'. PDFs are saved
    to a directory named "<csv name> Papers" next to the CSV, one file per row,
    named after the sanitised title. Files that already exist are skipped.
    Rows that could not be downloaded are listed in "failed_downloads.txt",
    written to the same directory as the CSV (overwritten on every call).

    Parameters
    ----------
    folderLocation : str
        Path to the CSV file (despite the name, this is a file path).
    timeout : float or tuple, optional
        Timeout in seconds passed to `requests.get`, so that a stalled server
        cannot block the whole run. Defaults to 30.

    Returns
    -------
    None
    """
    # Read the csv file
    df = pd.read_csv(folderLocation)

    # Parse the folder location to get base directory and file name
    base_dir, csv_file = os.path.split(folderLocation)
    file_base_name, _ = os.path.splitext(csv_file)

    # Create the directory for downloaded papers
    papers_dir = os.path.join(base_dir, f'{file_base_name} Papers')
    os.makedirs(papers_dir, exist_ok=True)

    # (index, title) pairs of the rows that could not be downloaded
    failed_downloads = []

    for index, row in df.iterrows():
        url = row['FullTextURL']
        title = row['Title']

        # Missing cells are read as NaN (a float), so require real strings
        if not (isinstance(url, str) and isinstance(title, str)):
            failed_downloads.append((index, title))
            continue

        # Making the title file-system friendly
        safe_title = _sanitize_title(title)
        if not safe_title:
            # Nothing usable is left for a file name; report the raw title
            failed_downloads.append((index, title))
            continue

        file_path = os.path.join(papers_dir, f'{safe_title}.pdf')

        if os.path.isfile(file_path):
            print(f'File "{safe_title}.pdf" already exists.')
            continue

        if not _download_pdf(url, file_path, timeout):
            failed_downloads.append((index, safe_title))

    # os.path.join keeps the report next to the CSV even when base_dir is ''
    # (a plain f'{base_dir}/...' would point at the filesystem root).
    failed_path = os.path.join(base_dir, 'failed_downloads.txt')
    with open(failed_path, 'w', encoding='utf-8') as f:
        for failed_index, failed_title in failed_downloads:
            f.write(f'Index: {failed_index}, Title: {failed_title}\n')


In [2]:
# CSV exports of the two reference collections to fetch papers for.
folder = 'Ref/Blockchain Meta-Governance/BlockchainMetaGov.csv'
folder1 = 'Ref/Smart Contract Composability/SmartContractComp.csv'

# Download each collection in turn, in the order listed.
for csv_path in (folder, folder1):
    downloadAllFilesPdf(csv_path)

File "A case study of a digital data platform for the agricultural sector a valuable decision support system for small farmers.pdf" already exists.
File "A Case Study of a Digital Data Platform for the Agricultural Sector A Valuable Decision Support System for Small Farmers.pdf" already exists.
File "A cultural political economy of South Koreas development model in variegated capitalism.pdf" already exists.
File "A cybernetic service design approach for taming persuasive service systems Reflective case studies for design practice.pdf" already exists.
File "A Study on the Measurement of POI Data in Shandong Urban Governance Under Big Data.pdf" already exists.
File "A Survey on Decentralized Autonomous Organizations DAOs and Their Governance.pdf" already exists.
File "Admins mods and benevolent dictators for life The implicit feudalism of online communities.pdf" already exists.
File "Are partnerships in nonprofit organizations being governed for sustainability A partnering life cycle ass