In [None]:
import os
import json
import requests
import time
import shutil
from retry import retry
from requests.exceptions import RequestException
import logging

def _query_npm_weekly_downloads(package_name, timeout):
    """Return the npm last-week download count for *package_name*, or None.

    Args:
        package_name: npm package name (may contain '/' for scoped names).
        timeout: Seconds passed to ``requests.get`` so a stalled connection
            cannot block the whole run.

    Returns:
        The ``downloads`` field of the response as an int, or ``None`` when
        the request fails or the response does not have the expected shape.
    """
    # Replace '/' with '%2F' so the name is embedded in the URL path as a
    # single segment.
    processed_package_name = '%2F'.join(package_name.split('/'))
    print(processed_package_name)

    url = 'https://api.npmjs.org/downloads/point/last-week/' + processed_package_name
    print(url)
    try:
        r = requests.get(url, timeout=timeout)
        r.raise_for_status()
        return int(r.json()['downloads'])
    except RequestException as e:
        print("Request Error occurred:", e)
    except (KeyError, TypeError, ValueError) as e:
        # Body was not JSON, or had no usable 'downloads' field.
        print("Unexpected response for", url, ":", repr(e))
    # Back off before the next package is queried; this request itself is
    # not retried.
    time.sleep(5)
    return None


@retry(tries=3, delay=2, backoff=2)
def process_json_files(folder_path, output_folder, request_timeout=30):
    """Fill in npm download counts for every ``*.json`` file in a folder.

    Each file must contain a top-level ``'nodes'`` list of dicts. A node is
    looked up when it has no ``'download_number'``, when that value is None,
    or when it is greater than 100. After all nodes of a file are handled,
    the file is rewritten in place and then moved to *output_folder*.

    A failed lookup never destroys a previously stored count: the node keeps
    its old value (or gets ``None`` if it had no value at all).

    Any exception that escapes this function (for example an unreadable or
    malformed JSON file) makes the ``retry`` decorator call the whole
    function again, up to 3 attempts in total, pausing 2s and then 4s.
    Per-package request errors are handled internally and do not trigger it.

    Args:
        folder_path: Directory containing the JSON files to process.
        output_folder: Existing directory the processed files are moved to.
        request_timeout: Per-request timeout in seconds.
    """
    for filename in os.listdir(folder_path):
        # Only JSON files are processed
        if not filename.endswith('.json'):
            continue
        print(filename)
        filepath = os.path.join(folder_path, filename)

        # Read the whole document first so the file handle is not held open
        # while the (slow) network requests run.
        with open(filepath, 'r', encoding='utf-8') as file:
            data = json.load(file)

        for node in data['nodes']:
            current = node.get('download_number')
            # Skip nodes that already hold a count of 100 or less.
            if current is not None and not current > 100:
                continue

            package_name = node.get('package_name', '')
            # Names are treated as '<name>-<version>': everything after the
            # last '-' is dropped.
            # NOTE(review): a hyphenated name without a version suffix loses
            # its last segment here - confirm the input always has a version.
            base_name, dash, _version = package_name.rpartition('-')
            if dash:
                package_name = base_name

            download_number = _query_npm_weekly_downloads(package_name, request_timeout)
            print("download_number", package_name, "is", download_number)

            if download_number is not None:
                node['download_number'] = download_number
            else:
                # Keep any earlier value; only guarantee that the key exists.
                node.setdefault('download_number', None)

            # Limit API request rate to 60 per second
            time.sleep(1 / 60)

        # Write the updated data back to the original file
        with open(filepath, 'w', encoding='utf-8') as file:
            json.dump(data, file, indent=4)

        # Move the file to the output folder
        output_filepath = os.path.join(output_folder, filename)
        shutil.move(filepath, output_filepath)
        print("Moved", filename, "to", output_folder)
                
# Specify the input and output folder paths.
# These are placeholders: set both to real directories before running.
input_folder_path = ''
output_folder_path = ''

# Create the output folder if it doesn't exist. exist_ok=True replaces the
# separate os.path.exists() check, which could race with another process
# creating the directory between the check and the call.
os.makedirs(output_folder_path, exist_ok=True)

# Call the function to process JSON files and move them to the output folder
process_json_files(input_folder_path, output_folder_path)

In [None]:
import os
import json
import requests
import time
import shutil
from retry import retry
from requests.exceptions import RequestException
from tqdm import tqdm  # Import tqdm library

def _query_pypi_recent_downloads(package_name, timeout):
    """Return the pypistats ``last_month`` download count, or None.

    Args:
        package_name: Package name to look up.
        timeout: Seconds passed to ``requests.get`` so a stalled connection
            cannot block the whole run.

    Returns:
        ``data.last_month`` from the ``/recent`` endpoint as an int, or
        ``None`` when the request fails or the response does not have the
        expected shape.
    """
    base_url = "https://pypistats.org/api/"
    user_agent = "pypistats/0.11.0"

    # Replace '/' with '%2F' so the name is embedded in the URL path as a
    # single segment.
    processed_package_name = '%2F'.join(package_name.split('/'))
    print(processed_package_name)

    endpoint = "packages/" + processed_package_name + "/recent"
    # The endpoint (including the package name) is lower-cased before use.
    url = base_url + endpoint.lower()

    try:
        r = requests.get(url, headers={"User-Agent": user_agent}, timeout=timeout)
        r.raise_for_status()
        return int(r.json()["data"]["last_month"])
    except RequestException as e:
        print("Request Error occurred:", e)
    except (KeyError, TypeError, ValueError) as e:
        # Body was not JSON, or had no usable data.last_month field.
        print("Unexpected response for", url, ":", repr(e))
    # Back off before the next package is queried; this request itself is
    # not retried.
    time.sleep(5)
    return None


@retry(tries=3, delay=2, backoff=2)
def process_json_files(folder_path, output_folder, request_timeout=30):
    """Fill in PyPI download counts for every ``*.json`` file in a folder.

    Each file must contain a top-level ``'nodes'`` list of dicts. A node is
    looked up when it has no ``'download_number'``, when that value is None,
    or when it is greater than 100. After all nodes of a file are handled,
    the file is rewritten in place and then moved to *output_folder*. A tqdm
    progress bar advances by one per processed file.

    A failed lookup never destroys a previously stored count: the node keeps
    its old value (or gets ``None`` if it had no value at all).

    Any exception that escapes this function (for example an unreadable or
    malformed JSON file) makes the ``retry`` decorator call the whole
    function again, up to 3 attempts in total, pausing 2s and then 4s.
    Per-package request errors are handled internally and do not trigger it.

    Args:
        folder_path: Directory containing the JSON files to process.
        output_folder: Existing directory the processed files are moved to.
        request_timeout: Per-request timeout in seconds.
    """
    # List the directory once; the same snapshot sizes the bar and drives
    # the loop.
    json_files = [name for name in os.listdir(folder_path) if name.endswith('.json')]

    with tqdm(total=len(json_files)) as pbar:
        for filename in json_files:
            print(filename)
            filepath = os.path.join(folder_path, filename)

            # Read the whole document first so the file handle is not held
            # open while the (slow) network requests run.
            with open(filepath, 'r', encoding='utf-8') as file:
                data = json.load(file)

            for node in data['nodes']:
                current = node.get('download_number')
                # Skip nodes that already hold a count of 100 or less.
                if current is not None and not current > 100:
                    continue

                package_name = node.get('package_name', '')
                # Names are treated as '<name>-<version>': everything after
                # the last '-' is dropped.
                # NOTE(review): a hyphenated name without a version suffix
                # loses its last segment here - confirm the input format.
                base_name, dash, _version = package_name.rpartition('-')
                if dash:
                    package_name = base_name

                download_number = _query_pypi_recent_downloads(package_name, request_timeout)
                print("download_number for", package_name, "is", download_number)

                if download_number is not None:
                    node['download_number'] = download_number
                else:
                    # Keep any earlier value; only guarantee the key exists.
                    node.setdefault('download_number', None)

                # Limit API request rate to 60 per second
                time.sleep(1 / 60)

            # Write the updated data back to the original file
            with open(filepath, 'w', encoding='utf-8') as file:
                json.dump(data, file, indent=4)

            # Move the file to the output folder
            output_filepath = os.path.join(output_folder, filename)
            shutil.move(filepath, output_filepath)
            print("Moved", filename, "to", output_folder)

            # Advance the bar; without this it stays at 0 for the whole run.
            pbar.update(1)

# Specify the input and output folder paths.
# These are placeholders: set both to real directories before running.
input_folder_path = ''
output_folder_path = ''
# Create the output folder if it doesn't exist. exist_ok=True replaces the
# separate os.path.exists() check, which could race with another process
# creating the directory between the check and the call.
os.makedirs(output_folder_path, exist_ok=True)

# Call the function to process JSON files and move them to the output folder
process_json_files(input_folder_path, output_folder_path)


In [None]:
import os
import json
import requests
import time
import shutil
from retry import retry
from requests.exceptions import RequestException
from tqdm import tqdm  # Import tqdm library

def _query_rubygems_downloads(package_name, version, timeout):
    """Return the RubyGems ``downloads`` value for a gem, or None.

    Args:
        package_name: Gem name to look up.
        version: Version string, or ``None`` to query the gem as a whole
            (``/api/v1/gems/<name>.json``) instead of one specific version
            (``/api/v2/rubygems/<name>/versions/<version>.json``).
        timeout: Seconds passed to ``requests.request`` so a stalled
            connection cannot block the whole run.

    Returns:
        The ``downloads`` field of the JSON response, or ``None`` when the
        field is absent, the request fails, or the body is not a JSON object.
    """
    # Replace '/' with '%2F' so the name is embedded in the URL path as a
    # single segment.
    processed_package_name = '%2F'.join(package_name.split('/'))
    print(processed_package_name)

    # Construct request URL
    if version is None:
        request_url = f'https://rubygems.org/api/v1/gems/{processed_package_name}.json'
    else:
        request_url = f'https://rubygems.org/api/v2/rubygems/{processed_package_name}/versions/{version}.json'
    print(request_url)

    try:
        resp = requests.request('GET', request_url, timeout=timeout)
        resp.raise_for_status()
        return resp.json().get('downloads', None)
    except RequestException as e:
        print("Request Error occurred:", e)
    except (AttributeError, ValueError) as e:
        # Body was not JSON, or was JSON but not an object.
        print("Unexpected response for", request_url, ":", repr(e))
    # Back off before the next package is queried; this request itself is
    # not retried.
    time.sleep(5)
    return None


@retry(tries=3, delay=2, backoff=2)
def process_json_files(folder_path, output_folder, request_timeout=30):
    """Fill in RubyGems download counts for every ``*.json`` file in a folder.

    Each file must contain a top-level ``'nodes'`` list of dicts. A node is
    looked up when it has no ``'download_number'``, when that value is None,
    or when it is greater than 100. After all nodes of a file are handled,
    the file is rewritten in place and then moved to *output_folder*. A tqdm
    progress bar advances by one per processed file.

    Versioned and unversioned lookups behave the same on failure: the node
    keeps its old value (or gets ``None`` if it had no value at all).

    Any exception that escapes this function (for example an unreadable or
    malformed JSON file) makes the ``retry`` decorator call the whole
    function again, up to 3 attempts in total, pausing 2s and then 4s.
    Per-package request errors are handled internally and do not trigger it.

    Args:
        folder_path: Directory containing the JSON files to process.
        output_folder: Existing directory the processed files are moved to.
        request_timeout: Per-request timeout in seconds.
    """
    # List the directory once; the same snapshot sizes the bar and drives
    # the loop.
    json_files = [name for name in os.listdir(folder_path) if name.endswith('.json')]

    with tqdm(total=len(json_files)) as pbar:
        for filename in json_files:
            print(filename)
            filepath = os.path.join(folder_path, filename)

            # Read the whole document first so the file handle is not held
            # open while the (slow) network requests run.
            with open(filepath, 'r', encoding='utf-8') as file:
                data = json.load(file)

            for node in data['nodes']:
                current = node.get('download_number')
                # Skip nodes that already hold a count of 100 or less.
                if current is not None and not current > 100:
                    continue

                package_name = node.get('package_name', '')
                version = None
                # Names are treated as '<name>-<version>': the text after the
                # last '-' becomes the version, minus one leading 'v'.
                # NOTE(review): a hyphenated name without a version suffix is
                # split too - confirm the input always carries a version.
                base_name, dash, suffix = package_name.rpartition('-')
                if dash:
                    package_name = base_name
                    version = suffix[1:] if suffix.startswith('v') else suffix

                download_number = _query_rubygems_downloads(
                    package_name, version, request_timeout
                )
                print("download_number for", package_name, "is", download_number)

                if download_number is not None:
                    node['download_number'] = download_number
                else:
                    # Keep any earlier value; only guarantee the key exists.
                    node.setdefault('download_number', None)

                # Limit API request rate to 60 per second
                time.sleep(1 / 60)

            # Write the updated data back to the original file
            with open(filepath, 'w', encoding='utf-8') as file:
                json.dump(data, file, indent=4)

            # Move the file to the output folder
            output_filepath = os.path.join(output_folder, filename)
            shutil.move(filepath, output_filepath)
            print("Moved", filename, "to", output_folder)

            # Advance the bar; without this it stays at 0 for the whole run.
            pbar.update(1)

# Specify the input and output folder paths.
# These are placeholders: set both to real directories before running.
input_folder_path = ''
output_folder_path = ''
# Create the output folder if it doesn't exist. exist_ok=True replaces the
# separate os.path.exists() check, which could race with another process
# creating the directory between the check and the call.
os.makedirs(output_folder_path, exist_ok=True)

# Call the function to process JSON files and move them to the output folder
process_json_files(input_folder_path, output_folder_path)
