In [None]:
!pip install -U jk-pypiorgapi jk-json clean-text

In [42]:
!pip freeze > requirements.txt

In [25]:
# Run this once before the first use: opening in append mode creates
# "pypi_api.data.json" if it is missing and leaves existing content untouched.
open("pypi_api.data.json", "a").close()

In [None]:
import jk_pypiorgapi
import jk_json
import json
import logging
import cleantext.clean

# Placeholder strings that carry no real documentation; fetch_data compares a
# package's cleaned description against these.
COMMON_USELESS_INFO:list = ["UNKNOWN", "", "..."]
# Length thresholds (measured with len() on the cleaned text) that fetch_data
# requires the description / summary to exceed.
MINIMUM_DOC_LENGTH:int = 24
MINIMUM_SUMMARY_LENGTH:int = 16

# Client object used for every package lookup in this file.
api:object = jk_pypiorgapi.PyPiOrgAPI()

# Evaluated once, when this cell/module runs. fetch_data reads element [1] of
# each entry as the package name.
# NOTE(review): presumably this performs a network request against pypi.org
# and returns one entry per published package -- confirm in jk_pypiorgapi.
PACKAGES:list = api.listAllPackages()

def clean_data(text: str) -> str:
  """Normalize *text* through ``cleantext.clean`` without stripping content.

  Unicode fixing and ASCII transliteration are enabled. Lower-casing and
  every ``no_*`` filter (line breaks, URLs, e-mails, phone numbers, numbers,
  digits, currency symbols, punctuation) are explicitly disabled. None of the
  ``replace_with_*`` options is passed, so the library's own defaults apply
  to them.

  Args:
    text: Raw text to normalize.

  Returns:
    Whatever ``cleantext.clean`` returns for *text* with these options.
  """
  options = {
      "fix_unicode": True,
      "to_ascii": True,
      "lower": False,
      "no_line_breaks": False,
      "no_urls": False,
      "no_emails": False,
      "no_phone_numbers": False,
      "no_numbers": False,
      "no_digits": False,
      "no_currency_symbols": False,
      "no_punct": False,
      "lang": "en",
  }
  return cleantext.clean(text, **options)

def fetch_data() -> dict:
  """Collect cleaned metadata for every entry in ``PACKAGES`` worth keeping.

  For each entry, the package JSON is requested through ``api`` and its
  description and summary are passed through ``clean_data``. A package is
  kept when its cleaned description is longer than ``MINIMUM_DOC_LENGTH``,
  or when its cleaned summary is longer than ``MINIMUM_SUMMARY_LENGTH`` and
  the description is not one of ``COMMON_USELESS_INFO``.

  Packages whose metadata cannot be read (a ``TypeError`` or ``KeyError``
  while picking fields out of the response) are skipped and counted instead
  of aborting the whole run. Both counters are reported through ``logging``
  once the loop finishes.

  Returns:
    A dict mapping each kept package name to a dict of selected metadata
    fields (author, classifiers, description, summary, version, ...).
  """
  logger = logging.getLogger(__name__)

  packages_with_info:int = 0
  packages_without_info:int = 0
  packages_info:dict = {}

  for package in PACKAGES:

    package_name:str = package[1]
    # NOTE(review): presumably None (not a dict) when the lookup fails; that
    # is what makes the subscript below raise TypeError -- confirm in
    # jk_pypiorgapi.
    package_data:dict = api.getPackageInfoJSON(package_name)

    try:
      package_info:dict = package_data["info"]
      description:str = clean_data(package_info["description"])
      summary:str = clean_data(package_info["summary"])

      # Same truth table as the original unparenthesized test ("and" binds
      # tighter than "or"); the grouping is just spelled out.
      has_useful_text:bool = (
          len(description) > MINIMUM_DOC_LENGTH
          or (
              len(summary) > MINIMUM_SUMMARY_LENGTH
              and description not in COMMON_USELESS_INFO
          )
      )
      if not has_useful_text:
        continue

      # The literal is fully evaluated before the assignment, so a missing
      # key never leaves a half-built record behind.
      packages_info[package_name] = {
          "author":package_info["author"],
          "classifiers":package_info["classifiers"],
          "description":description,
          "description_content_type":package_info["description_content_type"],
          "docs_url":package_info["docs_url"],
          "home_page":package_info["home_page"],
          "keywords":package_info["keywords"],
          "license":package_info["license"],
          "summary":summary,
          "requires_python":package_info["requires_python"],
          "version":package_info["version"],
          "vulnerabilities":package_data["vulnerabilities"]
      }
      packages_with_info += 1

    except (TypeError, KeyError) as error:
      # One malformed response must not discard everything gathered so far.
      packages_without_info += 1
      logger.debug("Skipping %s: metadata unavailable (%r)", package_name, error)

  logger.info(
      "Kept %d packages; %d had no usable metadata",
      packages_with_info,
      packages_without_info,
  )
  return packages_info


def save_data(data:dict, path:str = "pypi_api.data.json") -> None:
  """Serialize *data* as JSON into *path*, replacing any existing content.

  Args:
    data: JSON-serializable mapping to write.
    path: Destination file. Defaults to the file name this script has always
      used, so existing ``save_data(data)`` calls behave as before.
  """
  # Explicit encoding so the result does not depend on the platform locale.
  # json.dump escapes non-ASCII characters by default, so the bytes written
  # are the same ASCII text either way.
  with open(path, "w", encoding="utf-8") as fp:
    json.dump(data, fp)

#print(packages_without_info)
#print(packages_with_info)
def show_data() -> None:
  """Load "pypi_api.data.json" and pretty-print it via ``jk_json.prettyPrint``."""
  with open("pypi_api.data.json", "r") as source:
    loaded = json.load(source)
  jk_json.prettyPrint(loaded)

def main():
  """Run the pipeline: fetch the metadata, write it to disk, then print it."""
  save_data(fetch_data())
  show_data()

if __name__ == "__main__":
  main()