# Test some examples for extraction

In [None]:
from src.open_alex_helpers import AuthorRelations, AuthorsWithRetry, WorksWithRetry, find_phd_and_supervisors_in_row, get_supervisors_openalex_ids

from pyalex import Authors, Works
from src.io_helpers import remove_commas

In [None]:
# Automatically reload imported modules so that changes made to the module
# files are reflected without needing to restart the Jupyter kernel.
# Load the autoreload extension.
%load_ext autoreload
# mode 1 reloads only modules registered with %aimport before each cell runs (the more conservative choice, e.g. for production)
# mode 2 reloads all modules (except those excluded via %aimport) before each cell runs
%autoreload 2

## Verify that the classes AuthorsWithRetry and WorksWithRetry behave the same as the original pyalex Authors and Works classes

In [None]:
# Run the same author search twice: once through the retry wrapper and once
# through the plain pyalex class.
candidates_with_retry = AuthorsWithRetry().search("John Doe").get()
candidates = Authors().search("John Doe").get()

# Compare the two result lists as plain dicts, so that only the returned data
# (not the wrapper object types) decides the outcome.
print(
    "Both results match at the data level."
    if list(map(dict, candidates_with_retry)) == list(map(dict, candidates))
    else "The results differ."
)

In [None]:
# Fetch the works of one author through the retry wrapper and through the
# plain pyalex class, using the identical filter for both requests.
# NOTE(review): "A1234" looks like a placeholder author ID -- confirm it
# resolves to a real OpenAlex author before relying on this check.
works_with_retry = WorksWithRetry().filter(author={"id": "https://openalex.org/A1234"}).get()

works = Works().filter(author={"id": "https://openalex.org/A1234"}).get()

# Check if the results are the same (compared as plain dicts)
if [dict(w) for w in works_with_retry] == [dict(w) for w in works]:
    # Message now reads the same as the one used in the authors comparison.
    print("Both results match at the data level.")
else:
    print("The results differ.")

## Illegal title

In [None]:
# Input metadata describing the thesis to look up.
phd_name = "Pandu Permana, R."
# title = "Something completely different"  # fake title
title = "Economic development environmental stress and sustainability in Indonesia. A case study on community transformation and local resource use in Berau East Kalimantan"
# year = 0  # fake year
year = 2009  # actual year
institution = "University of Groningen"
contributors = [None for _ in range(11)]  # eleven empty contributor slots

# Build the AuthorRelations helper for this thesis.
# NOTE(review): verbosity is set to 'DEBUG' here, while 'NONE', 'MEDIUM' and
# 'DETAILED' have been documented as the levels -- confirm which values
# AuthorRelations actually accepts.
# NOTE(review): a negative years_tolerance is passed; presumably a sentinel
# value -- confirm its meaning in AuthorRelations.
years_tolerance = -1
author_relations = AuthorRelations(
    phd_name=phd_name,
    title=title,
    year=year,
    institution=institution,
    contributors=contributors,
    years_tolerance=years_tolerance,
    verbosity='DEBUG',
)

# Look up the PhD candidate with criteria='either'.
# NOTE(review): presumably a match on either criterion suffices -- confirm
# against AuthorRelations.search_phd_candidate.
author_relations.search_phd_candidate(criteria='either')

# Collect supervision metadata (presumably for the contributors given above).
author_relations.collect_supervision_metadata()

# Retrieve and show whatever the helper gathered.
results = author_relations.get_results()
print(results)

In [None]:
# A real thesis title containing quotes, a colon, a comma and a year range.
title = "'Dese bekommerlijke tijden': armenzorg, armen en armoede in de stad Groningen 1594-1795"

# Clean the title with the project helper and show the cleaned form.
title_clean = remove_commas(title)
print(title_clean)

# Build the title search on the cleaned string, then execute it.
title_query = WorksWithRetry().search_filter(title=title_clean)
results = title_query.get()

# Last expression of the cell: the notebook displays the fetched works.
results