In [1]:
from dotenv import load_dotenv

import bigger_picker.config as config
from bigger_picker.airtable import AirtableManager
from bigger_picker.asana import AsanaManager
from bigger_picker.batchtracker import BatchTracker
from bigger_picker.integration import IntegrationManager
from bigger_picker.openai import OpenAIManager
from bigger_picker.rayyan import RayyanManager

# Load variables from a local .env file into the process environment.
# Presumably these are the credentials the manager classes below read -- confirm.
# NOTE(review): this runs after the bigger_picker imports above; if any of those
# modules read environment variables at import time, this call must come first.
load_dotenv()

True

In [20]:
# One client object per external service, each built with its default arguments.
airtable = AirtableManager()
asana = AsanaManager()
openai = OpenAIManager()
rayyan = RayyanManager()
batchtracker = BatchTracker()

# Bundle every client into the integration layer. Keyword arguments are listed
# in the same order the clients were constructed above.
# NOTE(review): the effect of debug=True is defined in IntegrationManager,
# which is not visible from this notebook -- confirm before relying on it.
integration = IntegrationManager(
    airtable_manager=airtable,
    asana_manager=asana,
    openai_manager=openai,
    rayyan_manager=rayyan,
    batch_tracker=batchtracker,
    debug=True,
)

In [21]:
# Snapshot every record of the "Articles" and "Datasets" tables, in that order.
articles, datasets = (
    airtable.tables[table_name].all() for table_name in ("Articles", "Datasets")
)

# NOTE(review): a later cell iterates `asana.tasks`, not `asana_tasks`;
# presumably get_tasks() also populates that attribute -- confirm.
asana_tasks = asana.get_tasks(refresh=True)

In [None]:
# Index the Asana tasks by the value of their "BPIPD" custom field so that a
# task can be looked up from a dataset's identifier later on.
# If several tasks share the same field value, the last one seen wins.
tasks = {}

for task in asana.tasks:
    bpipd_value = asana.get_custom_field_value(
        task, config.ASANA_CUSTOM_FIELD_IDS["BPIPD"]
    )
    tasks[bpipd_value] = task

In [24]:
# Look-up table: dataset record id -> full dataset record.
datasets_dict = dict((record['id'], record) for record in datasets)

In [25]:
# Despite its name, this maps each Rayyan ID to the NUMBER of article records
# carrying it (a count above 1 means the article is duplicated).
# A record without a 'Rayyan ID' field raises KeyError here.
articles_dict = {}
for article in articles:
    article_id = article['fields']['Rayyan ID']
    if article_id in articles_dict:
        articles_dict[article_id] += 1
    else:
        articles_dict[article_id] = 1

In [26]:
# Group the article records by Rayyan ID, preserving the order in which they
# appear in `articles`.
articles_by_rayyan_id = {}
for article in articles:
    article_id = article['fields']['Rayyan ID']
    articles_by_rayyan_id.setdefault(article_id, []).append(article)

# Keep only the Rayyan IDs that occur on more than one article record.
duplicates = {
    rayyan_id: group
    for rayyan_id, group in articles_by_rayyan_id.items()
    if len(group) > 1
}

In [None]:
# Resolve duplicated articles (several article records sharing one Rayyan ID).
#
# For each duplicate pair:
#   * if only one copy is linked to a dataset, delete the copy without one;
#   * if both copies are linked to *different* datasets that are both in
#     'Awaiting Triage', delete the first article, its dataset and the Asana
#     task keyed by that dataset's 'Dataset ID';
#   * anything else is recorded in `unprocessed_duplicates` (Rayyan ID -> list
#     of article records) for manual review and nothing is deleted.
#
# WARNING: this cell performs irreversible deletions in Airtable and Asana.
#
# The loop variable is deliberately NOT called `articles`: that name holds the
# full article list loaded earlier, and rebinding it here would silently break
# any earlier cell that is re-run afterwards.
unprocessed_duplicates = {}
for article_id, duplicate_articles in duplicates.items():
    print(article_id)

    if len(duplicate_articles) != 2:
        # The pairwise rules below only make sense for exactly two copies;
        # handling just the first two would leave the rest silently unresolved.
        print(
            f"\t{len(duplicate_articles)} copies found, cannot automatically resolve"
        )
        unprocessed_duplicates[article_id] = duplicate_articles
        continue

    article_1, article_2 = duplicate_articles
    dataset_1_links = article_1['fields'].get('Datasets')
    dataset_2_links = article_2['fields'].get('Datasets')

    if not dataset_1_links and not dataset_2_links:
        # Neither copy has a dataset, so there is no rule for which one to
        # keep (indexing the missing link list would raise TypeError).
        print("\tNeither article has a dataset, cannot automatically resolve")
        unprocessed_duplicates[article_id] = duplicate_articles
        continue

    if not dataset_1_links:
        # No dataset associated with first article, but second has one.
        # So delete the first article, nothing to delete in dataset or asana.
        print(f"\tArticle {article_1['id']} can be deleted, no dataset associated")
        airtable.tables['Articles'].delete(article_1['id'])
        print(f"\tDeleted article {article_1['id']}")
        continue

    if not dataset_2_links:
        # No dataset associated with second article, but first has one.
        # So delete the second article, nothing to delete in dataset or asana.
        print(f"\tArticle {article_2['id']} can be deleted, no dataset associated")
        airtable.tables['Articles'].delete(article_2['id'])
        print(f"\tDeleted article {article_2['id']}")
        continue

    # Only the first linked dataset of each article is considered.
    dataset_1_id = dataset_1_links[0]
    dataset_2_id = dataset_2_links[0]

    if dataset_1_id == dataset_2_id:
        # Both copies point at the same dataset record: deleting it (and its
        # Asana task) would take it away from the surviving article as well.
        print("\tBoth articles share the same dataset, cannot automatically resolve")
        unprocessed_duplicates[article_id] = duplicate_articles
        continue

    dataset_1 = datasets_dict.get(dataset_1_id)
    dataset_2 = datasets_dict.get(dataset_2_id)
    if dataset_1 is None or dataset_2 is None:
        # The datasets snapshot does not contain a linked record.
        print("\tLinked dataset not found, cannot automatically resolve")
        unprocessed_duplicates[article_id] = duplicate_articles
        continue

    both_awaiting_triage = (
        dataset_1['fields'].get('Status') == "Awaiting Triage"
        and dataset_2['fields'].get('Status') == "Awaiting Triage"
    )
    if both_awaiting_triage:
        print("\tBoth datasets in 'Awaiting Triage'")

        # Resolve the Asana task BEFORE deleting anything, so a missing task
        # cannot leave the article and dataset deleted with the task orphaned.
        # A missing 'Dataset ID' must never be looked up: tasks lacking the
        # custom field may be indexed under None.
        dataset_bpipd = dataset_1['fields'].get('Dataset ID')
        asana_task = tasks.get(dataset_bpipd) if dataset_bpipd is not None else None
        if asana_task is None:
            print(
                f"\tNo asana task found for dataset {dataset_1_id}, "
                "cannot automatically resolve"
            )
            unprocessed_duplicates[article_id] = duplicate_articles
            continue
        asana_task_id = asana_task['gid']

        # Delete the article first
        airtable.tables['Articles'].delete(article_1['id'])
        print(f"\tDeleted article {article_1['id']}")

        # Then delete the dataset
        airtable.tables['Datasets'].delete(dataset_1_id)
        print(f"\tDeleted dataset {dataset_1_id}")

        # Then delete the asana task
        asana.tasks_api_instance.delete_task(asana_task_id)
        print(f"\tDeleted asana task {asana_task_id}")
        continue

    print("\tBoth status not 'Awaiting Triage', cannot automatically resolve")
    unprocessed_duplicates[article_id] = duplicate_articles

401829924
	Both datasets in 'Awaiting Triage'
	Deleted article rec5fFKSD5jcN01aj
	Deleted dataset recChuld7zemNhI4t
	Deleted asana task 1212358224060409
267165813
	Both datasets in 'Awaiting Triage'
	Deleted article recA93KKPOwA6ZsaZ
	Deleted dataset recEp2TT8xz1H8alX
	Deleted asana task 1212332226168224
290423879
	Both status not 'Awaiting Triage', cannot automatically resolve
401834712
	Both status not 'Awaiting Triage', cannot automatically resolve
401841698
	Both datasets in 'Awaiting Triage'
	Deleted article recAcc4lLN1tsPmYY
	Deleted dataset recTaG53zXKHGpsVO
	Deleted asana task 1212284451182885
401817116
	Both datasets in 'Awaiting Triage'
	Deleted article recB0mdblsf5xdLwt
	Deleted dataset recSZ8cemJPUm10Bd
	Deleted asana task 1212335594205174
290417734
	Both datasets in 'Awaiting Triage'
	Deleted article recBAlRCn8qwZrXJe
	Deleted dataset recc1CZVHh0TZFlJF
	Deleted asana task 1212278547905088
401850709
	Both datasets in 'Awaiting Triage'
	Deleted article recBaW9Esp6CP0XBr
	Del

In [19]:
# Rayyan IDs of the duplicate groups that were not resolved automatically and
# still need manual review.
unprocessed_duplicates.keys()

dict_keys([290423879, 401834712, 290423763, 267166919, 267162243])