# Type Statistics
This notebook retrieves our link collection as a pandas DataFrame and updates the 'type' field according to the proposed changes.

In [1]:
import pandas as pd
from generate_link_lists import load_dataframe
import yaml
import os

ModuleNotFoundError: No module named 'generate_link_lists'

## Data Import
Load the YAML data as a pandas DataFrame for processing.

In [None]:
# Build the working DataFrame from the resources folder.
# NOTE(review): presumably load_dataframe() parses every YAML file in that
# folder — confirm against generate_link_lists.
df = load_dataframe("../resources/")
# Preview the first four records only.
df.head(n=4)

## Define Update Rules
Create a mapping of old types to new types, plus a separate set of types to be removed entirely.

In [None]:
# Old type name -> canonical type name. Every target uses the same
# capitalised spelling so a renamed entry merges with records that already
# carry the canonical name instead of creating a new variant.
type_updates = {
    'Videos': 'Video',
    # 'Publiction' is presumably a misspelling found in the source data.
    'Publiction': 'Publication',
    'Slide': 'Slides',
    'Online Tutorial': 'Tutorial',
    'Online Course': 'Tutorial',
    'Practicals': 'Tutorial',
    'Report': 'Document',
    'Presentation': 'Slides'
}

# Type names that are dropped from a record's type list altogether.
types_to_remove = {'Big Data', 'Python', 'Bioimage Analysis'}

## Update Types
Apply updates to the 'type' field in the DataFrame.

In [None]:
def update_types(type_list, updates=None, removals=None):
    """Return ``type_list`` with outdated type names renamed or dropped.

    Parameters
    ----------
    type_list : list or any
        The 'type' value of a single record. Any value that is not a list
        is returned unchanged.
    updates : dict, optional
        Mapping of old type name to new type name. Defaults to the
        notebook-level ``type_updates``.
    removals : collection, optional
        Type names to drop entirely. Defaults to the notebook-level
        ``types_to_remove``.

    Returns
    -------
    list or any
        A new list without duplicates, in first-occurrence order, or the
        untouched input when it is not a list.
    """
    if not isinstance(type_list, list):
        return type_list
    if updates is None:
        updates = type_updates
    if removals is None:
        removals = types_to_remove
    renamed = (updates.get(t, t) for t in type_list if t not in removals)
    # dict.fromkeys de-duplicates while keeping first-occurrence order.
    # A set round-trip would order the strings by hash, which changes between
    # kernel sessions and makes the saved output differ from run to run.
    return list(dict.fromkeys(renamed))

# Run update_types on each record's 'type' value and store the result in place.
df['type'] = df['type'].map(update_types)

## Save the Updated DataFrame
Write the updated records to a separate YAML file, `updated_nfdi4bioimage.yml`, in the resources folder.

In [None]:
# One dict per row, so the frame serialises as a YAML list of records.
updated_data = df.to_dict(orient='records')
# NOTE(review): the output file lands in the same folder that
# load_dataframe() reads from — confirm it is not picked up as input on the
# next run.
# Explicit UTF-8 plus allow_unicode=True keeps non-ASCII text readable
# instead of emitting escape sequences; sort_keys=False keeps each record's
# keys in DataFrame column order rather than alphabetical order.
with open('../resources/updated_nfdi4bioimage.yml', 'w', encoding='utf-8') as file:
    yaml.dump({'resources': updated_data}, file, allow_unicode=True, sort_keys=False)

## Display Updated Types
Display the unique types after updating.

In [None]:
# Flatten the per-record type lists to one value per row, then collect the
# distinct values that remain after the update.
unique_types = (
    df['type']
    .explode()
    .unique()
)
unique_types