In [18]:
import pandas as pd
import numpy as np
from utils_ergo import choose_prefix, correct_rounds_from_lookup

# Widen pandas' display limits so the example frames below are shown in full.
DISPLAY_OPTIONS = {
    'display.max_rows': 500,
    'display.min_rows': 500,
    'display.max_columns': 100,
    'display.width': 2000,
    'max_colwidth': 400,
}
for option_name, option_value in DISPLAY_OPTIONS.items():
    pd.set_option(option_name, option_value)

from IPython.core.interactiveshell import InteractiveShell
# "all": display the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

# Notebook Overview

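This notebook demonstrates how to assign the correct **Rotterdam Study round (prefix)** to participant visits based on `visit_date`, `study_id` (cohort), and `visit_nr` (the per-participant visit number, which is derived in this notebook from the chronological order of the visits).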


The notebook expects a DataFrame with the following columns:

- **`ergo_id`** *(int)*: Unique identifier for each participant  
- **`visit_date`** *(str in `'YYYY-MM-DD'` format)*: The date the participant attended the visit  
- **`study_id`** *(str)*: The Rotterdam Study cohort name (e.g., `'Rotterdam Study 1'`)

### Goal

Assign the appropriate **visit round prefix** (e.g., `'e1'`, `'e2'`, `'e3'`, etc.) to each participant visit, accounting for:

- Participants arriving **late** to one or more visits  
- Participants arriving **early**  
- Participants who **missed** a visit entirely

### Test data

In [19]:
# Visit dates per participant (ergo_id); every visit belongs to the same cohort.
cohort_name = 'Rotterdam Study 1'
visit_dates_by_participant = {
    1: ['1993-09-02', '1996-01-01', '2000-01-01', '2002-01-01'],  # Late for multiple visits
    2: ['1990-01-01', '1994-01-01', '1995-02-01'],                # Early for one visit
    3: ['1990-01-01', '1998-01-01'],                              # Missing one visit
}

# Flatten the mapping into one entry per visit, keeping the three columns aligned.
data = {
    'ergo_id': [
        participant
        for participant, dates in visit_dates_by_participant.items()
        for _ in dates
    ],
    'visit_date': [
        date
        for dates in visit_dates_by_participant.values()
        for date in dates
    ],
    'study_id': [
        cohort_name
        for dates in visit_dates_by_participant.values()
        for _ in dates
    ],
}

df = pd.DataFrame(data)

### visit_nr 

In [20]:
# Assuming df is your DataFrame: parse the 'YYYY-MM-DD' strings (values that
# cannot be parsed become NaT) and order each participant's visits by date.
df = (
    df
    .assign(visit_date=pd.to_datetime(df['visit_date'], format='%Y-%m-%d', errors='coerce'))
    .sort_values(by=['ergo_id', 'visit_date'])
)

# Determine visit numbers from the ascending dates: 1 for a participant's
# earliest visit, 2 for the next one, and so on.
df['visit_nr'] = df.groupby('ergo_id').cumcount() + 1

### round

In [14]:
# Call choose_prefix once per row (axis=1) and store the results in the 'round' column.
df = df.assign(round=df.apply(choose_prefix, axis=1))

In [15]:
# Show the frame through the notebook's rich display rather than plain-text print.
df

   ergo_id visit_date           study_id  visit_nr round
0        1 1993-09-02  Rotterdam Study 1         1    e2
1        1 1996-01-01  Rotterdam Study 1         2    e2
2        1 2000-01-01  Rotterdam Study 1         3    e3
3        1 2002-01-01  Rotterdam Study 1         4    e4
4        2 1990-01-01  Rotterdam Study 1         1    e1
5        2 1994-01-01  Rotterdam Study 1         2    e2
6        2 1995-02-01  Rotterdam Study 1         3    e3
7        3 1990-01-01  Rotterdam Study 1         1    e1
8        3 1998-01-01  Rotterdam Study 1         2    e3


### Correct round for edge cases

In [16]:
# NOTE(review): `visit_order_lookup` is not defined or imported in the cells shown here, so this
# cell raises NameError on a fresh kernel unless the name is created elsewhere. TODO: confirm where
# it comes from (presumably utils_ergo) and import or define it explicitly before this call.
# The returned frame replaces `df`; presumably its 'round' column holds the corrected rounds.
df = correct_rounds_from_lookup(df, round_column='round', visit_order_lookup=visit_order_lookup)

In [None]:
# Show the corrected frame through the notebook's rich display rather than plain-text print.
df

Unnamed: 0,ergo_id,visit_date,study_id,visit_nr,round
0,1,1993-09-02,Rotterdam Study 1,1,e1
1,1,1996-01-01,Rotterdam Study 1,2,e2
2,1,2000-01-01,Rotterdam Study 1,3,e3
3,1,2002-01-01,Rotterdam Study 1,4,e4
4,2,1990-01-01,Rotterdam Study 1,1,e1
5,2,1994-01-01,Rotterdam Study 1,2,e2
6,2,1995-02-01,Rotterdam Study 1,3,e3
7,3,1990-01-01,Rotterdam Study 1,1,e1
8,3,1998-01-01,Rotterdam Study 1,2,e3
