# Compare EC orphan drugs with all EC registered active drugs

Source of data: 

1. Active: https://ec.europa.eu/health/documents/community-register/html/reg_od_act.htm?sort=n
2. Withdrawn, suspended, expired, not renewed: https://ec.europa.eu/health/documents/community-register/html/reg_hum_nact.htm?sort=a
3. Rejected: https://ec.europa.eu/health/documents/community-register/html/reg_hum_refus.htm

## Import data

### EC all drugs

In [56]:
import os

import numpy as np
import pandas as pd

# The data folder sits in the parent of the current working directory
path = os.path.dirname(os.getcwd())

# Load the registry of all drugs and cast the EU # column to strings
data_all_drugs = pd.read_csv(path + '/data/ec_all_drugs.csv').astype(
    {'EU #': str})
data_all_drugs

Unnamed: 0,EU #,Brand name,Marketing Authorisation Holder,Status,INN,Initial Decision date
0,EU/1/14/944,Abasaglar,Eli Lilly Nederland B.V.,Active,,
1,EU/1/04/276,Abilify,Otsuka Pharmaceutical Netherlands B.V.,Active,,
2,EU/1/13/882,ABILIFY MAINTENA,Otsuka Pharmaceutical Netherlands B.V.,Active,,
3,EU/1/07/428,Abraxane,Bristol-Myers Squibb Pharma EEIG,Active,,
4,EU/1/07/412,Abseamed,Medice Arzneimittel Pütter GmbH & Co KG,Active,,
...,...,...,...,...,...,...
1548,,Winfuran,Toray International U.K. Limited,Rejected,nalfurafine,21 Mar 2014
1549,,Xeljanz (Refusal),Pfizer Limited,Rejected,tofacitinib,08 Nov 2013
1550,,Yondelis,Pharma Mar S.A.,Rejected,Trabectedin,07 Sep 2004
1551,,Zeftera,Janssen-Cilag International NV,Rejected,Ceftobiprole medocaril,16 Sep 2010


### EC orphan drug designations

In [57]:
# The data folder sits in the parent of the current working directory
path = os.path.dirname(os.getcwd())

# Load the orphan designations; header=2 takes the column names from the
# row with 0-based index 2. The EU # column is cast to strings.
data_od_raw = pd.read_csv(
    path + '/data/ec_orphan_designations.csv', header=2).astype(
        {'EU #': str})
data_od_raw

Unnamed: 0,EU #,Product,Indication,Sponsor,Designation date,Tradename - EU product # - Implemented on
0,EU/3/20/2351,Adeno-associated virus serotype 5 containing t...,Treatment of RDH12 mutation associated retinal...,MeiraGTx B.V.,19 Oct 2020,-
1,EU/3/20/2350,"Poly(oxy-1,2-ethanediyl), alpha-hydro-omega-me...",Treatment of hypoparathyroidism,Ascendis Pharma Bone Diseases A/S,19 Oct 2020,-
2,EU/3/20/2349,Miglustat,Treatment of neuronal ceroid lipofuscinosis,Theranexus S.A.S.,19 Oct 2020,-
3,EU/3/20/2348,"Poly(oxy-1,2-ethanediyl), alpha-(carboxymethyl...",Treatment of homocystinuria,Aeglea Biotherapeutics UK Limited,19 Oct 2020,-
4,EU/3/20/2347,Trehalose,Treatment of neuronal ceroid lipofuscinosis,Theranexus S.A.S.,19 Oct 2020,-
...,...,...,...,...,...,...
1735,EU/3/01/034,Gusperimus trihydrochloride,Treatment of Wegener’s granulomatosis,Nordic Group B.V.,29 Mar 2001,-
1736,EU/3/01/028,Inolimomab,Treatment of Graft versus Host Disease,Elsalys Biotech SA,05 Mar 2001,-
1737,EU/3/01/026,L-Lysine-N-acetyl-L-cysteinate,Treatment of cystic fibrosis,LABORATOIRES SMB SA,14 Feb 2001,-
1738,EU/3/00/013,Ethyl Eicosopentaenoate,Treatment of Huntington's disease,Amarin Neuroscience Limited,29 Dec 2000,-


## Only keep orphan drug designations between 2001 and 2019

In [61]:
# Year in which each orphan designation was granted.
# NOTE(review): the date format is inferred by pandas; the visible rows look
# like '19 Oct 2020' -- confirm that every row follows this format.
years = pd.to_datetime(data_od_raw['Designation date']).dt.year

# Keep the designations from 2001 up to and including 2019.
# Rows are selected with a boolean mask rather than `where(...).dropna()`:
# the latter also discards in-range rows that merely have a missing value in
# some other column, and it upcasts numeric columns to float.
# `.copy()` makes the result independent of `data_od_raw`, so the column
# assignment below does not write into (or warn about) a view.
data_od = data_od_raw.loc[(years > 2000) & (years < 2020)].copy()
data_od['EU #'] = data_od['EU #'].astype(str)
data_od

Unnamed: 0,EU #,Product,Indication,Sponsor,Designation date,Tradename - EU product # - Implemented on
109,EU/3/19/2235,Synthetic double-stranded siRNA oligonucleotid...,Treatment of congenital alpha-1 antitrypsin de...,Dicerna Ireland Limited,16 Dec 2019,-
110,EU/3/19/2234,Pamrevlumab,Treatment of Duchenne muscular dystrophy,Voisin Consulting S.A.R.L.,16 Dec 2019,-
111,EU/3/19/2233,Navitoclax,Treatment of myelofibrosis,AbbVie Deutschland GmbH & Co. KG,16 Dec 2019,-
112,EU/3/19/2232,Lactobacillus plantarum,Treatment of amyotrophic lateral sclerosis,MDC RegAffairs GmbH,16 Dec 2019,-
113,EU/3/19/2231,H-Leu-Pro-Pro-Leu-Pro-Tyr-Pro-OH,Treatment of amyotrophic lateral sclerosis,AdRes EU B.V.,16 Dec 2019,-
...,...,...,...,...,...,...
1733,EU/3/01/044,Human Alpha1-Proteinase Inhibitor (respiratory...,Treatment of emphysema secondary to congenital...,CSL Behring GmbH,09 Jul 2001,-
1734,EU/3/01/038,Retroviral gamma-c cDNA containing vector,Treatment of Severe Combined Immunodeficiency ...,GENOPOIETIC S.A.S.,30 May 2001,-
1735,EU/3/01/034,Gusperimus trihydrochloride,Treatment of Wegener’s granulomatosis,Nordic Group B.V.,29 Mar 2001,-
1736,EU/3/01/028,Inolimomab,Treatment of Graft versus Host Disease,Elsalys Biotech SA,05 Mar 2001,-


## 1. How many orphan drug designations (ODs) can be identified by their EU # in the all-drugs registry?

In [62]:
import plotly.colors
import plotly.graph_objects as go

# Get all EU # in all drug registry
eu_numbers_all_drugs = data_all_drugs['EU #'].unique()

# Get all EU # in OD drug registry
eu_numbers_od_drugs = data_od['EU #'].unique()

# Check whether OD drug number can be found.
# `Index.isin` returns a boolean NumPy array aligned with
# `eu_numbers_od_drugs`, replacing an element-by-element Python loop.
is_found = pd.Index(eu_numbers_od_drugs).isin(eu_numbers_all_drugs)

# Create dataframe with one row per OD EU # and its match status.
# `pd.concat` is used because `DataFrame.append` was removed in pandas 2.0.
matched_data = pd.concat([
    pd.DataFrame({
        'EU #': eu_numbers_od_drugs[is_found],
        'Match status': 'In all drugs registry'}),
    pd.DataFrame({
        'EU #': eu_numbers_od_drugs[~is_found],
        'Match status': 'Not in all drugs registry'})])

# Create figure
fig = go.Figure()
colours = plotly.colors.qualitative.Plotly

# Number of OD EU #s per match status
n_in_all_drugs = len(
    matched_data[matched_data['Match status'] == 'In all drugs registry'])
n_not_in_all_drugs = len(
    matched_data[matched_data['Match status'] == 'Not in all drugs registry'])

# Pie chart of matched vs. unmatched designations; the unmatched slice is
# pulled out of the pie (pull=0.2) and `sort=False` keeps the label order.
fig.add_trace(go.Pie(
    labels=['In all drugs registry', 'Not in all drugs registry'],
    values=[n_in_all_drugs, n_not_in_all_drugs],
    sort=False,
    pull=[0, 0.2],
    marker=dict(
        colors=colours[:2],
        line=dict(color='#000000', width=1))))
fig.show()