# EC Orphan Drug Records

Source of data: https://ec.europa.eu/health/documents/community-register/html/reg_od_act.htm?sort=n

In [2]:
import os

import numpy as np
import pandas as pd

path = os.path.dirname(os.getcwd())
data = pd.read_csv(path + '/data/ec_orphan_designations.csv', header=2)
data

Unnamed: 0,EU #,Product,Indication,Sponsor,Designation date,Tradename - EU product # - Implemented on
0,EU/3/20/2351,Adeno-associated virus serotype 5 containing t...,Treatment of RDH12 mutation associated retinal...,MeiraGTx B.V.,19 Oct 2020,-
1,EU/3/20/2350,"Poly(oxy-1,2-ethanediyl), alpha-hydro-omega-me...",Treatment of hypoparathyroidism,Ascendis Pharma Bone Diseases A/S,19 Oct 2020,-
2,EU/3/20/2349,Miglustat,Treatment of neuronal ceroid lipofuscinosis,Theranexus S.A.S.,19 Oct 2020,-
3,EU/3/20/2348,"Poly(oxy-1,2-ethanediyl), alpha-(carboxymethyl...",Treatment of homocystinuria,Aeglea Biotherapeutics UK Limited,19 Oct 2020,-
4,EU/3/20/2347,Trehalose,Treatment of neuronal ceroid lipofuscinosis,Theranexus S.A.S.,19 Oct 2020,-
...,...,...,...,...,...,...
1735,EU/3/01/034,Gusperimus trihydrochloride,Treatment of Wegener’s granulomatosis,Nordic Group B.V.,29 Mar 2001,-
1736,EU/3/01/028,Inolimomab,Treatment of Graft versus Host Disease,Elsalys Biotech SA,05 Mar 2001,-
1737,EU/3/01/026,L-Lysine-N-acetyl-L-cysteinate,Treatment of cystic fibrosis,LABORATOIRES SMB SA,14 Feb 2001,-
1738,EU/3/00/013,Ethyl Eicosopentaenoate,Treatment of Huntington's disease,Amarin Neuroscience Limited,29 Dec 2000,-


## 1. Number of designations each year
(exclude 2000 and 2020, because data is incomplete)

In [3]:
import plotly.colors
import plotly.graph_objects as go

# Create container for time data
time_evolution = pd.DataFrame(columns={'Year', 'Number of designations'})

# Group data by year
years = pd.to_datetime(
    data['Designation date']).apply(lambda x: x.year)
for year in years.dropna().unique():
    # Ignore 2000 and 2020
    if (year == 2000) or (year == 2020):
        continue

    # Get outcomes for that year
    mask = years == year
    n_designations = years[mask].count()
    
    # Append to main container
    time_evolution = time_evolution.append(pd.DataFrame({
        'Year': [year],
        'Number of designations': [n_designations]}))

# Sort by time
time_evolution.sort_values(by='Year', inplace=True)

# Create line plot
fig = go.Figure()
colour = plotly.colors.qualitative.Plotly[0]

fig.add_trace(go.Scatter(
    x=time_evolution['Year'],
    y=time_evolution['Number of designations'],
    marker=dict(
        line=dict(color=colour, width=1))))

# Set axes and template
fig.update_layout(
    template="plotly_white",
    xaxis_title='Year',
    yaxis_title='Number of designations')

fig.show()

**Figure 1:** EC's record of orphan drug designations over time

In [33]:
time_evolution['Number of designations'].sum()

1629

Note: There is a lag from EMA positive numbers to EC numbers, likely because date of first decision and date of designation do not coincide.

## 2 Number of applications per active substance
The substance names are transformed to lower case, but other than that there is no processing or matching of the names at this point

### 2.1 Number of single applications versus number of multiple applications

In [26]:
# Create container for name, number of applications
substance_applications = pd.DataFrame(columns={'Product', 'Number of designations'})

# Get the number of applications
temp = data.copy()
temp['Product'] = temp['Product'].str.lower()
substances = temp['Product'].unique()
for substance in substances:
    # Get number
    mask = temp['Product'] == substance
    n_applications = len(temp[mask])

    # Get years of designation
    designation_date = ''
    for date in temp[mask]['Designation date']:
        if designation_date != '':
            designation_date += '/'
        designation_date += str(date)
    
    # Append to container
    substance_applications = substance_applications.append(pd.DataFrame({
        'Product': [substance],
        'Number of designations': [n_applications],
        'Designation date': designation_date}))

# Compute number of single and multi applications
mask = substance_applications['Number of designations'] == 1
n_single = len(substance_applications[mask])
mask = substance_applications['Number of designations'] > 1
n_multi = len(substance_applications[mask])

# Create figure
fig = go.Figure()
colours = plotly.colors.qualitative.Plotly

fig.add_trace(go.Pie(
    labels=['Single designation', 'Multiple designations'],
    values=[n_single, n_multi],
    sort=False,
    pull=[0, 0.2],
    marker=dict(
        colors=colours[:2], 
        line=dict(color='#000000', width=1))))
fig.show()

**Figure 2:** Number of orphan designation per product (combination of substances count as distinct, so do distinct names for the same substance).

### 2.2 Distribution among multiple applications

In [5]:
# Sort by number of applications
substance_applications.sort_values(by='Number of designations', inplace=True)

# Get number of applications
n_applications = []
n_substances = []
for n_application in substance_applications['Number of designations'].unique():
    # Skip if only one application
    if n_application <= 1:
        continue

    # Get number of substances
    mask = substance_applications['Number of designations'] == n_application
    number = len(substance_applications[mask])

    # Append to containers
    n_applications.append(n_application)
    n_substances.append(number)

# Accumulate >= 5 applications
n_applications = np.array(n_applications)
n_substances = np.array(n_substances)
mask = n_applications >= 5
n_substances_five_or_more = np.sum(n_substances[mask])
n_applications = n_applications[~mask]
n_substances = list(n_substances[~mask])

# Create figure
fig = go.Figure()
colours = plotly.colors.qualitative.Plotly
n_colours = len(n_substances) + 1

fig.add_trace(go.Pie(
    labels=[
        '%s designations' % str(n) for n in n_applications] + ['5 and more designations'],
    values=n_substances + [n_substances_five_or_more],
    sort=False,
    pull=[0, 0, 0, 0.2],
    marker=dict(
        colors=colours[:n_colours], 
        line=dict(color='#000000', width=1))))
fig.show()

**Figure 3:** Number of orphan designations per product with more than one application (combinations of products count as distinct, so do distinct names for the same product).

In [27]:
substance_applications

Unnamed: 0,Product,Number of designations,Designation date
0,adeno-associated virus serotype 5 containing t...,1,19 Oct 2020
0,"poly(oxy-1,2-ethanediyl), alpha-hydro-omega-me...",1,19 Oct 2020
0,miglustat,2,19 Oct 2020/11 Jan 2019
0,"poly(oxy-1,2-ethanediyl), alpha-(carboxymethyl...",1,19 Oct 2020
0,trehalose,4,19 Oct 2020/21 Aug 2020/19 Jun 2015/21 May 2015
...,...,...,...
0,gusperimus trihydrochloride,1,29 Mar 2001
0,inolimomab,1,05 Mar 2001
0,l-lysine-n-acetyl-l-cysteinate,1,14 Feb 2001
0,ethyl eicosopentaenoate,1,29 Dec 2000


## Get Brand names / Medicine name of multi deisgnation drugs

In [10]:
path = os.path.dirname(os.getcwd())
data_ema = pd.read_excel(path + '/data/ema_all_drugs.xlsx', header=8)
data_ema['Active substance'] = data_ema['Active substance'].str.upper()
data_ema

Unnamed: 0,Category,Medicine name,Therapeutic area,International non-proprietary name (INN) / common name,Active substance,Product number,Patient safety,Authorisation status,ATC code,Additional monitoring,...,Vet pharmacotherapeutic group,Date of opinion,Decision date,Revision number,Condition / indication,Species,ATCvet code,First published,Revision date,URL
0,Veterinary,Frontpro (previously known as Afoxolaner Merial),,afoxolaner,AFOXOLANER,EMEA/V/C/005126,no,Authorised,,no,...,Ectoparasiticides for systemic use,2019-03-21 01:00:00,2020-11-06 01:00:00,3.0,Treatment of flea (Ctenocephalides felis and C...,Dogs,QP53BE01,2019-06-07 17:00:00,2020-11-12 18:18:00,https://www.ema.europa.eu/en/medicines/veterin...
1,Human,Cholib,Dyslipidemias,"fenofibrate, simvastatin","FENOFIBRATE, SIMVASTATIN",EMEA/H/C/002559,no,Authorised,C10BA04,no,...,,2013-06-27 00:00:00,2020-10-23 00:00:00,12.0,Cholib is indicated as adjunctive therapy to d...,,,2018-08-20 00:00:00,2020-11-12 17:50:00,https://www.ema.europa.eu/en/medicines/human/E...
2,Human,Repaglinide Krka,"Diabetes Mellitus, Type 2",repaglinide,REPAGLINIDE,EMEA/H/C/001066,no,Authorised,A10BX02,no,...,,2009-07-23 00:00:00,2020-10-28 01:00:00,6.0,Repaglinide is indicated in patients with type...,,,2017-10-27 00:00:00,2020-11-12 17:30:00,https://www.ema.europa.eu/en/medicines/human/E...
3,Human,Liprolog,Diabetes Mellitus,insulin lispro,INSULIN LISPRO,EMEA/H/C/000393,no,Authorised,"A10AB04, A10AD04",no,...,,2001-04-26 00:00:00,2020-09-04 00:00:00,28.0,For the treatment of adults and children with ...,,,2017-10-23 00:00:00,2020-11-12 16:54:00,https://www.ema.europa.eu/en/medicines/human/E...
4,Human,Hexacima,"Hepatitis B, Tetanus, Immunization, Meningitis...","diphtheria, tetanus, pertussis (acellular, com...","DIPHTHERIA TOXOID / TETANUS TOXOID, TWO-COMPON...",EMEA/H/C/002702,no,Authorised,J07CA09,no,...,,2013-02-21 01:00:00,2020-09-24 00:00:00,21.0,Hexacima (DTaP-IPV-HB-Hib) is indicated for pr...,,,2018-01-08 12:30:00,2020-11-12 16:42:00,https://www.ema.europa.eu/en/medicines/human/E...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1730,Human,Liprolog,Diabetes Mellitus,insulin lispro,INSULIN LISPRO,EMEA/H/C/000143,no,Withdrawn,A10AB04,no,...,,NaT,2001-02-19 01:00:00,0.0,For the treatment of patients with diabetes me...,,,2001-02-19 01:00:00,2001-08-01 00:00:00,https://www.ema.europa.eu/en/medicines/human/E...
1731,Human,EchoGen,Echocardiography,dodecafluoropentane,DODECAFLUOROPENTANE,EMEA/H/C/000149,no,Withdrawn,V08DA,no,...,,NaT,2001-01-22 01:00:00,0.0,EchoGen is a transpulmonary echocardiographic ...,,,2001-01-22 01:00:00,2001-05-22 00:00:00,https://www.ema.europa.eu/en/medicines/human/E...
1732,Human,Ecokinase,Myocardial Infarction,reteplase,RETEPLASE,EMEA/H/C/000106,no,Withdrawn,B01AD07,no,...,,NaT,1999-07-30 00:00:00,0.0,Thrombolytic therapy of acute myocardial infar...,,,1999-07-30 00:00:00,2000-12-12 01:00:00,https://www.ema.europa.eu/en/medicines/human/E...
1733,Human,Primavax,"Hepatitis B, Tetanus, Immunization, Diphtheria","diphtheria, tetanus and hepatitis B vaccine, a...","DIPHTHERIA TOXOID PURIFIED, HEPATITIS B, RECOM...",EMEA/H/C/000156,no,Withdrawn,J07CA,no,...,,NaT,2000-07-27 00:00:00,0.0,This vaccine is indicated for active immunizat...,,,2000-07-27 00:00:00,2000-12-04 01:00:00,https://www.ema.europa.eu/en/medicines/human/E...


In [28]:
multi_designations = substance_applications.copy()
mask = multi_designations['Number of designations'] > 1
multi_designations = multi_designations[mask]
multi_designations['Medicine name'] = 'Not known'
multi_designations

Unnamed: 0,Product,Number of designations,Designation date,Medicine name
0,miglustat,2,19 Oct 2020/11 Jan 2019,Not known
0,trehalose,4,19 Oct 2020/21 Aug 2020/19 Jun 2015/21 May 2015,Not known
0,"2-(2-(18f)fluoropyridin-4-yl)-9h-pyrrolo[2,3-b...",2,19 Oct 2020/21 Aug 2020,Not known
0,"(4-{(2s,4s)-4-ethoxy-1-[(5-methoxy-7-methyl-1h...",3,19 Oct 2020/04 Jun 2020/14 Dec 2018,Not known
0,1-(3-methylbutanoyl)-l-aspartyl-l-threonyl-l-h...,2,19 Oct 2020/24 Aug 2018,Not known
...,...,...,...,...
0,alpha-1 antitrypsin (inhalation use),2,16 Nov 2004/16 Nov 2004,Not known
0,"sinapultide, dipalmitoylphosphatidylcholine, p...",3,29 Jul 2004/29 Jul 2004/04 Feb 2002,Not known
0,ciclosporin (inhalation use),2,29 Jul 2004/29 Jul 2004,Not known
0,recombinant antibody derivative against human ...,2,01 Dec 2003/01 Dec 2003,Not known


In [29]:
for product in multi_designations['Product'].str.upper():
    # Mask for product
    mask = data_ema['Active substance'] == product
    temp = data_ema[mask]['Medicine name']

    if len(temp) == 0:
        # No match was found
        continue
    
    # Construct medicine name (/ separates multiple candidates)
    medicine_name = ''
    for name in temp:
        if medicine_name != '':
            medicine_name += '/'
        medicine_name += name

    # Add Medicine name
    mask = multi_designations['Product'].str.upper() == product
    multi_designations.loc[mask, 'Medicine name'] = medicine_name

multi_designations

Unnamed: 0,Product,Number of designations,Designation date,Medicine name
0,miglustat,2,19 Oct 2020/11 Jan 2019,Miglustat Gen.Orph/Zavesca/Yargesa/Miglustat D...
0,trehalose,4,19 Oct 2020/21 Aug 2020/19 Jun 2015/21 May 2015,Not known
0,"2-(2-(18f)fluoropyridin-4-yl)-9h-pyrrolo[2,3-b...",2,19 Oct 2020/21 Aug 2020,Not known
0,"(4-{(2s,4s)-4-ethoxy-1-[(5-methoxy-7-methyl-1h...",3,19 Oct 2020/04 Jun 2020/14 Dec 2018,Not known
0,1-(3-methylbutanoyl)-l-aspartyl-l-threonyl-l-h...,2,19 Oct 2020/24 Aug 2018,Not known
...,...,...,...,...
0,alpha-1 antitrypsin (inhalation use),2,16 Nov 2004/16 Nov 2004,Not known
0,"sinapultide, dipalmitoylphosphatidylcholine, p...",3,29 Jul 2004/29 Jul 2004/04 Feb 2002,Not known
0,ciclosporin (inhalation use),2,29 Jul 2004/29 Jul 2004,Not known
0,recombinant antibody derivative against human ...,2,01 Dec 2003/01 Dec 2003,Not known


In [30]:
mask = multi_designations['Medicine name'] != 'Not known'
multi_designations[mask]

Unnamed: 0,Product,Number of designations,Designation date,Medicine name
0,miglustat,2,19 Oct 2020/11 Jan 2019,Miglustat Gen.Orph/Zavesca/Yargesa/Miglustat D...
0,pentosan polysulfate sodium,4,21 Aug 2020/12 Jan 2017/30 May 2016/19 Nov 2014,Elmiron
0,methotrexate,2,04 Jun 2020/29 Aug 2016,Jylamvo/Nordimet
0,dimethyl fumarate,3,09 Jan 2020/21 Mar 2018/14 Jul 2016,Skilarence/Tecfidera
0,propranolol hydrochloride,2,17 Oct 2019/27 Feb 2017,Hemangiol
0,temozolomide,2,21 Aug 2019/29 Aug 2016,Temodal/Temomedac/Temozolomide Hexal/Temozolom...
0,melatonin,7,11 Jan 2019/26 Oct 2018/21 Mar 2018/14 Oct 201...,Circadin/Slenyto
0,glucagon,3,19 Nov 2018/15 Oct 2014/05 Mar 2012,Baqsimi
0,deferiprone,2,27 Jun 2018/23 Feb 2011,Ferriprox/Deferiprone Lipomed
0,itraconazole,2,25 May 2018/23 Aug 2017,Fungitraxx


In [31]:
directory = os.path.dirname(os.getcwd())
path = os.path.join(directory, 'data/ods_multiple_applications.csv')
multi_designations[['Medicine name', 'Product', 'Number of designations', 'Designation date']].to_csv(path, index=False)