# EMA Orphan Drug Records

Source of data: https://www.ema.europa.eu/en/medicines/download-medicine-data#rare-disease-(orphan)-designations-section

In [1]:
import os

import numpy as np
import pandas as pd
import plotly.colors
import plotly.graph_objects as go

## Import data

In [2]:
path = os.path.dirname(os.getcwd())
data = pd.read_excel(path + '/data/ema_orphan_designations_2021.xlsx', header=8)
data

Unnamed: 0,Medicine name,Active substance,Agency product number,Date of designation / refusal of designation,Intended use,EU designation number,Status of orphan designation,First published,Revision date,URL
0,"Blectifor,",Caffeine citrate,EMEA/H/C/004100,2014-04-11 02:00:00,Prevention of bronchopulmonary dysplasia,EU/3/14/1261,Positive,2014-06-05 17:00:00,2014-06-05 17:00:00,https://www.ema.europa.eu/en/medicines/human/o...
1,,Vatreptacog alfa (activated),,2012-08-09 02:00:00,Treatment of haemophilia B,EU/3/12/1032,Withdrawn,2012-09-27 16:00:00,2015-03-06 19:20:00,https://www.ema.europa.eu/en/medicines/human/o...
2,,unoprostone isopropyl,,2013-06-19 02:00:00,Treatment of retinitis pigmentosa,EU/3/13/1146,Withdrawn,2013-07-11 14:00:00,2016-08-11 12:00:00,https://www.ema.europa.eu/en/medicines/human/o...
3,,Pyridoxal 5'-phosphate,,2014-10-15 02:00:00,Treatment of pyridoxamine 5'-phosphate oxidase...,EU/3/14/1347,Positive,2014-11-12 02:00:00,2014-11-12 02:00:00,https://www.ema.europa.eu/en/medicines/human/o...
4,,Norursodeoxycholic acid,,2014-07-04 02:00:00,Treatment of primary sclerosing cholangitis,EU/3/14/1288,Positive,2014-07-16 14:05:00,2014-07-16 14:05:00,https://www.ema.europa.eu/en/medicines/human/o...
...,...,...,...,...,...,...,...,...,...,...
2423,"Imnovid (previously Pomalidomide Celgene),",Pomalidomide,EMEA/H/C/002682,2009-10-08 00:00:00,Treatment of multiple myeloma,EU/3/09/672,Positive,2009-10-20 02:00:00,2021-06-10 11:12:00,https://www.ema.europa.eu/en/medicines/human/o...
2424,,2-Chloro-N6-(3-iodobenzyl)adenosine-5'-N-methy...,,2015-10-09 00:00:00,Treatment of hepatocellular carcinoma,EU/3/15/1565,Positive,2015-11-18 14:00:00,2021-06-10 12:52:00,https://www.ema.europa.eu/en/medicines/human/o...
2425,,Entinostat,,2010-06-10 00:00:00,Treatment of Hodgkin's lymphoma,EU/3/10/732,Positive,2010-06-24 02:00:00,2021-06-10 12:52:00,https://www.ema.europa.eu/en/medicines/human/o...
2426,,Adeno-associated viral vector serotype 2/2 con...,,2016-10-14 00:00:00,Treatment of retinitis pigmentosa,EU/3/16/1740,Withdrawn,2016-11-15 17:30:00,2021-06-11 09:32:00,https://www.ema.europa.eu/en/medicines/human/o...


## 1. Outcome of orphan drug status applications

### 1.1 Time averaged

In [4]:
# Group data based on outcome
assert np.array_equal(data['Status of orphan designation'].unique(), np.array(['Positive', 'Withdrawn', 'Negative', 'Expired']))
mask = data['Status of orphan designation'] == 'Positive'
positive = data[mask]
mask = data['Status of orphan designation'] == 'Withdrawn'
withdrawn = data[mask]
mask = data['Status of orphan designation'] == 'Negative'
negative = data[mask]
mask = data['Status of orphan designation'] == 'Expired'
expired = data[mask]

# Create figure
fig = go.Figure()
colours = plotly.colors.qualitative.Set1
fig.add_trace(go.Pie(
    labels=['Positive', 'Negative', 'Withdrawn'],
    values=[len(positive) + len(expired), len(negative), len(withdrawn)],
    textinfo='label+value+percent',
    sort=False,
    marker=dict(
        colors=[colours[2], colours[0], colours[1]],
        line=dict(color='#000000', width=1))))

# Save image as pdf
directory = os.path.dirname(os.getcwd())
fig.write_image(
    directory + '/images/ema_outcome_applications_2000_2020.pdf')
fig.show()

**Figure 1:** EMA's record of orphan drug outcomes from 08/08/2000 to 07/11/2020

### 1.2 Time evolution 
(exclude 2000 and 2020, because data is incomplete)

In [5]:
# Create container for time data
time_evolution = pd.DataFrame(columns={'Year', 'Positive', 'Negative', 'Withdrawn'})

# Group data by year
temp = data.copy()
temp['Year'] = data['Date of designation / refusal of designation'].apply(lambda x: x.year)
for year in temp['Year'].dropna().unique():
    # Ignore 2021 (because we are in the middle of the year)
    if year > 2020:
        continue

    # Get outcomes for that year
    mask = temp['Year'] == year
    temp2 = temp[mask]

    # Count positives (including expired)
    mask = (temp2['Status of orphan designation'] == 'Positive') | (temp2['Status of orphan designation'] == 'Expired')
    n_positive = len(temp2[mask])

    # Count negative
    mask = temp2['Status of orphan designation'] == 'Negative'
    n_negative = len(temp2[mask])

    # Count withdrawn
    mask = temp2['Status of orphan designation'] == 'Withdrawn'
    n_withdrawn = len(temp2[mask])
    
    # Append to main container
    time_evolution = time_evolution.append(pd.DataFrame({
        'Year': [year],
        'Positive': [n_positive],
        'Negative': [n_negative],
        'Withdrawn': [n_withdrawn]}))

# Sort by time
time_evolution.sort_values(by='Year', inplace=True)

# Create line plot
fig = go.Figure()
colours = plotly.colors.qualitative.Set1
colour_index = [2, 0, 1]
for index, key in enumerate(['Positive', 'Negative', 'Withdrawn']):
    fig.add_trace(go.Scatter(
        x=time_evolution['Year'],
        y=time_evolution[key],
        name=key,
        line=dict(color=colours[colour_index[index]])
    ))

# Set axes and template
fig.update_layout(
    template="simple_white",
    xaxis_title='Year',
    yaxis_title='Number',
    showlegend=False)

fig.write_image(
    directory + '/images/ema_outcome_applications_per_year_2000_2020.pdf')

fig.show()

**Figure 2:** EMA's record of orphan drug outcomes over time

In [6]:
mask = time_evolution['Year'] >= 2010
time_evolution[mask]['Positive'].sum()

1400

In [7]:
mask = time_evolution['Year'] <= 2010
time_evolution[mask]['Positive'].sum()

480

## 2 Number of applications per active substance
The substance names are transformed to lower case, but other than that there is no processing or matching of the names at this point

### 2.1 Number of single applications versus number of multiple applications

In [8]:
# Create container for name, number of applications
substance_applications = pd.DataFrame(columns={'Active substance', 'Number of applications'})

# Get the number of applications
temp = positive.copy()
temp['Active substance'] = temp['Active substance'].str.lower()
substances = temp['Active substance'].unique()
for substance in substances:
    # Get number
    mask = temp['Active substance'] == substance
    n_applications = len(temp[mask])

    # Get years of designation
    designation_date = ''
    for date in temp[mask]['Date of designation / refusal of designation']:
        if designation_date != '':
            designation_date += '/'
        designation_date += str(date)

    # Get trade name
    trade_name = ''
    n_aproved = 0
    for name in temp[mask]['Medicine name']:
        try:
            np.isnan(name)
            continue
        except TypeError:
            if trade_name != '':
                trade_name += '/'
            trade_name += str(name)
            n_aproved += 1

    # Get indication
    indication = ''
    for ind in temp[mask]['Intended use']:
        if indication != '':
            indication += '/'
        indication += str(ind)

    # Append to container
    substance_applications = substance_applications.append(pd.DataFrame({
        'Active substance': [substance],
        'Number of applications': [n_applications],
        'Indication': indication,
        'Designation date': designation_date,
        'Trade name': trade_name,
        'Approved / marketed application': n_aproved}))

# Compute number of single and multi applications
mask = substance_applications['Number of applications'] == 1
n_single = len(substance_applications[mask])
mask = substance_applications['Number of applications'] > 1
n_multi = len(substance_applications[mask])

# Create figure
fig = go.Figure()
colours = plotly.colors.qualitative.Plotly

fig.add_trace(go.Pie(
    labels=['Single application', 'Multiple applications'],
    values=[n_single, n_multi],
    sort=False,
    pull=[0, 0.2],
    marker=dict(
        colors=colours[:2], 
        line=dict(color='#000000', width=1))))
fig.show()

**Figure 3:** Number of orphan status applications per active substance (combination of substances count as distinct, so do distinct names for the same substance).

### 2.2 Distribution among multiple applications

In [9]:
# Sort by number of applications
substance_applications.sort_values(by='Number of applications', inplace=True)

# Get number of applications
n_applications = []
n_substances = []
for n_application in substance_applications['Number of applications'].unique():
    # Skip if only one application
    if n_application == 0:
        continue

    # Get number of substances
    mask = substance_applications['Number of applications'] == n_application
    number = len(substance_applications[mask])

    # Append to containers
    n_applications.append(n_application)
    n_substances.append(number)

# Accumulate >= 5 applications
n_applications = np.array(n_applications)
n_substances = np.array(n_substances)
mask = n_applications >= 3
n_substances_five_or_more = np.sum(n_substances[mask])
n_applications = n_applications[~mask]
n_substances = list(n_substances[~mask])

# Create figure
fig = go.Figure()
colours = plotly.colors.qualitative.Set1
n_colours = len(n_substances) + 1

fig.add_trace(go.Pie(
    labels=['%s ODD' % str(n) for n in n_applications] + ['>2 ODD'],
    values=n_substances + [n_substances_five_or_more],
    textinfo='label+value+percent',
    sort=False,
    marker=dict(
        colors=[colours[2], colours[1], colours[0]],
        line=dict(color='#000000', width=1))))

fig.update_layout(showlegend=False)

fig.write_image(
    directory + '/images/ema_number_OD_designations_per_product_2000_2020.pdf')

fig.show()

**Figure 4:** Number of orphan status applications per active substance with more than one application(combination of substances count as distinct, so do distinct names for the same substance).

## Get brand names

In [10]:
mask = substance_applications['Trade name'] != ''
substance_applications[mask]

Unnamed: 0,Number of applications,Active substance,Indication,Designation date,Trade name,Approved / marketed application
0,1,caffeine citrate,Prevention of bronchopulmonary dysplasia,2014-04-11 02:00:00,"Blectifor,",1.0
0,1,phosphorothioate oligonucleotide targeted to t...,Treatment of ATTR amyloidosis,2014-03-26 01:00:00,"Tegsedi,",1.0
0,1,human coagulation factor x,Treatment of hereditary factor X deficiency,2007-09-14 00:00:00,"Coagadex,",1.0
0,1,mannitolum,Treatment of cystic fibrosis,2005-11-07 01:00:00,"Bronchitol,",1.0
0,1,autologous haematopoietic stem cells transduce...,Treatment of adrenoleukodystrophy,2012-06-06 00:00:00,"Skysona,",1.0
...,...,...,...,...,...,...
0,7,glyceryl tri-(4-phenylbutyrate),Treatment of carbamoyl-phosphate synthase-1 de...,2010-02-03 01:00:00/2010-06-10 00:00:00/2010-0...,"Ravicti, /Ravicti, /Ravicti, /Ravicti, /Ravict...",6.0
0,7,cannabidiol,Treatment of perinatal asphyxia/Prevention of ...,2015-07-28 00:00:00/2016-04-28 02:00:00/2016-0...,"Epidyolex, /Epidyolex, /Epidyolex,",3.0
0,7,ciclosporin,Treatment of atopic keratoconjunctivitis/Treat...,2009-07-24 00:00:00/2007-10-29 01:00:00/2007-1...,"Verkazia,",1.0
0,9,sirolimus,Treatment of chronic non-infectious uveitis/Tr...,2011-08-30 02:00:00/2015-10-09 02:00:00/2016-0...,"Xegafri,",1.0


In [11]:
n_total = len(positive)
n_marketed = np.sum(substance_applications['Approved / marketed application'])

# Create figure
fig = go.Figure()
colours = plotly.colors.qualitative.Set1

fig.add_trace(go.Pie(
    labels=[
        'Marketed'] + ['Not marketed'],
    values=[n_marketed, n_total - n_marketed],
    textinfo='label+value+percent',
    sort=False,
    marker=dict(
        colors=[colours[2], colours[1]],
        line=dict(color='#000000', width=1))))
fig.update_layout(showlegend=False)

fig.write_image(
    directory + '/images/ema_OD_market_authorisations_2000_2020.pdf')
fig.show()

In [75]:
mask = (substance_applications['Trade name'] != '') & (substance_applications['Number of applications'] > 1)
substance_applications[mask]

Unnamed: 0,Number of applications,Active substance,Indication,Designation date,Trade name,Approved / marketed application
0,2,velaglucerase alfa,Treatment of Gaucher disease/Treatment of Gauc...,2010-06-06 00:00:00/2010-06-06 02:00:00,"Vpriv,",1.0
0,2,daratumumab,Treatment of AL amyloidosis/nan,2018-05-25 14:00:00/2013-07-17 00:00:00,"Darzalex,",1.0
0,2,ivosidenib,Treatment of acute myeloid leukaemia/Treatment...,2016-12-12 01:00:00/2018-03-21 01:00:00,"Tibsovo,",1.0
0,2,ngr-human tumour necrosis factor,Treatment of malignant mesothelioma/Treatment ...,2008-06-03 00:00:00/2009-11-09 01:00:00,"Zafiride,",1.0
0,2,pemigatinib,Treatment of myeloid/lymphoid neoplasms with e...,2019-10-17 00:00:00/2018-08-24 00:00:00,"Pemazyre, /Pemazyre,",2.0
0,2,midostaurin,Treatment of acute myeloid leukaemia/Treatment...,2004-07-29 02:00:00/2010-08-04 02:00:00,"Rydapt, /Rydapt,",2.0
0,2,masitinib mesilate,Treatment of pancreatic cancer/Treatment of am...,2009-10-28 01:00:00/2016-08-29 02:00:00,"Masican, /Alsitek,",2.0
0,2,autologous tumor-derived gp96 heat shock prote...,Treatment of renal-cell carcinoma/Treatment of...,2005-04-11 00:00:00/2009-04-29 00:00:00,"Oncophage,",1.0
0,2,autologous t cells transduced with lentiviral ...,Treatment of diffuse large B-cell lymphoma/Tre...,2016-10-14 00:00:00/2014-04-29 00:00:00,"Kymriah, /Kymriah,",2.0
0,2,mifepristone,Treatment of hypercortisolism (Cushing's syndr...,2009-02-27 01:00:00/2011-10-27 02:00:00,"Corluxin,",1.0


In [77]:
mask = (substance_applications['Trade name'] != '') & (substance_applications['Number of applications'] > 2)
substance_applications[mask]

Unnamed: 0,Number of applications,Active substance,Indication,Designation date,Trade name,Approved / marketed application
0,3,budesonide,Treatment of graft-versus-host disease/Treatme...,2006-11-03 01:00:00/2016-11-18 12:00:00/2013-0...,"Jorveza,",1.0
0,3,obinutuzumab,Treatment of diffuse large B-cell lymphoma/Tre...,2014-08-22 02:00:00/2015-06-19 02:00:00/2012-1...,"Gazyvaro, /Gazyvaro,",2.0
0,3,amikacin sulfate,Treatment of Pseudomonas aeruginosa lung infec...,2014-12-16 01:00:00/2006-07-25 00:00:00/2014-0...,"Arikayce liposomal,",1.0
0,3,eculizumab,Treatment of atypical haemolytic uraemic syndr...,2009-07-24 00:00:00/2013-08-05 00:00:00/2014-0...,"Soliris, /Soliris, /Soliris,",3.0
0,3,cysteamine bitartrate (gastroresistant) (merca...,Treatment of cystinosis/Treatment of pancreati...,2010-09-20 00:00:00/2014-03-26 01:00:00/2014-0...,"Procysbi,",1.0
0,3,n-tert-butyl-3-[(5-methyl-2-{[4-(2-pyrrolidin-...,Treatment of post-essential thrombocythaemia m...,2010-11-26 01:00:00/2010-11-26 01:00:00/2010-1...,"Inrebic, /Inrebic, /Inrebic,",3.0
0,3,l-asparaginase encapsulated in erythrocytes,Treatment of acute lymphoblastic leukaemia/Tre...,2006-10-27 02:00:00/2009-05-15 00:00:00/2013-0...,"Graspa,",1.0
0,3,n-(methyl-diazacyclohexyl-methylbenzamide)-aza...,Treatment of multiple myeloma/Treatment of mas...,2005-06-20 00:00:00/2004-11-16 01:00:00/2004-1...,"Masipro,",1.0
0,3,mexiletine hydrochloride,Treatment of myotonic disorders/Treatment of m...,2013-10-07 02:00:00/2014-11-19 01:00:00/2013-0...,"Namuscla,",1.0
0,3,ketoconazole,Treatment of Cushing's syndrome/Treatment of g...,2012-08-09 02:00:00/2017-03-20 02:00:00/2012-0...,"Ketoconazole HRA,",1.0
