# EMA Orphan Drug Records

Source of data: https://www.ema.europa.eu/en/medicines/download-medicine-data#rare-disease-(orphan)-designations-section

In [6]:
import os

import numpy as np
import pandas as pd

# The data folder sits next to the directory the notebook is run from
path = os.path.dirname(os.getcwd())
workbook = path + '/data/ema_orphan_designations.xlsx'

# Row 8 (zero-based) of the sheet provides the column names
data = pd.read_excel(workbook, header=8)
data

Unnamed: 0,Medicine name,Active substance,Agency product number,Date of first decision,Disease / condition,EU designation number,Status of orphan designation,First published,Revision date,URL
0,,Argon,,2018-06-27 14:00:00,Treatment of perinatal asphyxia,EU/3/18/2031,Positive,2018-08-22 13:20:00,2018-08-22 13:20:00,https://www.ema.europa.eu/en/medicines/human/o...
1,,Heterologous human adult liver-derived stem cells,,2011-09-27 02:00:00,Treatment of ornithine transcarbamylase defici...,EU/3/11/904,Positive,2011-10-17 12:10:00,2011-10-17 12:10:00,https://www.ema.europa.eu/en/medicines/human/o...
2,,Sodium phenylbutyrate,,2012-02-09 02:00:00,Treatment of ornithine-transcarbamylase defici...,EU/3/12/950,Withdrawn,2012-03-01 13:45:00,2013-10-09 16:00:00,https://www.ema.europa.eu/en/medicines/human/o...
3,"Mepsevii,",Recombinant human beta-glucuronidase (vestroni...,EMEA/H/C/004438,2012-03-21 02:00:00,Treatment of mucopolysaccharidosis type VII (S...,EU/3/12/973,Positive,2012-04-19 14:25:00,2018-09-07 16:00:00,https://www.ema.europa.eu/en/medicines/human/o...
4,,Pegylated recombinant factor VIIa,,2008-06-03 02:00:00,Treatment of haemophilia B,EU/3/08/552,Withdrawn,2008-08-18 02:09:21,2015-03-06 16:00:00,https://www.ema.europa.eu/en/medicines/human/o...
...,...,...,...,...,...,...,...,...,...,...
2322,,Setmelanotide,,2019-08-21 00:00:00,Treatment of Bardet-Biedl syndrome,EU/3/19/2192,Positive,2020-01-14 16:00:00,2020-11-09 11:47:00,https://www.ema.europa.eu/en/medicines/human/o...
2323,,Setmelanotide,,2020-01-09 01:00:00,Treatment of Alström syndrome,EU/3/19/2245,Positive,2020-05-20 11:20:00,2020-11-09 11:48:00,https://www.ema.europa.eu/en/medicines/human/o...
2324,,Setmelanotide,,2018-11-19 01:00:00,Treatment of leptin receptor deficiency,EU/3/18/2101,Positive,2019-02-25 14:25:00,2020-11-09 11:49:00,https://www.ema.europa.eu/en/medicines/human/o...
2325,,Setmelanotide,,2016-07-14 00:00:00,Treatment of pro-opiomelanocortin deficiency,EU/3/16/1703,Positive,2016-09-06 16:00:00,2020-11-09 11:50:00,https://www.ema.europa.eu/en/medicines/human/o...


## 1. Outcome of orphan drug status applications

### 1.1 Time averaged

In [37]:
import plotly.colors
import plotly.graph_objects as go

# Group data based on outcome
status = data['Status of orphan designation']

# Make sure the grouping below covers every record. `unique()` returns values
# in order of first appearance, so the check must not depend on that order;
# a missing status shows up here as an unexpected value as well.
known_statuses = {'Positive', 'Withdrawn', 'Negative', 'Expired'}
unknown_statuses = set(status.unique()) - known_statuses
assert not unknown_statuses, (
    'Unexpected designation status: %s'
    % (sorted(str(value) for value in unknown_statuses),))

mask = status == 'Positive'
positive = data[mask]
mask = status == 'Withdrawn'
withdrawn = data[mask]
mask = status == 'Negative'
negative = data[mask]
mask = status == 'Expired'
expired = data[mask]

# Create figure
fig = go.Figure()
colours = plotly.colors.qualitative.Plotly

# Expired designations are added to the positive slice
fig.add_trace(go.Pie(
    labels=['Positive', 'Negative', 'Withdrawn'],
    values=[len(positive) + len(expired), len(negative), len(withdrawn)],
    pull=[0.1, 0, 0],
    sort=False,
    marker=dict(
        colors=colours[:3],
        line=dict(color='#000000', width=1))))
fig.show()

**Figure 1:** EMA's record of orphan drug outcomes from 08/08/2000 to 07/11/2020. Expired designations are counted as positive.

### 1.2 Time evolution 
(2000 and 2020 are excluded because the data for those years are incomplete)

In [64]:
# Years left out of the time series because their records are incomplete
excluded_years = [2000, 2020]

# Extract the year of the first decision; rows without a date get NaN
temp = data.copy()
temp['Year'] = pd.to_datetime(temp['Date of first decision']).dt.year

# Count expired designations as positive
outcome = temp['Status of orphan designation'].replace({'Expired': 'Positive'})
keep = temp['Year'].notna() & ~temp['Year'].isin(excluded_years)

# Count the outcomes for each year. The rows are collected in a list and
# converted once: `DataFrame.append` was removed in pandas 2.0, and column
# names passed as a set have no defined order.
records = []
for year, statuses in outcome[keep].groupby(temp.loc[keep, 'Year']):
    counts = statuses.value_counts()
    records.append({
        'Year': int(year),
        'Positive': int(counts.get('Positive', 0)),
        'Negative': int(counts.get('Negative', 0)),
        'Withdrawn': int(counts.get('Withdrawn', 0))})
time_evolution = pd.DataFrame(
    records, columns=['Year', 'Positive', 'Negative', 'Withdrawn'])

# Sort by time
time_evolution = time_evolution.sort_values(by='Year')

# Create line plot
fig = go.Figure()
colours = plotly.colors.qualitative.Plotly

for index, key in enumerate(['Positive', 'Negative', 'Withdrawn']):
    # NOTE(review): `marker.line` styles the outline of the markers, not the
    # line connecting them -- confirm this is the intended styling.
    fig.add_trace(go.Scatter(
        x=time_evolution['Year'],
        y=time_evolution[key],
        name=key,
        marker=dict(
            line=dict(color=colours[index], width=1))))

# Set axes and template
fig.update_layout(
    template="plotly_white",
    xaxis_title='Year',
    yaxis_title='Number')

fig.show()

**Figure 2:** EMA's record of orphan drug outcomes over time

## 2. Number of applications per active substance
The substance names are converted to lower case; apart from that, the names are not processed or matched at this point.

### 2.1 Number of single applications versus number of multiple applications

In [74]:
# Normalise the substance names to lower case before counting
temp = data.copy()
temp['Active substance'] = temp['Active substance'].str.lower()

# Count the applications per substance in a single pass. `sort=False` keeps
# the substances in order of first appearance; rows without a substance name
# are left out instead of producing an entry with zero applications.
substance_applications = (
    temp.groupby('Active substance', sort=False)
    .size()
    .reset_index(name='Number of applications'))

# Compute number of single and multi applications
application_counts = substance_applications['Number of applications']
n_single = int((application_counts == 1).sum())
n_multi = int((application_counts > 1).sum())

# Create figure
fig = go.Figure()
colours = plotly.colors.qualitative.Plotly

fig.add_trace(go.Pie(
    labels=['Single application', 'Multiple applications'],
    values=[n_single, n_multi],
    sort=False,
    pull=[0, 0.2],
    marker=dict(
        colors=colours[:2],
        line=dict(color='#000000', width=1))))
fig.show()

**Figure 3:** Number of orphan status applications per active substance (combinations of substances count as distinct substances, as do different names for the same substance).

### 2.2 Distribution among multiple applications

In [76]:
# For every application count above one, count the substances that have it
application_counts = substance_applications['Number of applications']
n_applications = [
    count for count in application_counts.unique() if count != 1]
n_substances = [
    len(substance_applications[application_counts == count])
    for count in n_applications]

# One pie slice per application count, in the order the counts were found
colours = plotly.colors.qualitative.Plotly
n_colours = len(n_substances)
slice_labels = ['%s applications' % str(n) for n in n_applications]

fig = go.Figure()
fig.add_trace(go.Pie(
    labels=slice_labels,
    values=n_substances,
    sort=False,
    marker=dict(
        colors=colours[:n_colours],
        line=dict(color='#000000', width=1))))
fig.show()