# EC Orphan Drug Records

Source of data: https://ec.europa.eu/health/documents/community-register/html/reg_od_act.htm?sort=n

In [1]:
import os

import numpy as np
import pandas as pd
import plotly.colors
import plotly.graph_objects as go

In [2]:
directory = os.path.dirname(os.getcwd())
path = os.path.join(directory, 'data/ec_orphan_desigantions_June_2021.xlsx')
data = pd.read_excel(path, header=2)
data

Unnamed: 0,EU #,Product,Indication,Sponsor,Designation date,Tradename - EU product # - Implemented on
0,EU/3/21/2426,Ganglioside GM1,Treatment of amyotrophic lateral sclerosis,3R Pharma Consulting GmbH,13 Apr 2021,-
1,EU/3/21/2425,S-[5-(omega-methoxypoly(oxyethylene)-2-oxopent...,Treatment of cutaneous T-cell lymphoma,Almirall S.A.,13 Apr 2021,-
2,EU/3/21/2424,Cevostamab,Treatment of multiple myeloma,Roche Registration GmbH,13 Apr 2021,-
3,EU/3/21/2423,Vatiquinone,Treatment of Friedreich's ataxia,PTC Therapeutics International Limited,26 Mar 2021,-
4,EU/3/21/2422,Lorcaserin hydrochloride,Treatment of Dravet syndrome,Premier Research Group S.L.,26 Mar 2021,-
...,...,...,...,...,...,...
1763,EU/3/01/034,Gusperimus trihydrochloride,Treatment of Wegener’s granulomatosis,Nordic Group B.V.,29 Mar 2001,-
1764,EU/3/01/028,Inolimomab,Treatment of Graft versus Host Disease,Elsalys Biotech SA,05 Mar 2001,-
1765,EU/3/01/026,L-Lysine-N-acetyl-L-cysteinate,Treatment of cystic fibrosis,LABORATOIRES SMB SA,14 Feb 2001,-
1766,EU/3/00/013,Ethyl Eicosopentaenoate,Treatment of Huntington's disease,Amarin Neuroscience Limited,29 Dec 2000,-


## 1. Number of designations each year
(from 2001 to 2020)

In [3]:
# Create container for time data
time_evolution = pd.DataFrame(columns={'Year', 'Number of designations'})

# Group data by year
years = pd.to_datetime(
    data['Designation date']).apply(lambda x: x.year)
for year in years.dropna().unique():
    # Ignore 2000 and 2020
    if (year < 2001) or (year > 2020):
        continue

    # Get outcomes for that year
    mask = years == year
    n_designations = years[mask].count()
    
    # Append to main container
    time_evolution = time_evolution.append(pd.DataFrame({
        'Year': [year],
        'Number of designations': [n_designations]}))

# Sort by time
time_evolution.sort_values(by='Year', inplace=True)

# Create line plot
fig = go.Figure()
colour = plotly.colors.qualitative.Plotly[0]

fig.add_trace(go.Scatter(
    x=time_evolution['Year'],
    y=time_evolution['Number of designations'],
    marker=dict(
        line=dict(color=colour, width=1))))

# Set axes and template
fig.update_layout(
    template="plotly_white",
    xaxis_title='Year',
    yaxis_title='Number of designations')

fig.show()

**Figure 1:** EC's record of orphan drug designations over time

In [4]:
time_evolution['Number of designations'].sum()

1724

## 2 Number of applications per active substance
Here we consider all data from 2000 - 2021!

### 2.1 Number of single applications versus number of multiple applications

In [5]:
# Create container for name, number of applications
substance_applications = pd.DataFrame(
    columns={'Product', 'Number of designations', 'Trade names', 'Trade name', 'Indication', 'Designation date', 'Number of MAs', 'EU number', 'Implemented on'})

# Get the number of applications
temp = data.copy()
temp['Product'] = temp['Product'].str.lower()
substances = temp['Product'].unique()
for substance in substances:
    # Get number
    mask = temp['Product'] == substance
    n_applications = len(temp[mask])

    # Get years of designation
    designation_date = ''
    for date in temp[mask]['Designation date']:
        if designation_date != '':
            designation_date += '\n '
        designation_date += str(date)

    # Get trade name
    trade_name = ''
    trade_names = ''
    eu_number = ''
    implemented_on = ''
    n_approved = 0
    for name in temp[mask]['Tradename - EU product # - Implemented on']:
        if trade_names != '':
            trade_names += '\n '
            eu_number += '\n '
            implemented_on += '\n '
        if name == '-':
            trade_names += '-'
            eu_number += '-'
            implemented_on += '-'
            continue
        tn, eu_n, date = name.split(' - \n')
        trade_names += tn
        eu_number += eu_n
        implemented_on += date
        n_approved += 1
        if trade_name == '':
            trade_name += tn

    # Get indication
    indication = ''
    for ind in temp[mask]['Indication']:
        if indication != '':
            indication += '\n '
        indication += ind
    
    # Append to container
    substance_applications = substance_applications.append(pd.DataFrame({
        'Product': [substance],
        'Number of designations': [n_applications],
        'Indication': indication,
        'Designation date': designation_date,
        'Trade name': trade_name,
        'Trade names': trade_names,
        'Number of MAs': n_approved,
        'EU number': eu_number,
        'Implemented on': implemented_on}))

# Compute number of single and multi applications
mask = substance_applications['Number of designations'] == 1
n_single = len(substance_applications[mask])
mask = substance_applications['Number of designations'] > 1
n_multi = len(substance_applications[mask])

# Create figure
fig = go.Figure()
colours = plotly.colors.qualitative.Plotly

fig.add_trace(go.Pie(
    labels=['Single designation', 'Multiple designations'],
    values=[n_single, n_multi],
    sort=False,
    pull=[0, 0.2],
    marker=dict(
        colors=colours[:2], 
        line=dict(color='#000000', width=1))))
fig.show()

**Figure 2:** Number of orphan designation per product (combination of substances count as distinct, so do distinct names for the same substance).

### 2.2 Distribution among multiple applications

In [8]:
# Sort by number of applications
substance_applications.sort_values(by='Number of designations', inplace=True)

# Get number of applications
n_applications = []
n_substances = []
for n_application in substance_applications['Number of designations'].unique():
    # Get number of substances
    mask = substance_applications['Number of designations'] == n_application
    number = len(substance_applications[mask])

    # Append to containers
    n_applications.append(n_application)
    n_substances.append(number)

# Accumulate >= 5 applications
n_applications = np.array(n_applications)
n_substances = np.array(n_substances)
mask = n_applications >= 3
n_substances_three_or_more = np.sum(n_substances[mask])
n_applications = n_applications[~mask]
n_substances = list(n_substances[~mask])

# Create figure
fig = go.Figure()
colours = plotly.colors.qualitative.Set1
n_colours = len(n_substances) + 1

fig.add_trace(go.Pie(
    labels=[
        '%s ODD' % str(n) for n in n_applications] + ['>2 ODD'],
    values=n_substances + [n_substances_three_or_more],
    textinfo='label+value+percent',
    sort=False,
    marker=dict(
        colors=[colours[2], colours[1], colours[0]],
        line=dict(color='#000000', width=1))))
fig.update_layout(showlegend=False)

directory = os.path.dirname(os.getcwd())
fig.write_image(
    directory + '/images/ec_number_OD_designations_per_product_2000_2020.pdf')
fig.show()

**Figure 3:** Number of orphan designations per product with more than one application (combinations of products count as distinct, so do distinct names for the same product).

In [9]:
# Sort by number of applications
substance_applications.sort_values(by='Number of designations', inplace=True)

# Get number of applications
n_applications = []
n_substances = []
for n_application in substance_applications['Number of designations'].unique():
    if n_application < 2:
        continue

    # Get number of substances
    mask = substance_applications['Number of designations'] == n_application
    number = len(substance_applications[mask])

    # Append to containers
    n_applications.append(n_application)
    n_substances.append(number)

# Accumulate >= 5 applications
n_applications = np.array(n_applications)
n_substances = np.array(n_substances)
mask = n_applications >= 5
n_substances_five_or_more = np.sum(n_substances[mask])
n_applications = n_applications[~mask]
n_substances = list(n_substances[~mask])

# Create figure
fig = go.Figure()
colours = plotly.colors.qualitative.Plotly
n_colours = len(n_substances) + 1

fig.add_trace(go.Pie(
    labels=[
        '%s ODD' % str(n) for n in n_applications] + ['>5'],
    values=n_substances + [n_substances_five_or_more],
    textinfo='label+value+percent',
    sort=False,
    marker=dict(
        colors=['rgb(33, 75, 99)', 'rgb(79, 129, 102)', 'rgb(151, 179, 100)',
                 'rgb(175, 49, 35)', 'rgb(36, 73, 147)'], 
        line=dict(color='#000000', width=1))))
fig.update_layout(showlegend=True)
fig.show()

## Get Brand names / Medicine name of multi deisgnation drugs

In [72]:
substance_applications

Unnamed: 0,Indication,Implemented on,Number of MAs,Trade names,Designation date,Trade name,Product,EU number,Number of designations
0,Treatment of amyotrophic lateral sclerosis,-,0,-,13 Apr 2021,,ganglioside gm1,-,1
0,Treatment of idiopathic pulmonary fibrosis,-,0,-,24 Aug 2018,,tilorone,-,1
0,Treatment of growth hormone deficiency,-,0,-,24 Aug 2018,,somapacitan,-,1
0,Treatment of anthrax,19 Nov 2020,1,Obiltoxaximab SFL,24 Aug 2018,Obiltoxaximab SFL,obiltoxaximab,EU/1/20/1485,1
0,Treatment of marginal zone lymphoma,-,0,-,24 Aug 2018,,copanlisib,-,1
...,...,...,...,...,...,...,...,...,...
0,Treatment of citrullinaemia type 2\n Treatment...,-\n 01 Dec 2015\n 01 Dec 2015\n 01 Dec 2015\n ...,6,-\n Ravicti\n Ravicti\n Ravicti\n Ravicti\n Ra...,10 Jun 2010\n 10 Jun 2010\n 10 Jun 2010\n 10 J...,Ravicti,glyceryl tri-(4-phenylbutyrate),-\n EU/1/15/1062\n EU/1/15/1062\n EU/1/15/1062...,7
0,Treatment of perinatal asphyxia\n Treatment of...,-\n -\n -\n -\n -\n -\n -,0,-\n -\n -\n -\n -\n -\n -,11 Jan 2019\n 26 Oct 2018\n 21 Mar 2018\n 14 O...,,melatonin,-\n -\n -\n -\n -\n -\n -,7
0,Treatment of tuberous sclerosis\n Treatment of...,20 Apr 2021\n -\n 23 Sep 2019\n -\n -\n -\n 23...,3,Epidyolex\n -\n Epidyolex\n -\n -\n -\n Epidyolex,17 Jan 2018\n 16 Oct 2017\n 20 Mar 2017\n 29 A...,Epidyolex,cannabidiol,EU/1/19/1389\n -\n EU/1/19/1389\n -\n -\n -\n ...,7
0,Treatment of sickle cell disease\n Treatment o...,-\n -\n -\n -\n -\n -\n -\n -\n -,0,-\n -\n -\n -\n -\n -\n -\n -\n -,17 Jan 2018\n 23 Aug 2017\n 17 Jul 2017\n 20 J...,,sirolimus,-\n -\n -\n -\n -\n -\n -\n -\n -,9


In [73]:
# How many ODs have been marketed?
mask = substance_applications['Number of MAs'] > 0
substance_applications[mask]

Unnamed: 0,Indication,Implemented on,Number of MAs,Trade names,Designation date,Trade name,Product,EU number,Number of designations
0,Treatment of anthrax,19 Nov 2020,1,Obiltoxaximab SFL,24 Aug 2018,Obiltoxaximab SFL,obiltoxaximab,EU/1/20/1485,1
0,Treatment of diffuse large B-cell lymphoma,20 Jan 2020,1,Polivy,16 Apr 2018,Polivy,polatuzumab vedotin,EU/1/19/1388,1
0,Treatment of mantle cell lymphoma,15 Dec 2020,1,Tecartus,13 Nov 2019,Tecartus,autologous peripheral blood t cells cd4 and cd...,EU/1/20/1492,1
0,Treatment of spinal muscular atrophy,29 Mar 2021,1,Evrysdi,26 Feb 2019,Evrysdi,risdiplam,EU/1/21/1531,1
0,Treatment of anal fistula,27 Mar 2018,1,Alofisel,08 Oct 2009,Alofisel,expanded human allogeneic mesenchymal adult st...,EU/1/17/1261,1
...,...,...,...,...,...,...,...,...,...
0,Treatment of moderate and severe closed trauma...,-\n -\n -\n 10 Jul 2018,1,-\n -\n -\n Verkazia,01 Oct 2010\n 29 Oct 2007\n 22 Oct 2007\n 06 A...,Verkazia,ciclosporin,-\n -\n -\n EU/1/17/1219,4
0,Treatment of follicular lymphoma\n Treatment o...,-\n -\n -\n 27 Aug 2018\n -,1,-\n -\n -\n Yescarta\n -,11 Nov 2015\n 11 Nov 2015\n 11 Nov 2015\n 09 O...,Yescarta,autologous t cells transduced with retroviral ...,-\n -\n -\n EU/1/18/1299\n -,5
0,Treatment of graft-versus-host disease\n Treat...,-\n -\n 07 Jul 2015\n -\n -,1,-\n -\n IMBRUVICA\n -\n -,18 Nov 2016\n 10 Aug 2015\n 29 Apr 2014\n 18 D...,IMBRUVICA,ibrutinib,-\n -\n EU/1/14/945\n -\n -,5
0,Treatment of citrullinaemia type 2\n Treatment...,-\n 01 Dec 2015\n 01 Dec 2015\n 01 Dec 2015\n ...,6,-\n Ravicti\n Ravicti\n Ravicti\n Ravicti\n Ra...,10 Jun 2010\n 10 Jun 2010\n 10 Jun 2010\n 10 J...,Ravicti,glyceryl tri-(4-phenylbutyrate),-\n EU/1/15/1062\n EU/1/15/1062\n EU/1/15/1062...,7


How many designations lead to marketed drugs so far?

In [10]:
n_total = len(data)
n_marketed = np.sum(substance_applications['Number of MAs'])

# Create figure
fig = go.Figure()
colours = plotly.colors.qualitative.T10

fig.add_trace(go.Pie(
    labels=[
        'Marketed'] + ['Not marketed'],
    values=[n_marketed, n_total - n_marketed],
    textinfo='label+value+percent',
    sort=False,
    marker=dict(
        colors=[colours[0], colours[-1]], 
        line=dict(color='#000000', width=1))))
fig.update_layout(showlegend=False)

fig.write_image(
    directory + '/images/ec_number_market_authorisations_2000_2020.pdf')
fig.show()

### How many of those marketed drugs have more than one designation?
We need to make sure that each trade name only occurs once (see below sum products describe the same active substance, but are named differently. They can only be merged via their common trade name).

In [75]:
# Mask those products that have not been marketed
mask = substance_applications['Trade name'] != ''
temp = substance_applications[mask].copy()

# Create container for marketed ODs
marketed_ods = pd.DataFrame(
    columns={'Product', 'Number of designations', 'Trade names', 'Trade name', 'Indication', 'Designation date', 'Number of MAs', 'EU number', 'Implemented on'})

trade_names = temp['Trade name'].unique()
for trade_name in trade_names:
    # Mask for all rows with that trade name
    mask = temp['Trade name'] == trade_name

    designation_date = ''
    trade_names = ''
    eu_number = ''
    implemented_on = ''
    n_approved = 0
    n_designations = 0
    indication = ''
    product = ''
    for _, row in temp[mask].iterrows():
        if designation_date != '':
            designation_date += '\n '
            trade_names += '\n '
            eu_number += '\n '
            implemented_on += '\n '
            indication += '\n '
            product += '\n '
        designation_date += row['Designation date']
        trade_names += row['Trade names']
        eu_number += row['EU number']
        implemented_on += row['Implemented on']
        indication += row['Indication']
        product += row['Product']
        n_approved += row['Number of MAs']
        n_designations += row['Number of designations']
    
    # Append to container
    marketed_ods = marketed_ods.append(pd.DataFrame({
        'Product': [product],
        'Number of designations': [n_designations],
        'Indication': [indication],
        'Designation date': [designation_date],
        'Trade name': [trade_name],
        'Trade names': [trade_names],
        'Number of MAs': [n_approved],
        'EU number': [eu_number],
        'Implemented on': [implemented_on]}))

marketed_ods

Unnamed: 0,Indication,Implemented on,Number of MAs,Trade names,Designation date,Trade name,Product,EU number,Number of designations
0,Treatment of anthrax,19 Nov 2020,1,Obiltoxaximab SFL,24 Aug 2018,Obiltoxaximab SFL,obiltoxaximab,EU/1/20/1485,1
0,Treatment of diffuse large B-cell lymphoma,20 Jan 2020,1,Polivy,16 Apr 2018,Polivy,polatuzumab vedotin,EU/1/19/1388,1
0,Treatment of mantle cell lymphoma,15 Dec 2020,1,Tecartus,13 Nov 2019,Tecartus,autologous peripheral blood t cells cd4 and cd...,EU/1/20/1492,1
0,Treatment of spinal muscular atrophy,29 Mar 2021,1,Evrysdi,26 Feb 2019,Evrysdi,risdiplam,EU/1/21/1531,1
0,Treatment of anal fistula,27 Mar 2018,1,Alofisel,08 Oct 2009,Alofisel,expanded human allogeneic mesenchymal adult st...,EU/1/17/1261,1
...,...,...,...,...,...,...,...,...,...
0,Treatment of Duchenne muscular dystrophy\n Tre...,-\n 10 Sep 2015\n -\n -,1,-\n Raxone\n -\n -,20 Mar 2007\n 15 Feb 2007\n 08 Mar 2004\n 20 N...,Raxone,idebenone,-\n EU/1/15/1020\n -\n -,4
0,Treatment of Berardinelli-Seip syndrome\n Trea...,01 Aug 2018\n 01 Aug 2018\n 01 Aug 2018\n 01 A...,4,Myalepta\n Myalepta\n Myalepta\n Myalepta,17 Jul 2012\n 17 Jul 2012\n 17 Jul 2012\n 17 J...,Myalepta,metreleptin,EU/1/18/1276\n EU/1/18/1276\n EU/1/18/1276\n E...,4
0,Treatment of moderate and severe closed trauma...,-\n -\n -\n 10 Jul 2018,1,-\n -\n -\n Verkazia,01 Oct 2010\n 29 Oct 2007\n 22 Oct 2007\n 06 A...,Verkazia,ciclosporin,-\n -\n -\n EU/1/17/1219,4
0,Treatment of citrullinaemia type 2\n Treatment...,-\n 01 Dec 2015\n 01 Dec 2015\n 01 Dec 2015\n ...,6,-\n Ravicti\n Ravicti\n Ravicti\n Ravicti\n Ra...,10 Jun 2010\n 10 Jun 2010\n 10 Jun 2010\n 10 J...,Ravicti,glyceryl tri-(4-phenylbutyrate),-\n EU/1/15/1062\n EU/1/15/1062\n EU/1/15/1062...,7


In [76]:
mask = (marketed_ods['Number of MAs'] > 0) & (marketed_ods['Number of designations'] > 1)
marketed_ods[mask]

Unnamed: 0,Indication,Implemented on,Number of MAs,Trade names,Designation date,Trade name,Product,EU number,Number of designations
0,Treatment of senile systemic amyloidosis\n Tre...,19 Feb 2020\n 18 Nov 2011,2,Vyndaqel\n Vyndaqel,08 Nov 2012\n 28 Aug 2006,Vyndaqel,"tafamidis\n n-methyl d-(2,3,4,5,6-pentahydroxy...",EU/1/11/717\n EU/1/11/717,2
0,Treatment of inherited retinal dystrophies (in...,05 Dec 2018\n 05 Dec 2018,2,Luxturna\n Luxturna,28 Jul 2015\n 02 Apr 2012,Luxturna,adenovirus-associated viral vector serotype 2 ...,EU/1/18/1331\n EU/1/18/1331,2
0,Treatment of mantle cell lymphoma\n Treatment ...,23 Oct 2014\n 23 Oct 2014\n -\n -\n 07 Jul 201...,3,IMBRUVICA\n IMBRUVICA\n -\n -\n IMBRUVICA\n -\n -,12 Mar 2013\n 26 Apr 2012\n 18 Nov 2016\n 10 A...,IMBRUVICA,1-[(3r)-3-[4-amino-3-(4-phenoxyphenyl)-1h-pyra...,EU/1/14/945\n EU/1/14/945\n -\n -\n EU/1/14/94...,7
0,Treatment of cutaneous T-cell lymphoma\n Treat...,19 Dec 2017\n 30 Oct 2012\n 30 Oct 2012,3,ADCETRIS\n ADCETRIS\n ADCETRIS,11 Jan 2012\n 15 Jan 2009\n 15 Jan 2009,ADCETRIS,brentuximab vedotin\n monoclonal antibody agai...,EU/1/12/794\n EU/1/12/794\n EU/1/12/794,3
0,Treatment of diffuse large B cell lymphoma\n T...,27 Aug 2018\n -\n -\n -\n 27 Aug 2018\n -,2,Yescarta\n -\n -\n -\n Yescarta\n -,16 Dec 2014\n 11 Nov 2015\n 11 Nov 2015\n 11 N...,Yescarta,autologous t cells transduced with retroviral ...,EU/1/18/1299\n -\n -\n -\n EU/1/18/1299\n -,6
0,Treatment of chronic myeloid leukaemia\n Treat...,03 Jul 2013\n 03 Jul 2013,2,Iclusig\n Iclusig,02 Feb 2010\n 02 Feb 2010,Iclusig,"benzamide, 3-(2-imidazo[1,2-b]pyridazin-3-ylet...",EU/1/13/839\n EU/1/13/839,2
0,Treatment of myeloid/lymphoid neoplasms with e...,-\n 29 Mar 2021,1,-\n Pemazyre,17 Oct 2019\n 24 Aug 2018,Pemazyre,pemigatinib,-\n EU/1/21/1535,2
0,Treatment of Lennox-Gastaut syndrome\n Treatme...,-\n 21 Dec 2020,1,-\n Fintepla,27 Feb 2017\n 18 Dec 2013,Fintepla,fenfluramine hydrochloride,-\n EU/1/20/1491,2
0,Treatment of mastocytosis\n Treatment of acute...,20 Sep 2017\n 20 Sep 2017,2,Rydapt\n Rydapt,04 Aug 2010\n 29 Jul 2004,Rydapt,midostaurin,EU/1/17/1218\n EU/1/17/1218,2
0,Treatment of invasive aspergillosis\n Treatmen...,19 Oct 2015\n 19 Oct 2015,2,Cresemba\n Cresemba,04 Jul 2014\n 04 Jun 2014,Cresemba,isavuconazonium sulfate,EU/1/15/1036\n EU/1/15/1036,2


In [79]:
mask = (marketed_ods['Number of MAs'] > 0) & (marketed_ods['Number of designations'] > 1)
len(marketed_ods[mask])

36

### How many are approved to treat multiple diseases?

In [80]:
mask = (marketed_ods['Number of MAs'] > 1)
marketed_ods[mask]

Unnamed: 0,Indication,Implemented on,Number of MAs,Trade names,Designation date,Trade name,Product,EU number,Number of designations
0,Treatment of senile systemic amyloidosis\n Tre...,19 Feb 2020\n 18 Nov 2011,2,Vyndaqel\n Vyndaqel,08 Nov 2012\n 28 Aug 2006,Vyndaqel,"tafamidis\n n-methyl d-(2,3,4,5,6-pentahydroxy...",EU/1/11/717\n EU/1/11/717,2
0,Treatment of inherited retinal dystrophies (in...,05 Dec 2018\n 05 Dec 2018,2,Luxturna\n Luxturna,28 Jul 2015\n 02 Apr 2012,Luxturna,adenovirus-associated viral vector serotype 2 ...,EU/1/18/1331\n EU/1/18/1331,2
0,Treatment of mantle cell lymphoma\n Treatment ...,23 Oct 2014\n 23 Oct 2014\n -\n -\n 07 Jul 201...,3,IMBRUVICA\n IMBRUVICA\n -\n -\n IMBRUVICA\n -\n -,12 Mar 2013\n 26 Apr 2012\n 18 Nov 2016\n 10 A...,IMBRUVICA,1-[(3r)-3-[4-amino-3-(4-phenoxyphenyl)-1h-pyra...,EU/1/14/945\n EU/1/14/945\n -\n -\n EU/1/14/94...,7
0,Treatment of cutaneous T-cell lymphoma\n Treat...,19 Dec 2017\n 30 Oct 2012\n 30 Oct 2012,3,ADCETRIS\n ADCETRIS\n ADCETRIS,11 Jan 2012\n 15 Jan 2009\n 15 Jan 2009,ADCETRIS,brentuximab vedotin\n monoclonal antibody agai...,EU/1/12/794\n EU/1/12/794\n EU/1/12/794,3
0,Treatment of diffuse large B cell lymphoma\n T...,27 Aug 2018\n -\n -\n -\n 27 Aug 2018\n -,2,Yescarta\n -\n -\n -\n Yescarta\n -,16 Dec 2014\n 11 Nov 2015\n 11 Nov 2015\n 11 N...,Yescarta,autologous t cells transduced with retroviral ...,EU/1/18/1299\n -\n -\n -\n EU/1/18/1299\n -,6
0,Treatment of chronic myeloid leukaemia\n Treat...,03 Jul 2013\n 03 Jul 2013,2,Iclusig\n Iclusig,02 Feb 2010\n 02 Feb 2010,Iclusig,"benzamide, 3-(2-imidazo[1,2-b]pyridazin-3-ylet...",EU/1/13/839\n EU/1/13/839,2
0,Treatment of mastocytosis\n Treatment of acute...,20 Sep 2017\n 20 Sep 2017,2,Rydapt\n Rydapt,04 Aug 2010\n 29 Jul 2004,Rydapt,midostaurin,EU/1/17/1218\n EU/1/17/1218,2
0,Treatment of invasive aspergillosis\n Treatmen...,19 Oct 2015\n 19 Oct 2015,2,Cresemba\n Cresemba,04 Jul 2014\n 04 Jun 2014,Cresemba,isavuconazonium sulfate,EU/1/15/1036\n EU/1/15/1036,2
0,Treatment of papillary thyroid cancer\n Treatm...,27 May 2014\n 27 May 2014,2,Nexavar\n Nexavar,13 Nov 2013\n 13 Nov 2013,Nexavar,sorafenib tosylate,EU/1/06/342\n EU/1/06/342,2
0,Treatment of diffuse large B-cell lymphoma\n T...,27 Aug 2018\n 27 Aug 2018,2,Kymriah\n Kymriah,14 Oct 2016\n 29 Apr 2014,Kymriah,autologous t cells transduced with lentiviral ...,EU/1/18/1297\n EU/1/18/1297,2


In [81]:
mask = (marketed_ods['Number of MAs'] > 1)
len(marketed_ods[mask])

18

## How many designations per product (when merging those products with the same trade name)

In [83]:
# Sort by number of applications
marketed_ods.sort_values(by='Number of designations', inplace=True)

# Get number of applications
n_applications = []
n_substances = []
for n_application in marketed_ods['Number of designations'].unique():
    # Get number of substances
    mask = marketed_ods['Number of designations'] == n_application
    number = len(marketed_ods[mask])

    # Append to containers
    n_applications.append(n_application)
    n_substances.append(number)

# Accumulate >= 5 applications
n_applications = np.array(n_applications)
n_substances = np.array(n_substances)
mask = n_applications >= 3
n_substances_three_or_more = np.sum(n_substances[mask])
n_applications = n_applications[~mask]
n_substances = list(n_substances[~mask])

# Create figure
fig = go.Figure()
colours = plotly.colors.qualitative.T10
n_colours = len(n_substances) + 1

fig.add_trace(go.Pie(
    labels=[
        '%s ODD' % str(n) for n in n_applications] + ['>2 ODD'],
    values=n_substances + [n_substances_three_or_more],
    textinfo='label+value+percent',
    sort=False,
    marker=dict(
        colors=[colours[0], colours[-1], colours[2]], 
        line=dict(color='#000000', width=1))))
fig.update_layout(showlegend=False)
fig.show()

In [85]:
# Filter marketed drugs because those are already above
mask = substance_applications['Number of MAs'] == 0
temp = substance_applications[mask]

# Get number of applications
n_applications = []
n_substances = []
for n_application in temp['Number of designations'].unique():
    # Get number of substances
    mask = temp['Number of designations'] == n_application
    number = len(temp[mask])

    # Append to containers
    n_applications.append(n_application)
    n_substances.append(number)

# Accumulate >= 5 applications
n_applications = np.array(n_applications)
n_substances = np.array(n_substances)
mask = n_applications >= 3
n_substances_three_or_more = np.sum(n_substances[mask])
n_applications = n_applications[~mask]
n_substances = list(n_substances[~mask])

# Create figure
fig = go.Figure()
colours = plotly.colors.qualitative.T10
n_colours = len(n_substances) + 1

fig.add_trace(go.Pie(
    labels=[
        '%s ODD' % str(n) for n in n_applications] + ['>2 ODD'],
    values=n_substances + [n_substances_three_or_more],
    textinfo='label+value+percent',
    sort=False,
    marker=dict(
        colors=[colours[0], colours[-1], colours[2]], 
        line=dict(color='#000000', width=1))))
fig.update_layout(showlegend=False)
fig.show()

In [86]:
# Combine the above figures to onw
fig = go.Figure()
colours = plotly.colors.qualitative.T10
n_colours = len(n_substances) + 1

fig.add_trace(go.Pie(
    labels=['1 ODD', '2 ODD', '>2 ODD'],
    values=[85 + 1127, 23 + 144, 13+ 41],
    textinfo='label+value+percent',
    sort=False,
    marker=dict(
        colors=[colours[0], colours[-1], colours[2]], 
        line=dict(color='#000000', width=1))))
fig.update_layout(showlegend=False)
fig.show()

In [87]:
# Sort by number of applications
marketed_ods.sort_values(by='Number of designations', inplace=True)

# Get number of applications
n_applications = []
n_substances = []
for n_application in marketed_ods['Number of designations'].unique():
    if n_application < 2:
        continue

    # Get number of substances
    mask = marketed_ods['Number of designations'] == n_application
    number = len(marketed_ods[mask])

    # Append to containers
    n_applications.append(n_application)
    n_substances.append(number)

# Accumulate >= 5 applications
n_applications = np.array(n_applications)
n_substances = np.array(n_substances)
mask = n_applications >= 5
n_substances_five_or_more = np.sum(n_substances[mask])
n_applications = n_applications[~mask]
n_substances = list(n_substances[~mask])

# Create figure
fig = go.Figure()
colours = plotly.colors.qualitative.Plotly
n_colours = len(n_substances) + 1

fig.add_trace(go.Pie(
    labels=[
        '%s ODD' % str(n) for n in n_applications] + ['>5'],
    values=n_substances + [n_substances_five_or_more],
    textinfo='label+value+percent',
    sort=False,
    marker=dict(
        colors=['rgb(33, 75, 99)', 'rgb(79, 129, 102)', 'rgb(151, 179, 100)',
                 'rgb(175, 49, 35)', 'rgb(36, 73, 147)'], 
        line=dict(color='#000000', width=1))))
fig.update_layout(showlegend=True)
fig.show()

In [88]:
# Filter marketed drugs because those are already above
mask = substance_applications['Number of MAs'] == 0
temp = substance_applications[mask]

# Get number of applications
n_applications = []
n_substances = []
for n_application in temp['Number of designations'].unique():
    if n_application < 2:
        continue

    # Get number of substances
    mask = temp['Number of designations'] == n_application
    number = len(temp[mask])

    # Append to containers
    n_applications.append(n_application)
    n_substances.append(number)

# Accumulate >= 5 applications
n_applications = np.array(n_applications)
n_substances = np.array(n_substances)
mask = n_applications >= 5
n_substances_five_or_more = np.sum(n_substances[mask])
n_applications = n_applications[~mask]
n_substances = list(n_substances[~mask])

# Create figure
fig = go.Figure()
colours = plotly.colors.qualitative.Plotly
n_colours = len(n_substances) + 1

fig.add_trace(go.Pie(
    labels=[
        '%s ODD' % str(n) for n in n_applications] + ['>5'],
    values=n_substances + [n_substances_five_or_more],
    textinfo='label+value+percent',
    sort=False,
    marker=dict(
        colors=['rgb(33, 75, 99)', 'rgb(79, 129, 102)', 'rgb(151, 179, 100)',
                 'rgb(175, 49, 35)', 'rgb(36, 73, 147)'], 
        line=dict(color='#000000', width=1))))
fig.update_layout(showlegend=True)
fig.show()

In [89]:
# Create figure
fig = go.Figure()
colours = plotly.colors.qualitative.Plotly
n_colours = len(n_substances) + 1

fig.add_trace(go.Pie(
    labels=[
        '2 ODD', '3 ODD', '4 ODD', '>5 ODD'],
    values=[144+23, 24+6, 8+3, 9+4],
    textinfo='label+value+percent',
    sort=False,
    marker=dict(
        colors=['rgb(33, 75, 99)', 'rgb(79, 129, 102)', 'rgb(151, 179, 100)',
                 'rgb(175, 49, 35)', 'rgb(36, 73, 147)'], 
        line=dict(color='#000000', width=1))))
fig.update_layout(showlegend=True)
fig.show()

#### Export table

In [82]:
mask = marketed_ods['Number of MAs'] > 1
temp = marketed_ods[mask][['Trade name', 'Product', 'Indication', 'Number of designations', 'Designation date', 'Number of MAs', 'Implemented on']]

directory = os.path.dirname(os.getcwd())
path = os.path.join(directory, 'data/ODs_marketed_for_multiple_diseases.csv')
temp.to_csv(path, index=False)