From c30d0526cc80afd383614d1e3fbaaeef84ce9b46 Mon Sep 17 00:00:00 2001 From: Anders Goncalves da Silva Date: Sun, 1 Dec 2019 17:21:30 -0800 Subject: [PATCH] Update to plotly plot --- validation/validation.ipynb | 1005 +++++++++++++++++++++++++++++++++-- 1 file changed, 967 insertions(+), 38 deletions(-) diff --git a/validation/validation.ipynb b/validation/validation.ipynb index b7163af..cd756bf 100644 --- a/validation/validation.ipynb +++ b/validation/validation.ipynb @@ -21,7 +21,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 34, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -30,19 +30,36 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": "/Users/andersgoncalves/OneDrive - The University of Melbourne/dev/EmMAIL/validation\n" + } + ], + "source": [ + "%cd validation" + ] + }, + { + "cell_type": "code", + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "## Load modules\n", "import collections\n", "\n", - "import pandas as pd" + "import pandas as pd\n", + "\n", + "import plotly.express as px" ] }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -50,28 +67,29 @@ "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
AliasENA-RUNEMM-Sangeremm-WGStyper-blast-settingmatch to sanger seqmatch to PHE emm typerReason for mismatch to either PHE typer or sanger sequence
0GASEMM2899ERR17324391.01.01YESYESNaN
1GASEMM0549ERR17324404.04.04YESYESNaN
2GASEMM2277ERR173244112.012.012YESYESNaN
3GASEMM0521ERR173244212.012.012YESYESNaN
4GASEMM2336ERR17324436.06.06YESYESNaN
\n
", "text/plain": " Alias ENA-RUN EMM-Sanger emm-WGS typer-blast-setting \\\n0 GASEMM2899 ERR1732439 1.0 1.0 1 \n1 GASEMM0549 ERR1732440 4.0 4.0 4 \n2 GASEMM2277 ERR1732441 12.0 12.0 12 \n3 GASEMM0521 ERR1732442 12.0 12.0 12 \n4 GASEMM2336 ERR1732443 6.0 6.0 6 \n\n match to sanger seq match to PHE emm typer \\\n0 YES YES \n1 YES YES \n2 YES YES \n3 YES YES \n4 YES YES \n\n Reason for mismatch to either PHE typer or sanger sequence \n0 NaN \n1 NaN \n2 NaN \n3 NaN \n4 NaN " }, - "execution_count": 31, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## Load data\n", + "\n", "data = pd.read_csv(\"emmtyper_validation.csv\", skiprows=1)\n", "data.head()\n" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 17, "metadata": {}, "outputs": [ { "data": { - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
AliasENA-RUNEMM-Sangeremm-WGStyper-blast-settingmatch to sanger seqmatch to PHE emm typerReason for mismatch to either PHE typer or sanger sequence
count3047304730473047304329992999165
unique304730471842001822333
topGASEMM0930ERR17349551.01.01YESYESExplain - different sample sanger
freq115745745712909296356
\n
", - "text/plain": " Alias ENA-RUN EMM-Sanger emm-WGS typer-blast-setting \\\ncount 3047 3047 3047 3047 3043 \nunique 3047 3047 184 200 182 \ntop GASEMM0930 ERR1734955 1.0 1.0 1 \nfreq 1 1 574 574 571 \n\n match to sanger seq match to PHE emm typer \\\ncount 2999 2999 \nunique 2 3 \ntop YES YES \nfreq 2909 2963 \n\n Reason for mismatch to either PHE typer or sanger sequence \ncount 165 \nunique 33 \ntop Explain - different sample sanger \nfreq 56 " + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
AliasENA-RUNEMM-Sangeremm-WGStyper-blast-settingmatch to sanger seqmatch to PHE emm typerReason for mismatch to either PHE typer or sanger sequence
count3047304730473047304329992999165
unique304730471842001822330
topGASEMM1756ERR17336551.01.01YESYESExplain - different sample sanger
freq115745745712909296356
\n
", + "text/plain": " Alias ENA-RUN EMM-Sanger emm-WGS typer-blast-setting \\\ncount 3047 3047 3047 3047 3043 \nunique 3047 3047 184 200 182 \ntop GASEMM1756 ERR1733655 1.0 1.0 1 \nfreq 1 1 574 574 571 \n\n match to sanger seq match to PHE emm typer \\\ncount 2999 2999 \nunique 2 3 \ntop YES YES \nfreq 2909 2963 \n\n Reason for mismatch to either PHE typer or sanger sequence \ncount 165 \nunique 30 \ntop Explain - different sample sanger \nfreq 56 " }, - "execution_count": 2, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -84,14 +102,14 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": "Index(['Alias', 'ENA-RUN', 'EMM-Sanger', 'emm-WGS', 'typer-blast-setting',\n 'match to sanger seq', 'match to PHE emm typer',\n 'Reason for mismatch to either PHE typer or sanger sequence'],\n dtype='object')" }, - "execution_count": 8, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -102,15 +120,15 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
AliasENA-RUNEMM-Sangeremm-WGStyper-blast-settingmatch to sanger seqmatch to PHE emm typerReason for mismatch to either PHE typer or sanger sequence
202GASEMM0411ERR173264128.028.0/3.13.1;28.0NaNYESExplain - poor genome quality
203GASEMM2046ERR173264289.08989.0~NaNNaNExplain - hit but not high confidence
283GASEMM1684ERR1732722170.07575.0~NaNNaNExplain - hit but not high confidence, differe...
324GASEMM0170ERR17327634.0Failed:No mapping to references-NaNNaNExplain - partial genome (59 bp)
438GASEMM3043ERR1732877NTNT-NaNNaNExplain - untypable isolate ?? Different species?
566GASEMM0614ERR17330056.06-NaNNaNExplain - fail
623GASEMM2753ERR17330626.996.99-NaNNaNExplain - fail
670GASEMM2426ERR17331095.235.23-NaNNaNExplain - poor genome quality
764GASEMM1637ERR17332035.235.23-NaNNaNExplain - Campylobacter genome
908GASEMM0459ERR17333476.06.0-NaNNaNExplain - fail
957GASEMM0319ERR17333966.06.0-NaNNaNExplain - fail.
1099GASEMM2653ERR1733538126.189NaNNaNNaNNOT TESTED WITH NEW TOOL.
1108GASEMM1138ERR17335475.1195.119-NaNNaNExplain - fail
1117GASEMM3045ERR1733556NT203-NaNNaNExplain - NT by sanger and new tool.
1134GASEMM0364ERR17335736.066.78~NaNNaNExplain - subtype low confidence
1145GASEMM1000ERR17335843.1/12.03.13.1NaNYESExplain - different sample sanger
1170GASEMM0477ERR17336096.06.0-NaNNaNExplain - fail
1208GASEMM2196ERR17336475.235.23-NaNNaNExplain - fail
1330GASEMM2966ERR17337695.235.23-NaNNaNExplain
1368GASEMM1078ERR173380789.01.089.0;1.0NaNNaNExplain - double isoaltes
1573GASEMM0483ERR17340126.06.0-NaNNaNExplain
1575GASEMM1508ERR17340146.06.0-NaNNaNExplain
1612GASEMM0171ERR17340514.0Failed:156NaNNaNNaNExplain - PHE fail / Not Tested
1613GASEMM0586ERR17340526.06.0-NaNNaNExplain - fail
1821GASEMM0096ERR173426075.075.0-NaNNaNExplain - E.coli genome
1835GASEMM2759ERR17342745.55.5-NaNNaNExplain - fail
1839GASEMM2610ERR17342781.03434.1;230.0NaNNaNExplain - poor genome quality/ mixed sample
1883GASEMM0804ERR17343226.06.0-NaNNaNExplain
1905GASEMM2945ERR1734344246.0159.0/246.0NaNNaNNaNNOT RUN IN NEW TOOL
1921GASEMM0482ERR17343606.06.0-NaNNaNExplain
2016GASEMM2003ERR17344551.01.01.0;12.0NaNNaNExplain - mixed sample
2033GASEMM3046ERR1734472NTNT-NaNNaNExplain
2080GASEMM0763ERR17345196.956.95-NaNNaNExplain
2124GASEMM1942ERR1734563230.03434.1;230.0NaNNaNExplain - mixed sample
2240GASEMM0205ERR17346795.235.23-NaNNaNExplain - fail
2289GASEMM0462ERR17347286.06.0-NaNNaNExplain
2293GASEMM2695ERR173473289.2989.29NaNNaNNaNExplain
2333GASEMM1190ERR17347725.1135.113-NaNNaNExplain
2455GASEMM0475ERR17348946.06.0-NaNNaNExplain
2479GASEMM2155ERR17349186.76.7-NaNNaNExplain
2500GASEMM1803ERR17349399.03.19.0;3.1NaNNaNExplain - poor genome
2587GASEMM0750ERR17350266.06.06.0;3.1NaNNaNExplain - poor genome quality
2643GASEMM2823ERR17350826.76.7-NaNNaNExplain
2707GASEMM3047ERR1735146NT5.235.23NaNYESExplain - no sanger sequence results
2708GASEMM3044ERR1735147NT203-NaNNaNExplain - no sanger sequence results
2795GASEMM0246ERR173523418.0Failed-NaNNaNExplain - B.licheniformis genome
2920GASEMM1683ERR17353595.35.3-NaNNaNExplain - fail
3004GASEMM1986ERR17354434.13n/d12.0;4.13NaNNaNExplain - poor genome quality
\n
", - "text/plain": " Alias ENA-RUN EMM-Sanger emm-WGS \\\n202 GASEMM0411 ERR1732641 28.0 28.0/3.1 \n203 GASEMM2046 ERR1732642 89.0 89 \n283 GASEMM1684 ERR1732722 170.0 75 \n324 GASEMM0170 ERR1732763 4.0 Failed:No mapping to references \n438 GASEMM3043 ERR1732877 NT NT \n566 GASEMM0614 ERR1733005 6.0 6 \n623 GASEMM2753 ERR1733062 6.99 6.99 \n670 GASEMM2426 ERR1733109 5.23 5.23 \n764 GASEMM1637 ERR1733203 5.23 5.23 \n908 GASEMM0459 ERR1733347 6.0 6.0 \n957 GASEMM0319 ERR1733396 6.0 6.0 \n1099 GASEMM2653 ERR1733538 126.1 89 \n1108 GASEMM1138 ERR1733547 5.119 5.119 \n1117 GASEMM3045 ERR1733556 NT 203 \n1134 GASEMM0364 ERR1733573 6.0 6 \n1145 GASEMM1000 ERR1733584 3.1/12.0 3.1 \n1170 GASEMM0477 ERR1733609 6.0 6.0 \n1208 GASEMM2196 ERR1733647 5.23 5.23 \n1330 GASEMM2966 ERR1733769 5.23 5.23 \n1368 GASEMM1078 ERR1733807 89.0 1.0 \n1573 GASEMM0483 ERR1734012 6.0 6.0 \n1575 GASEMM1508 ERR1734014 6.0 6.0 \n1612 GASEMM0171 ERR1734051 4.0 Failed:156 \n1613 GASEMM0586 ERR1734052 6.0 6.0 \n1821 GASEMM0096 ERR1734260 75.0 75.0 \n1835 GASEMM2759 ERR1734274 5.5 5.5 \n1839 GASEMM2610 ERR1734278 1.0 34 \n1883 GASEMM0804 ERR1734322 6.0 6.0 \n1905 GASEMM2945 ERR1734344 246.0 159.0/246.0 \n1921 GASEMM0482 ERR1734360 6.0 6.0 \n2016 GASEMM2003 ERR1734455 1.0 1.0 \n2033 GASEMM3046 ERR1734472 NT NT \n2080 GASEMM0763 ERR1734519 6.95 6.95 \n2124 GASEMM1942 ERR1734563 230.0 34 \n2240 GASEMM0205 ERR1734679 5.23 5.23 \n2289 GASEMM0462 ERR1734728 6.0 6.0 \n2293 GASEMM2695 ERR1734732 89.29 89.29 \n2333 GASEMM1190 ERR1734772 5.113 5.113 \n2455 GASEMM0475 ERR1734894 6.0 6.0 \n2479 GASEMM2155 ERR1734918 6.7 6.7 \n2500 GASEMM1803 ERR1734939 9.0 3.1 \n2587 GASEMM0750 ERR1735026 6.0 6.0 \n2643 GASEMM2823 ERR1735082 6.7 6.7 \n2707 GASEMM3047 ERR1735146 NT 5.23 \n2708 GASEMM3044 ERR1735147 NT 203 \n2795 GASEMM0246 ERR1735234 18.0 Failed \n2920 GASEMM1683 ERR1735359 5.3 5.3 \n3004 GASEMM1986 ERR1735443 4.13 n/d \n\n typer-blast-setting match to sanger seq match to PHE emm typer \\\n202 3.1;28.0 NaN YES \n203 89.0~ NaN NaN \n283 75.0~ NaN NaN \n324 - NaN NaN \n438 - NaN NaN \n566 - NaN NaN \n623 - NaN NaN \n670 - NaN NaN \n764 - NaN NaN \n908 - NaN NaN \n957 - NaN NaN \n1099 NaN NaN NaN \n1108 - NaN NaN \n1117 - NaN NaN \n1134 6.78~ NaN NaN \n1145 3.1 NaN YES \n1170 - NaN NaN \n1208 - NaN NaN \n1330 - NaN NaN \n1368 89.0;1.0 NaN NaN \n1573 - NaN NaN \n1575 - NaN NaN \n1612 NaN NaN NaN \n1613 - NaN NaN \n1821 - NaN NaN \n1835 - NaN NaN \n1839 34.1;230.0 NaN NaN \n1883 - NaN NaN \n1905 NaN NaN NaN \n1921 - NaN NaN \n2016 1.0;12.0 NaN NaN \n2033 - NaN NaN \n2080 - NaN NaN \n2124 34.1;230.0 NaN NaN \n2240 - NaN NaN \n2289 - NaN NaN \n2293 NaN NaN NaN \n2333 - NaN NaN \n2455 - NaN NaN \n2479 - NaN NaN \n2500 9.0;3.1 NaN NaN \n2587 6.0;3.1 NaN NaN \n2643 - NaN NaN \n2707 5.23 NaN YES \n2708 - NaN NaN \n2795 - NaN NaN \n2920 - NaN NaN \n3004 12.0;4.13 NaN NaN \n\n Reason for mismatch to either PHE typer or sanger sequence \n202 Explain - poor genome quality \n203 Explain - hit but not high confidence \n283 Explain - hit but not high confidence, differe... \n324 Explain - partial genome (59 bp) \n438 Explain - untypable isolate ?? Different species? \n566 Explain - fail \n623 Explain - fail \n670 Explain - poor genome quality \n764 Explain - Campylobacter genome \n908 Explain - fail \n957 Explain - fail. \n1099 NOT TESTED WITH NEW TOOL. \n1108 Explain - fail \n1117 Explain - NT by sanger and new tool. \n1134 Explain - subtype low confidence \n1145 Explain - different sample sanger \n1170 Explain - fail \n1208 Explain - fail \n1330 Explain \n1368 Explain - double isoaltes \n1573 Explain \n1575 Explain \n1612 Explain - PHE fail / Not Tested \n1613 Explain - fail \n1821 Explain - E.coli genome \n1835 Explain - fail \n1839 Explain - poor genome quality/ mixed sample \n1883 Explain \n1905 NOT RUN IN NEW TOOL \n1921 Explain \n2016 Explain - mixed sample \n2033 Explain \n2080 Explain \n2124 Explain - mixed sample \n2240 Explain - fail \n2289 Explain \n2293 Explain \n2333 Explain \n2455 Explain \n2479 Explain \n2500 Explain - poor genome \n2587 Explain - poor genome quality \n2643 Explain \n2707 Explain - no sanger sequence results \n2708 Explain - no sanger sequence results \n2795 Explain - B.licheniformis genome \n2920 Explain - fail \n3004 Explain - poor genome quality " + "text/plain": " Alias ENA-RUN EMM-Sanger emm-WGS \\\n202 GASEMM0411 ERR1732641 28.0 28.0/3.1 \n203 GASEMM2046 ERR1732642 89.0 89 \n283 GASEMM1684 ERR1732722 170.0 75 \n324 GASEMM0170 ERR1732763 4.0 Failed:No mapping to references \n438 GASEMM3043 ERR1732877 NT NT \n566 GASEMM0614 ERR1733005 6.0 6 \n623 GASEMM2753 ERR1733062 6.99 6.99 \n670 GASEMM2426 ERR1733109 5.23 5.23 \n764 GASEMM1637 ERR1733203 5.23 5.23 \n908 GASEMM0459 ERR1733347 6.0 6.0 \n957 GASEMM0319 ERR1733396 6.0 6.0 \n1099 GASEMM2653 ERR1733538 126.1 89 \n1108 GASEMM1138 ERR1733547 5.119 5.119 \n1117 GASEMM3045 ERR1733556 NT 203 \n1134 GASEMM0364 ERR1733573 6.0 6 \n1145 GASEMM1000 ERR1733584 3.1/12.0 3.1 \n1170 GASEMM0477 ERR1733609 6.0 6.0 \n1208 GASEMM2196 ERR1733647 5.23 5.23 \n1330 GASEMM2966 ERR1733769 5.23 5.23 \n1368 GASEMM1078 ERR1733807 89.0 1.0 \n1573 GASEMM0483 ERR1734012 6.0 6.0 \n1575 GASEMM1508 ERR1734014 6.0 6.0 \n1612 GASEMM0171 ERR1734051 4.0 Failed:156 \n1613 GASEMM0586 ERR1734052 6.0 6.0 \n1821 GASEMM0096 ERR1734260 75.0 75.0 \n1835 GASEMM2759 ERR1734274 5.5 5.5 \n1839 GASEMM2610 ERR1734278 1.0 34 \n1883 GASEMM0804 ERR1734322 6.0 6.0 \n1905 GASEMM2945 ERR1734344 246.0 159.0/246.0 \n1921 GASEMM0482 ERR1734360 6.0 6.0 \n2016 GASEMM2003 ERR1734455 1.0 1.0 \n2033 GASEMM3046 ERR1734472 NT NT \n2080 GASEMM0763 ERR1734519 6.95 6.95 \n2124 GASEMM1942 ERR1734563 230.0 34 \n2240 GASEMM0205 ERR1734679 5.23 5.23 \n2289 GASEMM0462 ERR1734728 6.0 6.0 \n2293 GASEMM2695 ERR1734732 89.29 89.29 \n2333 GASEMM1190 ERR1734772 5.113 5.113 \n2455 GASEMM0475 ERR1734894 6.0 6.0 \n2479 GASEMM2155 ERR1734918 6.7 6.7 \n2500 GASEMM1803 ERR1734939 9.0 3.1 \n2587 GASEMM0750 ERR1735026 6.0 6.0 \n2643 GASEMM2823 ERR1735082 6.7 6.7 \n2707 GASEMM3047 ERR1735146 NT 5.23 \n2708 GASEMM3044 ERR1735147 NT 203 \n2795 GASEMM0246 ERR1735234 18.0 Failed \n2920 GASEMM1683 ERR1735359 5.3 5.3 \n3004 GASEMM1986 ERR1735443 4.13 n/d \n\n typer-blast-setting match to sanger seq match to PHE emm typer \\\n202 3.1;28.0 NaN YES \n203 89.0~ NaN NaN \n283 75.0~ NaN NaN \n324 - NaN NaN \n438 - NaN NaN \n566 - NaN NaN \n623 - NaN NaN \n670 - NaN NaN \n764 - NaN NaN \n908 - NaN NaN \n957 - NaN NaN \n1099 NaN NaN NaN \n1108 - NaN NaN \n1117 - NaN NaN \n1134 6.78~ NaN NaN \n1145 3.1 NaN YES \n1170 - NaN NaN \n1208 - NaN NaN \n1330 - NaN NaN \n1368 89.0;1.0 NaN NaN \n1573 - NaN NaN \n1575 - NaN NaN \n1612 NaN NaN NaN \n1613 - NaN NaN \n1821 - NaN NaN \n1835 - NaN NaN \n1839 34.1;230.0 NaN NaN \n1883 - NaN NaN \n1905 NaN NaN NaN \n1921 - NaN NaN \n2016 1.0;12.0 NaN NaN \n2033 - NaN NaN \n2080 - NaN NaN \n2124 34.1;230.0 NaN NaN \n2240 - NaN NaN \n2289 - NaN NaN \n2293 NaN NaN NaN \n2333 - NaN NaN \n2455 - NaN NaN \n2479 - NaN NaN \n2500 9.0;3.1 NaN NaN \n2587 6.0;3.1 NaN NaN \n2643 - NaN NaN \n2707 5.23 NaN YES \n2708 - NaN NaN \n2795 - NaN NaN \n2920 - NaN NaN \n3004 12.0;4.13 NaN NaN \n\n Reason for mismatch to either PHE typer or sanger sequence \n202 Explain - poor genome quality \n203 Explain - hit but not high confidence \n283 Explain - hit but not high confidence, differe... \n324 Explain - partial genome (59 bp) \n438 Explain - untypable isolate ?? Different species? \n566 Explain - fail \n623 Explain - fail \n670 Explain - poor genome quality \n764 Explain - Campylobacter genome \n908 Explain - fail \n957 Explain - fail. \n1099 NOT TESTED WITH NEW TOOL. \n1108 Explain - fail \n1117 Explain - NT by sanger and new tool. \n1134 Explain - subtype low confidence \n1145 Explain - different sample sanger \n1170 Explain - fail \n1208 Explain - fail \n1330 Explain \n1368 Explain - double isoaltes \n1573 Explain \n1575 Explain \n1612 Explain - PHE fail / Not Tested \n1613 Explain - fail \n1821 Explain - E.coli genome \n1835 Explain - fail \n1839 Explain - poor genome quality/ mixed sample \n1883 Explain \n1905 NOT RUN IN NEW TOOL \n1921 Explain \n2016 Explain - mixed sample \n2033 Explain \n2080 Explain \n2124 Explain - mixed sample \n2240 Explain - fail \n2289 Explain \n2293 Explain \n2333 Explain \n2455 Explain \n2479 Explain \n2500 Explain - poor genome \n2587 Explain - poor genome quality \n2643 Explain \n2707 Explain - no sanger sequence results \n2708 Explain - no sanger sequence results \n2795 Explain - B.licheniformis genome \n2920 Explain - fail \n3004 Explain - poor genome quality " }, - "execution_count": 30, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -123,29 +141,9 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 36, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": "" - }, - "execution_count": 52, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAEUCAYAAAAiMOHqAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvqOYd8AAAFTdJREFUeJzt3X2wXVWd5vHvw1uCvCiETApJhmSYaE+ANsYUMopVKqWCL2BXtQSskoxlTSiMrahdystYMPZQJYpQzUiDkVDgSEtjQ48ZOkoHRsthemhInBQYxCKDMNwMSAwaCQgm4Td/nJ30DSTcl9zcE+76fqpunbPXXnuf3wmX89y91t77pKqQJLVnn34XIEnqDwNAkhplAEhSowwASWqUASBJjTIAJKlRBoAkNcoAkKRGGQCS1Kj9+l3AKzniiCNq5syZ/S5Dkl5VVq1a9euqmjpUv706AGbOnMnKlSv7XYYkvaokeWw4/RwCkqRGGQCS1CgDQJIaNeQcQJLJwE+ASV3/v62qi5PMAm4GpgCrgI9V1R+STAK+DbwF2AAsqKpHu31dAHwC2Ap8uqruGPu3JEkvt3nzZgYGBnj++ef7XcqYmTx5MtOnT2f//fcf1fbDmQR+AXh3VW1Ksj9wd5IfAJ8Drqyqm5NcS++D/Zru8TdV9a+TnAlcBixIMgc4EzgWeD1wZ5I3VNXWUVUuSSMwMDDAIYccwsyZM0nS73J2W1WxYcMGBgYGmDVr1qj2MeQQUPVs6hb3734KeDfwt137jcCHu+end8t0609O71/7dODmqnqhqn4JrAVOGFXVkjRCzz//PFOmTJkQH/4ASZgyZcpuHdEMaw4gyb5JVgNPASuA/wP8tqq2dF0GgKO650cBjwN06zfSGyba3r6TbQa/1qIkK5OsXL9+/cjfkSTtwkT58N9md9/PsAKgqrZW1VxgOr2/2v9ot171lV9rSVXNr6r5U6cOeR2DJGmURnQhWFX9NsmPgH8LvC7Jft1f+dOBdV23dcAMYCDJfsBr6U0Gb2vfZvA2kvaQmef/fb9LGJZHv/KBcX29sf53Gar+quId73gHF110EaeeeioA3/ve91i6dCkrVqzg+OOP3973zDPP5Pzzz+f222/nS1/6Ei+++CKbN2/mM5/5DOecc86Y1Tycs4CmApu7D/8DgffQm9j9EfCn9M4EWgh8v9tkWbf8v7r1/72qKsky4K+TXEFvEng2cO+YvRNJ2osl4dprr+UjH/kI73rXu9iyZQsXXnghP/zhD3nTm97E6tWrd+i/efNmFi1axL333sv06dN54YUXePTRR8e0puEcARwJ3JhkX3pDRrdU1e1JHgRuTvKfgP8NLO36LwX+S5K1wNP0zvyhqtYkuQV4ENgCLPYMIEktOe644/jQhz7EZZddxrPPPsvZZ5/NMcccs9O+zzzzDFu2bGHKlCkATJo0iTe+8Y1jWs+QAVBV9wNv3kn7I+zkLJ6qeh74yC72dSlw6cjLlKSJ4eKLL2bevHkccMAB2+919vvf/565c+du73PBBRewYMECTjvtNI4++mhOPvlkPvjBD3LWWWexzz5jd/3uXn0zuFcLx1glDddBBx3EggULOPjgg5k0aRIABx544MuGgACuu+46HnjgAe68804uv/xyVqxYwQ033DBmtXgrCEkaZ/vss8+w/5I//vjj+exnP8uKFSu49dZbx7aOMd2bJGlMbNq0iR//+Mfbl1evXs3RRx89pq/hEJCkJu1NQ6IvnQM45ZRTuOiii/jqV7/KOeecw4EHHshBBx00psM/YABI0ri75JJLdljeunXnJ0QuX758j9bhEJAkNcoAkKRGGQCS1CgDQJIaZQBIUqMMAElqlKeBSmrTJa8d4/1tHLJLEj73uc/x9a9/HYDLL7+cTZs2bT8tdMmSJVxxxRUAHHrooVxxxRWcdNJJY1vnIB4BSNI4mTRpErfddhu//vWvX7bu9ttv55vf/CZ33303Dz30ENdeey0f/ehHefLJJ/dYPQaAJI2T/fbbj0WLFnHllVe+bN1ll13G1772NY444ggA5s2bx8KFC7n66qv3WD0GgCSNo8WLF3PTTTexceOOQ0Zr1qzhLW95yw5t8+fPZ82aNXusFgNAksbRoYceytlnn81VV13V71IMAEkab+eddx5Lly7l2Wef3d42Z84cVq1atUO/VatWceyxx+6xOgwASRpnhx9+OGeccQZLly7d3vaFL3yBL37xi2zYsAHo3f75hhtu4JOf/OQeq8PTQCW1aRinbe5Jn//85/nGN76xffm0005j3bp1vO1tbyMJhxxyCN/5znc48sgj91gNBoAkjZNNmzZtfz5t2jSee+65Hdafe+65nHvuueNWj0NAktQoA0CSGmUASGpGVfW7hDG1u+/HAJDUhMmTJ7Nhw4YJEwJVxYYNG5g8efKo9+EksKQmTJ8+nYGBAdavX9/vUsbM5MmTmT59+qi3HzIAkswAvg1MAwpYUlV/meQS4N8D2/41L6yq5d02FwCfALYCn66qO7r2U4C/BPYFrquqr4y6ckkagf33359Zs2b1u4y9ynCOALYAn6+qnyY5BFiVZEW37sqqunxw5yRzgDOBY4HXA3cmeUO3+mrgPcAAcF+SZVX14Fi8EUnSyAwZAFX1BPBE9/yZJD8HjnqFTU4Hbq6qF4BfJlkLnNCtW1tVjwAkubnrawBIUh+MaBI4yUzgzcA/dU2fSnJ/kuuTHNa1HQU8Pmizga5tV+2SpD4YdgAkORi4FTivqn4HXAMcA8yld4Tw9bEoKMmiJCuTrJxIkzWStLcZVgAk2Z/eh/9NVXUbQFX9qqq2VtWLwLf452GedcCMQZtP79p21b6DqlpSVfOrav7UqVNH+n4kScM0ZAAkCbAU+HlVXTGoffAdiv4E+Fn3fBlwZpJJSWYBs4F7gfuA2UlmJTmA3kTxsrF5G5KkkRrOWUBvBz4GPJBkddd2IXBWkrn0Tg19FDgHoKrWJLmF3uTuFmBxVW0FSPIp4A56p4FeX1V77qtuJEmvaDhnAd0NZCerlr/CNpcCl+6kffkrbSdJGj/eCkKSGmUASFKjDABJapQBIEmNMgAkqVEGgCQ1ygCQpEYZAJLUKANAkhplAEhSowwASWqUASBJjTIAJKlRBoAkNcoAkKRGGQCS1CgDQJIaZQBIUqMMAElqlAEgSY0yACSpUQaAJDXKAJCkRhkAktQoA0CSGmUASFKjhgyAJDOS/CjJg0nWJPlM1354khVJHu4eD+vak+SqJGuT3J9k3qB9Lez6P5xk4Z57W5KkoQznCGAL8PmqmgOcCCxOMgc4H7irqmYDd3XLAKcCs7ufRcA10AsM4GLgrcAJwMXbQkOSNP6GDICqeqKqfto9fwb4OXAUcDpwY9ftRuDD3fPTgW9Xzz3A65IcCbwPWFFVT1fVb4AVwClj+m4kScM2ojmAJDOBNwP/BEyrqie6VU8C07rnRwGPD9psoGvbVbskqQ+GHQBJDgZuBc6rqt8NXldVBdRYFJRkUZKVSVauX79+LHYpSdqJYQVAkv3pffjfVFW3dc2/6oZ26B6f6trXATMGbT69a9tV+w6qaklVza+q+VOnTh3Je5EkjcBwzgIKsBT4eVVdMWjVMmDbmTwLge8Paj+7OxvoRGBjN1R0B/DeJId1k7/v7dokSX2w3zD6vB34GPBAktVd24XAV4BbknwCeAw4o1u3HHg/sBZ4Dvg4QFU9neQvgPu6fl+uqqfH5F1IkkZsyACoqruB7GL1yTvpX8DiXezreuD6kRQoSdozvBJYkhplAEhSowwASWqUASBJjTIAJKlRBoAkNcoAkKRGGQCS1CgDQJIaZQBIUqMMAElqlAEgSY0yACSpUQaAJDXKAJCkRhkAktQoA0CSGmUASFKjDABJapQBIEmNMgAkqVEGgCQ1ygCQpEYZAJLUKANAkhplAEhSo4YMgCTXJ3kqyc8GtV2SZF2S1d3P+wetuyDJ2iS/SPK+Qe2ndG1rk5w/9m9FkjQSwzkCuAE4ZSftV1bV3O5nOUCSOcCZwLHdNn+VZN8k+wJXA6cCc4Czur6SpD7Zb6gOVfWTJDOHub/TgZur6gXgl0nWAid069ZW1SMASW7u+j444oolSWNid+YAPpXk/m6I6LCu7Sjg8UF9Brq2XbVLkvpktAFwDXAMMBd4Avj6WBWUZFGSlUlWrl+/fqx2K0l6iVEFQFX9qqq2VtWLwLf452GedcCMQV2nd227at/ZvpdU1fyqmj916tTRlCdJGoZRBUCSIwct/gmw7QyhZcCZSSYlmQXMBu4F7gNmJ5mV5AB6E8XLRl+2JGl3DTkJnOS7wDuBI5IMABcD70wyFyjgUeAcgKpak+QWepO7W4DFVbW128+ngDuAfYHrq2rNmL8bSdKwDecsoLN20rz0FfpfCly6k/blwPIRVSdJ2mO8EliSGmUASFKjDABJapQBIEmNMgAkqVEGgCQ1ygCQpEYZAJLUKANAkhplAEhSowwASWqUASBJjTIAJKlRBoAkNcoAkKRGGQCS1CgDQJIaZQBIUqMMAElqlAEgSY0yACSpUQaAJDXKAJCkRhkAktQoA0CSGmUASFKjhgyAJNcneSrJzwa1HZ5kRZKHu8fDuvYkuSrJ2iT3J5k3aJuFXf+HkyzcM29HkjRcwzkCuAE45SVt5wN3VdVs4K5uGeBUYHb3swi4BnqBAVwMvBU4Abh4W2hIkvpjyACoqp8AT7+k+XTgxu75jcCHB7V/u3ruAV6X5EjgfcCKqnq6qn4DrODloSJJGkejnQOYVlVPdM+fBKZ1z48CHh/Ub6Br21W7JKlPdnsSuKoKqDGoBYAki5KsTLJy/fr1Y7VbSdJLjDYAftUN7dA9PtW1rwNmDOo3vWvbVfvLVNWSqppfVfOnTp06yvIkSUMZbQAsA7adybMQ+P6g9rO7s4FOBDZ2Q0V3AO9Nclg3+fverk2S1Cf7DdUhyXeBdwJHJBmgdzbPV4BbknwCeAw4o+u+HHg/sBZ4Dvg4QFU9neQvgPu6fl+uqpdOLEuSxtGQAVBVZ+1i1ck76VvA4l3s53rg+hFVJ0naY7wSWJIaZQBIUqMMAElqlAEgSY0yACSpUQaAJDXKAJCkRhkAktQoA0CSGmUASFKjDABJapQBIEmNMgAkqVEGgCQ1ygCQpEYZAJLUKANAkhplAEhSowwASWqUASBJjTIAJKlRBoAkNcoAkKRGGQCS1CgDQJIaZQBIUqN2KwCSPJrkgSSrk6zs2g5PsiLJw93jYV17klyVZG2S+5PMG4s3IEkanbE4AnhXVc2tqvnd8vnAXVU1G7irWwY4FZjd/SwCrhmD15YkjdKeGAI6Hbixe34j8OFB7d+unnuA1yU5cg+8viRpGHY3AAr4hySrkizq2qZV1RPd8yeBad3zo4DHB2070LXtIMmiJCuTrFy/fv1ulidJ2pX9dnP7k6pqXZJ/AaxI8tDglVVVSWokO6yqJcASgPnz549oW0nS8O3WEUBVresenwL+DjgB+NW2oZ3u8amu+zpgxqDNp3dtkqQ+GHUAJDkoySHbngPvBX4GLAMWdt0WAt/vni8Dzu7OBjoR2DhoqEiSNM52ZwhoGvB3Sbbt56+r6odJ7gNuSfIJ4DHgjK7/cuD9wFrgOeDju/HakqTdNOoAqKpHgDftpH0DcPJO2gtYPNrXkySNLa8ElqRGGQCS1CgDQJIaZQBIUqMMAElqlAEgSY0yACSpUQaAJDXKAJCkRhkAktQoA0CSGmUASFKjDABJapQBIEmNMgAkqVEGgCQ1ygCQpEYZAJLUKANAkhplAEhSo0b9pfB6Fbrktf2uYHgu2djvCqQmeAQgSY0yACSpUQaAJDXKOQBJe4dXwxzVBJuf8ghAkho17gGQ5JQkv0iyNsn54/36kqSecQ2AJPsCVwOnAnOAs5LMGc8aJEk9430EcAKwtqoeqao/ADcDp49zDZIkxn8S+Cjg8UHLA8BbB3dIsghY1C1uSvKLcaptwgscAfy633UM6T+m3xWoD14Vv5+vnt/No4fTaa87C6iqlgBL+l3HRJRkZVXN73cd0s74+zn+xnsIaB0wY9Dy9K5NkjTOxjsA7gNmJ5mV5ADgTGDZONcgSWKch4CqakuSTwF3APsC11fVmvGsoXEOrWlv5u/nOEtV9bsGSVIfeCWwJDXKAJCkRhkAktQoA0CSGmUATGBJ9klyRr/rkHYlyYFJ3tjvOlplAExgVfUi8IV+1yHtTJIPAauBH3bLc5N4XdA4MgAmvjuT/HmSGUkO3/bT76Ik4BJ6N4j8LUBVrQZm9bOg1ux19wLSmFvQPS4e1FbAv+pDLdJgm6tqY7LDDda8MGkcGQATXFX5F5X2VmuSfBTYN8ls4NPAP/a5pqY4BDTBJXlNkv+QZEm3PDvJB/tdlwT8GXAs8ALwXeB3wHl9ragx3gpigkvyN8Aq4OyqOi7Ja4B/rKq5fS5NUp85BDTxHVNVC5KcBVBVz+Ulg65SPyR5A/DnwEwGfRZV1bv7VVNrDICJ7w9JDqSbXEtyDL1DbqnfvgdcC1wHbO1zLU0yACa+i+mdZz0jyU3A24F/19eKpJ4tVXVNv4tomXMADUgyBTgRCHBPVe3d37uqCW3QdSifBtYDtzHoqLSqnu5HXS0yACa4JPN20rwReKyqtox3PVKSX9Ibktw2F7XDh1BVeY3KODEAJrgk9wDzgPvp/Q93HLAGeC1wblX9Qx/LU8O6ualPAifRC4H/AVxbVb/va2EN8TqAie//AW+uqvlV9RbgzcAjwHuAr/a1MrXuRuDfAFcB/xmY07VpnDgJPPG9YfD3LlfVg0n+qKoe8WxQ9dlxVTVn0PKPkjzYt2oaZABMfGuSXAPc3C0vAB5MMgnY3L+yJH6a5MSqugcgyVuBlX2uqSnOAUxwLxlnBfifwF8BzwOvqapN/apNbUvyc+CNwP/tmv4l8AtgC1BV9cf9qq0VBoCkvkhy9Cutr6rHxquWVhkAE1ySt9O77/rR7Hi5vafaSY0zACa4JA8Bn6V3Q7jtl9tX1Ya+FSVpr+Ak8MS3sap+0O8iJO19PAKY4JJ8BdiXl19u/9O+FSVpr2AATHBJfrST5vKWu5IMAElqlHMADUjyAXpfvTd5W1tVfbl/FUnaG3gvoAkuybX0rv79M3o3g/sIvVNCJTXOIaAJLsn9VfXHgx4PBn5QVe/od22S+ssjgInv+e7xuSSvp3eZ/ZF9rEfSXsI5gInvvyV5HfA14Kf07rv+rf6WJGlvYABMfA8BW6vq1iRz6H05zH/tc02S9gIOAU18X6qqZ5KcBLwbuA7wi7glGQAN2Hb/nw8A36qqvwcO6GM9kvYSBsDEty7JN+mdCrq8+yIY/7tL8jTQiS7Ja4BTgAeq6uEkRwLH+2XwkgwASWqUQwGS1CgDQJIaZQBIUqMMAElq1P8HbSbhZe32NegAAAAASUVORK5CYII=\n", - "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", - "text/plain": "
" - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "## Clean up data\n", "\n", @@ -168,9 +166,940 @@ " pd.DataFrame(count_sanger, index=['sanger']),\n", " pd.DataFrame(count_phe, index=['phe'])\n", " ]\n", - ")\n", - "summary_data.plot.bar()" + ")" ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": "data_source count number_match\n0 sanger YES 2909\n1 phe YES 2964\n2 sanger NO 138\n3 phe NO 83\n" + } + ], + "source": [ + "data_summary_long = pd.melt(summary_data.reset_index(), \n", + " id_vars='index', \n", + " value_vars=['YES', 'NO']).rename(columns = {\n", + " 'index': 'data_source',\n", + " 'value': 'number_match',\n", + " 'variable': 'count'})\n", + "print(data_summary_long)" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "alignmentgroup": "True", + "hoverlabel": { + "namelength": 0 + }, + "hovertemplate": "count=YES
data_source=%{x}
number_match=%{y}", + "legendgroup": "count=YES", + "marker": { + "color": "#636efa" + }, + "name": "count=YES", + "offsetgroup": "count=YES", + "orientation": "v", + "showlegend": true, + "textposition": "auto", + "type": "bar", + "x": [ + "sanger", + "phe" + ], + "xaxis": "x", + "y": [ + 2909, + 2964 + ], + "yaxis": "y" + }, + { + "alignmentgroup": "True", + "hoverlabel": { + "namelength": 0 + }, + "hovertemplate": "count=NO
data_source=%{x}
number_match=%{y}", + "legendgroup": "count=NO", + "marker": { + "color": "#EF553B" + }, + "name": "count=NO", + "offsetgroup": "count=NO", + "orientation": "v", + "showlegend": true, + "textposition": "auto", + "type": "bar", + "x": [ + "sanger", + "phe" + ], + "xaxis": "x", + "y": [ + 138, + 83 + ], + "yaxis": "y" + } + ], + "layout": { + "barmode": "group", + "height": 600, + "legend": { + "tracegroupgap": 0 + }, + "margin": { + "t": 60 + }, + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "scatter": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "xaxis": { + "anchor": "y", + "domain": [ + 0, + 0.98 + ], + "ticktext": [ + "Sanger", + "PHE" + ], + "tickvals": [ + "sanger", + "phe" + ], + "title": { + "text": "Data Sources" + } + }, + "yaxis": { + "anchor": "x", + "domain": [ + 0, + 1 + ], + "title": { + "text": "Sample Count" + } + } + } + } + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig = px.bar(data_summary_long, x=\"data_source\", y=\"number_match\", color=\"count\", barmode=\"group\")\n", + "fig.update_xaxes(title = \"Data Sources\", ticktext=[\"Sanger\", \"PHE\"], tickvals=['sanger', 'phe'])\n", + "fig.update_yaxes(title = \"Sample Count\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ] }