In [1]:
import os
import re
import shutil
from textwrap import wrap

import matplotlib.pyplot as plt
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import skimage.color
import skimage.io
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
np.random.seed(1234)
%matplotlib inline

In [13]:
#TRAIN_PATH = '/home/alvin/cei/notebook_home/kaggleDSB2018/data/stage1_train/'
# Root folders of the stage-1 train and test image sets.
STAGE1_TRAIN = './data/stage1_train_fix_v4_external'
STAGE1_TEST = './data/stage1_test'

# Path templates. The "{}" placeholders are filled later with str.format;
# the image patterns take the image id twice (folder name and file name).
STAGE1_TRAIN_IMAGE_PATTERN = STAGE1_TRAIN + "/{}/images/{}.png"
STAGE1_TEST_IMAGE_PATTERN = STAGE1_TEST + "/{}/images/{}.png"
STAGE1_TRAIN_MASK_PATTERN = STAGE1_TRAIN + "/{}/masks/*.png"
STAGE1_TEST_MASK_PATTERN = STAGE1_TEST + "/{}/masks/*.png"

# Column names of the per-image details table.
IMAGE_ID = "image_id"
IMAGE_WIDTH = "width"
IMAGE_HEIGHT = "height"
# Misspelled legacy name for the height column; kept as an alias because
# other cells still reference IMAGE_WEIGHT.
IMAGE_WEIGHT = IMAGE_HEIGHT
HSV_CLUSTER = "hsv_cluster"
HSV_DOMINANT = "hsv_dominant"
#TOTAL_MASK = "total_masks"

In [3]:
def read_image(image_file, space="rgb"):
    """Load an image file and return its first three channels.

    Parameters
    ----------
    image_file : str
        Path handed to ``skimage.io.imread``.
    space : str, optional
        ``"hsv"`` converts the loaded pixels with ``skimage.color.rgb2hsv``;
        any other value (default ``"rgb"``) returns them as loaded.

    Returns
    -------
    numpy.ndarray
        A 3-D array (rows, columns, channels) with at most three channels.
    """
    image = skimage.io.imread(image_file)
    if image.ndim == 2:
        # A single-channel image comes back 2-D; replicate it to three
        # channels so the channel slice and the HSV conversion below work
        # instead of raising IndexError.
        image = skimage.color.gray2rgb(image)
    # Drop alpha which is not used
    image = image[:, :, :3]
    if space == "hsv":
        image = skimage.color.rgb2hsv(image)
    return image

# Thin wrapper around read_image; pass space="hsv" to get HSV pixels (the default is "rgb").
def read_image_hsv(image_file, space="rgb"):
    """Delegate to ``read_image`` and return its result unchanged.

    Despite the function name, the colour space defaults to ``"rgb"``;
    callers that want HSV values pass ``space="hsv"`` explicitly.
    """
    return read_image(image_file, space=space)

In [4]:
def get_domimant_colors(img, top_colors=2):
    """Cluster the pixels of an image with k-means.

    Parameters
    ----------
    img : numpy.ndarray
        3-D array (rows, columns, channels); it is flattened to one row per
        pixel before clustering.
    top_colors : int, optional
        Number of k-means clusters to fit (default 2).

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        ``cluster_centers_`` of the fitted model, and a float array with one
        entry per cluster giving the fraction of pixels assigned to it
        (sums to one).
    """
    pixels = img.reshape((img.shape[0] * img.shape[1], img.shape[2]))
    clt = KMeans(n_clusters = top_colors)
    clt.fit(pixels)
    # Count pixels per cluster label. minlength keeps exactly one entry per
    # cluster centre even when a cluster received no pixels (e.g. an image
    # with fewer distinct colours than top_colors). Deriving the bins from
    # the number of unique labels would shorten or shift the histogram
    # relative to cluster_centers_ in that case.
    hist = np.bincount(clt.labels_, minlength=top_colors).astype("float")
    # normalize the histogram, such that it sums to one
    hist /= hist.sum()
    return clt.cluster_centers_, hist

def get_images_details(image_ids,train_flag):
    """Collect size and dominant HSV colour for every image id.

    Parameters
    ----------
    image_ids : iterable of str
        Image ids; each one is substituted twice into the image path pattern.
    train_flag : bool
        Truthy selects the train path pattern, otherwise the test pattern.

    Returns
    -------
    list of tuple
        One ``(image_id, width, height, dominant_hsv)`` tuple per image, in
        input order. A progress line is printed for each image.
    """
    path_pattern = STAGE1_TRAIN_IMAGE_PATTERN if train_flag else STAGE1_TEST_IMAGE_PATTERN
    details = []
    for count, image_id in enumerate(image_ids, start=1):
        image_hsv = read_image_hsv(path_pattern.format(image_id, image_id), space="hsv")
        height, width, _ = image_hsv.shape
        # A single cluster: its centre is used as the image's dominant colour.
        centers, _ = get_domimant_colors(image_hsv, top_colors=1)
        flat_center = centers.reshape(1, -1).squeeze()
        details.append((image_id, width, height, flat_center))
        print(count, ":", image_id, width, height)
    return details

# kaggle training data

In [5]:
# Metadata CSV for the training set; the cells below read its 'image_id',
# 'source' and 'major_category' columns.
root = './stage1_train_fix_v4_external'
train_csv = root + '.csv'
if not os.path.isfile(train_csv):
    # Fail loudly: silently skipping the load would leave `df` undefined, or
    # worse, pointing at a frame left over from an earlier cell run.
    raise FileNotFoundError("metadata CSV not found: " + train_csv)
df = pd.read_csv(train_csv)

#ok = (df['source'] == 'Kaggle') | (df['source'] == 'TCGA')
ok = (df['source'] == 'Kaggle')
# Reset the index after filtering so rows are numbered 0..n-1 again; later
# cells combine this frame column-wise with a freshly built one, and
# pd.concat(axis=1) matches rows by index label.
df = df[ok].reset_index(drop=True)

#ok = df['major_category'] == 'Histology'
#df = df[ok]


In [None]:
# Series.as_matrix() was removed in pandas 1.0; .values returns the same
# NumPy array and works on old and new pandas alike.
train_image_ids = df['image_id'].values
print(len(train_image_ids))
train_image_ids

In [7]:

# Per-image (id, width, height, dominant HSV colour) tuples for the training images.
details = get_images_details(image_ids=train_image_ids, train_flag=True)

1 : af576e8ec3a8d0b57eb6a311299e9e4fd2047970d3dd9d6f52e54ea6a91109da 320 256
2 : bf4a61bb81589c9a67e3343408befd3e135af5e88b50c17f998f2131d24bc271 320 256
3 : 8e507d58f4c27cd2a82bee79fe27b069befd62a46fdaed20970a95a2ba819c7b 320 256
4 : 01d44a26f6680c42ba94c9bc6339228579a95d0e2695b149b7cc0c9592b21baf 320 256
5 : 589f86dee5b480a88dd4f77eeaffe2c4d70aefdf879a4096dde1fa4d41055b8f 320 256
6 : b909aa8f6f4bec37c3fb6ff5a85d166162d07983506fcc57be742b0f9dbafbf7 320 256
7 : f4b7c24baf69b8752c49d0eb5db4b7b5e1524945d48e54925bff401d5658045d 320 256
8 : 57bd029b19c1b382bef9db3ac14f13ea85e36a6053b92e46caedee95c05847ab 320 256
9 : 92f31f591929a30e4309ab75185c96ff4314ce0a7ead2ed2c2171897ad1da0c7 320 256
10 : 6b0ac2ab04c09dced54058ec504a4947f8ecd5727dfca7e0b3f69de71d0d31c7 320 256
11 : 1e488c42eb1a54a3e8412b1f12cde530f950f238d71078f2ede6a85a02168e1f 320 256
12 : a7f767ca9770b160f234780e172aeb35a50830ba10dc49c526f4712451abe1d2 320 256
13 : 4193474b2f1c72f735b13633b219d9cabdd43c21d9c2bb4dfc4809f104ba4c06 320

107 : f952cc65376009cfad8249e53b9b2c0daaa3553e897096337d143c625c2df886 320 256
108 : 93c5638e7e6433b5c9cc87c152bcbe28873d2f9d6a392cca0642520807542a77 256 256
109 : 84eeec681987753029eb83ea5f3ff7e8b5697783cdb2035f2882d40c9a3f1029 256 256
110 : 220b37f4ca7cab486d2b71cd87a46ee7411a5aa142799d96ed98015ab5ba538a 320 256
111 : 58c593bcb98386e7fd42a1d34e291db93477624b164e83ab2afa3caa90d1d921 256 256
112 : e5aeb5b3577abbebe8982b5dd7d22c4257250ad3000661a42f38bf9248d291fd 256 256
113 : c15c652c08153fb781a5349123ab8f80bb2a8680a41eb8e89e547ae01b7a5441 696 520
114 : 3d0ca3498d97edebd28dbc7035eced40baa4af199af09cbb7251792accaa69fe 256 256
115 : b98681c74842c4058bd2f88b06063731c26a90da083b1ef348e0ec734c58752b 256 256
116 : 0b2e702f90aee4fff2bc6e4326308d50cf04701082e718d4f831c8959fbcda93 256 256
117 : adc315bd40d699fd4e4effbcce81cd7162851007f485d754ad3b0472f73a86df 360 360
118 : fc9269fb2e651cd4a32b65ae164f79b0a2ea823e0a83508c85d7985a6bed43cf 256 256
119 : c6de542205b891eed5c40e6d8ae3d03a6ca39b26dc445b

212 : 7af09f98ec299ba0658d759eebc4c34e1c98289ea6ce37f233e9f5e4e2fc84f4 256 256
213 : 1c2f9e121fc207efff79d46390df1a740566b683ff56a96d8cabe830a398dd2e 256 256
214 : 79dfcbc9361edd3a1ffe81a5bdaa22a197ad1341f3fa64b86a646c2607d6b324 696 520
215 : 573e1480b500c395f8d3f1800e1998bf553af0d3d43039333d33cf37d08f64e5 256 256
216 : 305a8baaf726d7c9e695bff31d3a6a61445999a4732f0a3e6174dc9dcbe43931 256 256
217 : d2ce593bddf9998ce3b76328c0151d0ba4b644c293aca7f6254e521c448b305f 256 256
218 : f0a75e0322f11cead4219aa530673fe5eef67580fb6fccc254963c9fc6b58aa1 256 256
219 : e9b8ad127f2163438b6236c74938f43d7b4863aaf39a16367f4af59bfd96597b 256 256
220 : 11a0170f44e3ab4a8d669ae8ea9546d3a32ebfe6486d9066e5648d30b4e1cb69 256 256
221 : 92e7e86e765e05ce331c07a6d14f0a696eac7ee40058699243900f40b696d7aa 256 256
222 : f67e72b7fe0b1e3648ea745ffd395c80705c89b0c0c48227991fe6f5815b2a18 256 256
223 : bbfc4aab5645637680fa0ef00925eea733b93099f1944c0aea09b78af1d4eef2 696 520
224 : 8a65e41c630d85c0004ce1772ff66fbc87aca34cb165f6

316 : 14cc1424c59808274e123db51292e9dbb5b037ef3e7c767a8c45c9ac733b91bf 360 360
317 : e4537e7893e631f3ba6ae5b1023e24b233c78249a31c2f5e561f6c4cad88fcf6 256 256
318 : 514ccfc78cb55988a238d3ac9dc83460aa88382c95d56bcc0559962d9fe481ef 360 360
319 : a815a986800a95de0957116c6585deea8ffb6ee09ad00ccc687306937ac698d0 256 256
320 : b0d6dfcc95e4d087d232378f860fc3ef9f95ea5a4c26d623a0be091f820a793f 256 256
321 : d1ba6089cae2f90cb7275ece10ca393c25f60ea17e5c9c3cea2399d31fd41869 256 256
322 : 4bf6a5ec42032bb8dbbb10d25fdc5211b2fe1ce44b6e577ef89dbda17697d819 256 256
323 : 2869fad54664677e81bacbf00c2256e89a7b90b69d9688c9342e2c736ff5421c 360 360
324 : 4948e3dfe0483c2198b9616612071ead5474dffd223071cf0a567aea9aca0b9e 360 360
325 : 2a2032c4ed78f3fc64de7e5efd0bec26a81680b07404eaa54a1744b7ab3f8365 360 360
326 : 03b9306f44e9b8951461623dcbd615550cdcf36ea93b203f2c8fa58ed1dffcbe 360 360
327 : af8621ef0db8c26b0bce6385bd5609b584bfd678fcf7a234b8a15e6bb05c15ac 256 256
328 : 50a7ea80dd73232a17f98b5c83f62ec89989e892fe25b7

420 : b1a239838c7dbb34ffea851ad537899f24da62f4e3f3fd6d835ff7b922f27313 360 360
421 : a6e81120d1cb9f71f8a25f90a5d56c4b714a642fc496a705e38921fd90a3f69c 360 360
422 : ddf1bf458312de2895dd9cc5ce7ec9d334ad54c35edc96ad6001d20b1d8588d8 256 256
423 : 77ceeb87f560775ac150b8b9b09684ed3e806d0af6f26cce8f10c5fc280f5df2 256 256
424 : fdda64c47361b0d1a146e5b7b48dc6b7de615ea80b31f01227a3b16469589528 256 256
425 : f20eb4592e7d3cf58d421a9c34832d33adcdcbd0e17b7bf009a013847608da27 256 256
426 : 06c779330d6d3447be21df2b9f05d1088f5b3b50dc48724fc130b1fd2896a68c 360 360
427 : aa47f0b303b1d525b52452ae3a8553b2d61d719a28aee547e2ef1fc6730a078f 256 256
428 : 4327d27591871e9c8d317071a390d1b3dcedad05a9746175b005c41ea0d797b2 360 360
429 : a22b7882fa85b9f0fcef659a7b82bfcddf01710f9a7617a9e036e84ac6901841 256 256
430 : 5953af5080d981b554529971903d8bee9871457a4361b51f04ba04f43793dd8f 256 256
431 : 20e209f6ffa120a72712e1b4c1d3e24d1339227e2936abd4bbd49a636fada423 640 512
432 : dae976f161fe42dc58dee87d4bf2eb9f65736597cab011

524 : c7d546766518703580f63d5d8f11d54971044753f53c0b257d19c2f99d4bfdd0 256 256
525 : 358e47eaa1e9222252793fe0fb8c77028d4e0d4360b95a07c9fe6df6a2066556 256 256
526 : 2f929b067a59f88530b6bfa6f6889bc3a38adf88d594895973d1c8b2549fd93d 256 256
527 : fd8065bcb1afdbed19e028465d5d00cd2ecadc4558de05c6fa28bea3c817aa22 256 256
528 : f26f4c2c70c38fe12e00d5a814d5116691f2ca548908126923fd76ddd665ed24 256 256
529 : a486f6ed4b8781e7883e433d06a83dd66db3e8b36d45b9976c4214820ee22629 696 520
530 : 0bda515e370294ed94efd36bd53782288acacb040c171df2ed97fd691fc9d8fe 256 256
531 : 4a424e0cb845cf6fd4d9fe62875552c7b89a4e0276cf16ebf46babe4656a794e 256 256
532 : 9e4f8ec60a0d622a02c0e16eedcc0101f88ddefbcec2383946c4572b57a1e43a 256 256
533 : f728de04267283f0b4daab9a840e7433b2c6034baf195fd526850439c9297687 640 512
534 : 356d9903d16074f152fe8f2f0ef555d9959c53264228eae7373cad5cf35d4e85 256 256
535 : 245b995878370ef4ea977568b2b67f93d4ecaa9308761b9d3e148e0803780183 256 256
536 : 2ab91a4408860ae8339689ed9f87aa9359de1bdd4ca5c2

628 : a891bbc89143bca7a717386144eb061ec2d599cba24681389bcb3a2fedb8ff8c 696 520
629 : 3f9fc8e63f87e8a56d3eaef7db26f1b6db874d19f12abd5a752821b78d47661e 696 520
630 : 193ffaa5272d5c421ae02130a64d98ad120ec70e4ed97a72cdcd4801ce93b066 696 520
631 : 6b61ab2e3ff0e2c7a55fd71e290b51e142555cf82bc7574fc27326735e8acbd1 696 520
632 : 9620c33d8ef2772dbc5bd152429f507bd7fafb27e12109003292b671e556b089 696 520
633 : b61d3fb0d0ebbee018346e0adeff9e9178f33aa95262779b3c196f93b4ace895 696 520
634 : 175dbb364bfefc9537931144861c9b6e08934df3992782c669c6fe4234319dfc 696 520
635 : 4dbbb275960ab9e4ec2c66c8d3000f7c70c8dce5112df591b95db84e25efa6e9 696 520
636 : 6b72b61b80060a9e79a4747f9c5d5af135af9db466681c2d1086f784c7130699 696 520
637 : 538b7673d507014d83af238876e03617396b70fe27f525f8205a4a96900fbb8e 1272 603
638 : 150b0ffa318c87b31d78af0e87d60390dbcd84b5f228a8c1fb3225cbe5df3e3f 696 520
639 : f29fd9c52e04403cd2c7d43b6fe2479292e53b2f61969d25256d2d2aca7c6a81 1272 603
640 : e23e11414ee645b51081fb202d38b793f0c8ef2940f8

In [8]:
META_COLS = [IMAGE_ID, IMAGE_WIDTH, IMAGE_WEIGHT]
COLS = META_COLS + [HSV_DOMINANT]

# One row per image; the HSV_DOMINANT column holds one colour vector per row.
testPD = pd.DataFrame(details, columns=COLS)
# Expand the per-row colour vectors into a 2-D feature matrix.
# DataFrame.as_matrix() was removed in pandas 1.0; .values is the equivalent
# that works on every pandas version.
X = pd.DataFrame(testPD[HSV_DOMINANT].values.tolist()).values
# Fit the 3-cluster model on the training colours; a later cell reuses
# `kmeans` to label the test images without refitting.
kmeans = KMeans(n_clusters=3).fit(X)
clusters = kmeans.predict(X)
testPD[HSV_CLUSTER] = clusters
#testPD_sort = testPD.sort_values(['hsv_cluster'], ascending=[1])
#testPD_sort.to_csv("stage1_test_3clusters_sort.csv")

In [9]:
# testPD was built row by row from df['image_id'], so the two frames match
# by position. pd.concat(axis=1) aligns on index labels instead, and df may
# carry a non-contiguous index after row filtering, so renumber before
# joining. The assert guards against a stale `df` from out-of-order runs.
assert testPD[IMAGE_ID].tolist() == df['image_id'].tolist(), \
    "testPD and df are out of sync; re-run the cells in order"
newpd = pd.concat([testPD, df['major_category'].reset_index(drop=True)], axis=1)
newpd.to_csv("stage1_train_3clusters_sort.csv")
newpd

Unnamed: 0,image_id,width,height,hsv_dominant,hsv_cluster,major_category
0,af576e8ec3a8d0b57eb6a311299e9e4fd2047970d3dd9d...,320,256,"[0.7344588849989617, 0.24131209320036598, 0.81...",0,Histology
1,bf4a61bb81589c9a67e3343408befd3e135af5e88b50c1...,320,256,"[0.7421093003600739, 0.2104115514078713, 0.789...",0,Histology
2,8e507d58f4c27cd2a82bee79fe27b069befd62a46fdaed...,320,256,"[0.7334707808806699, 0.23581063661371732, 0.81...",0,Histology
3,01d44a26f6680c42ba94c9bc6339228579a95d0e2695b1...,320,256,"[0.7435220601544178, 0.23248918633236715, 0.78...",0,Histology
4,589f86dee5b480a88dd4f77eeaffe2c4d70aefdf879a40...,320,256,"[0.7415122445473382, 0.33030192011057236, 0.77...",0,Histology
5,b909aa8f6f4bec37c3fb6ff5a85d166162d07983506fcc...,320,256,"[0.7421906605822839, 0.28829798565497716, 0.77...",0,Histology
6,f4b7c24baf69b8752c49d0eb5db4b7b5e1524945d48e54...,320,256,"[0.5554002667262048, 0.05717648021000052, 0.82...",0,Histology
7,57bd029b19c1b382bef9db3ac14f13ea85e36a6053b92e...,320,256,"[0.8466080724863937, 0.39813288861538576, 0.77...",0,Histology
8,92f31f591929a30e4309ab75185c96ff4314ce0a7ead2e...,320,256,"[0.6001703318431759, 0.062376887171090384, 0.7...",0,Histology
9,6b0ac2ab04c09dced54058ec504a4947f8ecd5727dfca7...,320,256,"[0.7428985043527983, 0.2681054200306483, 0.784...",0,Histology


# kaggle testing data

In [10]:
# Metadata CSV for the test set; the cells below read its 'image_id' and
# 'major_category' columns.
root = './stage1_test_v4_ordered'
test_csv = root + '.csv'
if not os.path.isfile(test_csv):
    # Fail loudly: skipping the load would silently keep the previously
    # loaded frame in `df`, and the following cells would then process the
    # wrong image ids.
    raise FileNotFoundError("metadata CSV not found: " + test_csv)
df = pd.read_csv(test_csv)


In [11]:
# Series.as_matrix() was removed in pandas 1.0; .values returns the same
# NumPy array and works on old and new pandas alike.
test_image_ids = df['image_id'].values
print(len(test_image_ids))
test_image_ids

65


array(['259b35151d4a7a5ffdd7ab7f171b142db8cfe40beeee67277fac6adca4d042c4',
       '17b9bf4356db24967c4677b8376ac38f826de73a88b93a8d73a8b452e399cdff',
       '0f1f896d9ae5a04752d3239c690402c022db4d72c0d2c087d73380896f72c466',
       '472b1c5ff988dadc209faea92499bc07f305208dbda29d16262b3d543ac91c71',
       'f5effed21f671bbf4551ecebb7fe95f3be1cf09c16a60afe64d2f0b95be9d1eb',
       '9f17aea854db13015d19b34cb2022cfdeda44133323fcd6bb3545f7b9404d8ab',
       '44afae184c89e6ba55985b4d341acc1ae1e8b6ef96312064e0e6e630e022b078',
       'da6c593410340b19bb212b9f6d274f95b08c0fc8f2570cd66bc5ed42c560acab',
       'c8e79ff4ac55f4b772057de28e539727b7f4f2a3de73bf7a082a0ace86d609eb',
       '1d9eacb3161f1e2b45550389ecf7c535c7199c6b44b1c6a46303f7b965e508f1',
       'b83d1d77935b6cfd44105b54600ffc4b6bd82de57dec65571bcb117fa8398ba3',
       '3c4c675825f7509877bc10497f498c9a2e3433bf922bd870914a2eb21a54fd26',
       '648c8ffa496e1716017906d0bf135debfc93386ae86aa3d4adbda9a505985fd9',
       '505bc0a3928d8aef5

In [15]:
# Per-image (id, width, height, dominant HSV colour) tuples for the test images.
details = get_images_details(image_ids=test_image_ids, train_flag=False)

1 : 259b35151d4a7a5ffdd7ab7f171b142db8cfe40beeee67277fac6adca4d042c4 680 512
2 : 17b9bf4356db24967c4677b8376ac38f826de73a88b93a8d73a8b452e399cdff 680 512
3 : 0f1f896d9ae5a04752d3239c690402c022db4d72c0d2c087d73380896f72c466 680 512
4 : 472b1c5ff988dadc209faea92499bc07f305208dbda29d16262b3d543ac91c71 680 512
5 : f5effed21f671bbf4551ecebb7fe95f3be1cf09c16a60afe64d2f0b95be9d1eb 680 512
6 : 9f17aea854db13015d19b34cb2022cfdeda44133323fcd6bb3545f7b9404d8ab 680 512
7 : 44afae184c89e6ba55985b4d341acc1ae1e8b6ef96312064e0e6e630e022b078 680 512
8 : da6c593410340b19bb212b9f6d274f95b08c0fc8f2570cd66bc5ed42c560acab 680 512
9 : c8e79ff4ac55f4b772057de28e539727b7f4f2a3de73bf7a082a0ace86d609eb 348 524
10 : 1d9eacb3161f1e2b45550389ecf7c535c7199c6b44b1c6a46303f7b965e508f1 348 524
11 : b83d1d77935b6cfd44105b54600ffc4b6bd82de57dec65571bcb117fa8398ba3 348 524
12 : 3c4c675825f7509877bc10497f498c9a2e3433bf922bd870914a2eb21a54fd26 348 524
13 : 648c8ffa496e1716017906d0bf135debfc93386ae86aa3d4adbda9a505985fd9 256

In [16]:
META_COLS = [IMAGE_ID, IMAGE_WIDTH, IMAGE_WEIGHT]
COLS = META_COLS + [HSV_DOMINANT]

# One row per image; the HSV_DOMINANT column holds one colour vector per row.
testPD = pd.DataFrame(details, columns=COLS)
# DataFrame.as_matrix() was removed in pandas 1.0; .values is the equivalent
# that works on every pandas version.
X = pd.DataFrame(testPD[HSV_DOMINANT].values.tolist()).values
#kmeans = KMeans(n_clusters=3).fit(X) # skip training procedure, do testing directly
# Reuse the `kmeans` model fitted in the training section (that cell must
# have been run first) so test images get the same cluster numbering.
clusters = kmeans.predict(X)
testPD[HSV_CLUSTER] = clusters

In [26]:
# testPD was built row by row from df['image_id'], so the two frames match
# by position. pd.concat(axis=1) aligns on index labels, so renumber the
# metadata rows before joining. df's own image_id column is carried along
# (as a second 'image_id' column) so the pairing stays visible in the CSV.
meta = df[['major_category','image_id']].reset_index(drop=True)
# Guard against a stale `df` or `testPD` left over from out-of-order runs.
assert testPD[IMAGE_ID].tolist() == meta['image_id'].tolist(), \
    "testPD and df are out of sync; re-run the cells in order"
newpd = pd.concat([testPD, meta], axis=1)
newpd.to_csv("stage1_test_3clusters_sort.csv")
newpd

Unnamed: 0,image_id,width,height,hsv_dominant,hsv_cluster,major_category,image_id.1
0,259b35151d4a7a5ffdd7ab7f171b142db8cfe40beeee67...,680,512,"[0.6763537133343291, 0.24229532530702752, 0.67...",0,Histology,259b35151d4a7a5ffdd7ab7f171b142db8cfe40beeee67...
1,17b9bf4356db24967c4677b8376ac38f826de73a88b93a...,680,512,"[0.5317290216974283, 0.1715136791573704, 0.751...",0,Histology,17b9bf4356db24967c4677b8376ac38f826de73a88b93a...
2,0f1f896d9ae5a04752d3239c690402c022db4d72c0d2c0...,680,512,"[0.8193604814877514, 0.3447413354496781, 0.713...",0,Histology,0f1f896d9ae5a04752d3239c690402c022db4d72c0d2c0...
3,472b1c5ff988dadc209faea92499bc07f305208dbda29d...,680,512,"[0.8444082046681772, 0.3762127985917782, 0.705...",0,Histology,472b1c5ff988dadc209faea92499bc07f305208dbda29d...
4,f5effed21f671bbf4551ecebb7fe95f3be1cf09c16a60a...,680,512,"[0.21538513612938068, 0.15186398285588143, 0.8...",2,Histology,f5effed21f671bbf4551ecebb7fe95f3be1cf09c16a60a...
5,9f17aea854db13015d19b34cb2022cfdeda44133323fcd...,680,512,"[0.2427573357995684, 0.1530519099811833, 0.857...",2,Histology,9f17aea854db13015d19b34cb2022cfdeda44133323fcd...
6,44afae184c89e6ba55985b4d341acc1ae1e8b6ef963120...,680,512,"[0.30086713644324237, 0.18315814150781695, 0.8...",2,Histology,44afae184c89e6ba55985b4d341acc1ae1e8b6ef963120...
7,da6c593410340b19bb212b9f6d274f95b08c0fc8f2570c...,680,512,"[0.3278645602137175, 0.2002381983377835, 0.832...",2,Histology,da6c593410340b19bb212b9f6d274f95b08c0fc8f2570c...
8,c8e79ff4ac55f4b772057de28e539727b7f4f2a3de73bf...,348,524,"[0.25339702677753717, 0.13280993767696034, 0.8...",2,Histology,c8e79ff4ac55f4b772057de28e539727b7f4f2a3de73bf...
9,1d9eacb3161f1e2b45550389ecf7c535c7199c6b44b1c6...,348,524,"[0.28879776097692783, 0.10802044675531165, 0.8...",2,Histology,1d9eacb3161f1e2b45550389ecf7c535c7199c6b44b1c6...


In [22]:
# Show the id/category pairs of the metadata frame for a visual check.
df.loc[:, ['image_id','major_category']]

Unnamed: 0,image_id,major_category
0,259b35151d4a7a5ffdd7ab7f171b142db8cfe40beeee67...,Histology
1,17b9bf4356db24967c4677b8376ac38f826de73a88b93a...,Histology
2,0f1f896d9ae5a04752d3239c690402c022db4d72c0d2c0...,Histology
3,472b1c5ff988dadc209faea92499bc07f305208dbda29d...,Histology
4,f5effed21f671bbf4551ecebb7fe95f3be1cf09c16a60a...,Histology
5,9f17aea854db13015d19b34cb2022cfdeda44133323fcd...,Histology
6,44afae184c89e6ba55985b4d341acc1ae1e8b6ef963120...,Histology
7,da6c593410340b19bb212b9f6d274f95b08c0fc8f2570c...,Histology
8,c8e79ff4ac55f4b772057de28e539727b7f4f2a3de73bf...,Histology
9,1d9eacb3161f1e2b45550389ecf7c535c7199c6b44b1c6...,Histology


In [30]:
# Scratch data: a list of digit strings.
aa = "3 2 4".split()

In [36]:
# Total of the digit strings in `aa`, each converted to int.
sum(int(item) for item in aa)

9