In [21]:
import os

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = ""


import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time
import tensorflow as tf 
import os
import cv2
import hpacellseg.cellsegmentator as cellsegmentator
from hpacellseg.utils import label_cell
from PIL import Image
from typing import Tuple, List
import warnings
from torch.serialization import SourceChangeWarning
warnings.filterwarnings("ignore", category=SourceChangeWarning)


NUC_MODEL = '/media/beta/mitko-beps/segmentator/model_paths/dpn_unet_nuclei_v1.pth' #./nuclei_model.pth
CELL_MODEL = '/media/beta/mitko-beps/segmentator/model_paths/dpn_unet_cell_3ch_v1.pth'

def build_image_names(image_id: str) -> Tuple[List[str], List[str], List[str], List[str], List[List[str]]]:
    # red = microtubule
    mt = f'/media/beta/mitko-beps/data/test/{image_id}_red.png'

    # blue = nuclei
    nu = f'/media/beta/mitko-beps/data/test/{image_id}_blue.png'

    # yellow = endoplasmic reticulum
    er = f'/media/beta/mitko-beps/data/test/{image_id}_yellow.png'

    # green = target protein
    tp = f'/media/beta/mitko-beps/data/test/{image_id}_green.png'

    return [mt], [er], [nu], [tp], [[mt], [er], [nu]]


segmentator = cellsegmentator.CellSegmentator(
    NUC_MODEL,
    CELL_MODEL,
    scale_factor=0.25,
    device='cpu',
    padding=False,
    multi_channel_model=True)

def pad(img, h, w):
    #  in case when you have odd number
    # Function to padd the images
    top_pad = np.floor((h - img.shape[0]) / 2).astype(np.uint16)
    bottom_pad = np.ceil((h - img.shape[0]) / 2).astype(np.uint16)
    right_pad = np.ceil((w - img.shape[1]) / 2).astype(np.uint16)
    left_pad = np.floor((w - img.shape[1]) / 2).astype(np.uint16)
    return np.copy(np.pad(img, ((top_pad, bottom_pad), (left_pad, right_pad), (0, 0)), mode='constant', constant_values=0))


def segment_picture(image_name):
    names = build_image_names(image_name)

    cell_segmentations = segmentator.pred_cells(names[-1])
    nuc_segmentations = segmentator.pred_nuclei(names[2])

    _, cell_mask = label_cell(nuc_segmentations[0], cell_segmentations[0])
    
    mt = plt.imread(names[0][0])  #names[0][0] = string, names[0] = list
    er = plt.imread(names[1][0])
    nu = plt.imread(names[2][0])
    tp = plt.imread(names[3][0])
    
    cell_masks = []
    
    #For each mask
    for cell in set(cell_mask.flatten()):  #set zet in volgorde
        if cell == 0:
            continue
        mask_individual_cell = cell_mask == cell

        cell_masks.append(mask_individual_cell)
        
        a = np.where(cell_mask == cell)
        first_y, last_y, first_x, last_x = np.min(a[0]), np.max(a[0]), np.min(a[1]), np.max(a[1])

        sub_img = mask_individual_cell[first_y: last_y, first_x: last_x]

        final_img_mt = np.where(mask_individual_cell[first_y: last_y, first_x: last_x] == False, np.zeros_like(sub_img),
 mt[first_y: last_y, first_x: last_x])
        final_img_er = np.where(mask_individual_cell[first_y: last_y, first_x: last_x] == False, np.zeros_like(sub_img),
 er[first_y: last_y, first_x: last_x])
        final_img_nu = np.where(mask_individual_cell[first_y: last_y, first_x: last_x] == False, np.zeros_like(sub_img),
 nu[first_y: last_y, first_x: last_x])
        final_img_tp = np.where(mask_individual_cell[first_y: last_y, first_x: last_x] == False, np.zeros_like(sub_img),
 tp[first_y: last_y, first_x: last_x])
        a = np.where(cell_mask == cell)
        
        img = np.dstack((final_img_mt, final_img_nu, final_img_er, final_img_tp))
        
        
        target_size = (224, 224)
        size = (img.shape[0], img.shape[1])
        batch_shape = (224, 224, 4)
        batch_center = np.array(batch_shape[:2])//2

        if size[0] <= target_size[0] and size[1] <= target_size[1]:
            new_img = pad(img, target_size[0], target_size[1])

        else:
            axis_to_add = size[0] > size[1]
            pixels_to_add = max(size) - min(size)

            if axis_to_add:
                new_block = np.zeros((size[int(not axis_to_add)], pixels_to_add // 2, 4))
                new_block_2 = np.zeros((size[int(not axis_to_add)], int(pixels_to_add % 2 != 0) + pixels_to_add // 2, 4)
)
            else:
                new_block = np.zeros((pixels_to_add // 2, size[int(not axis_to_add)], 4))
                new_block_2 = np.zeros((int(pixels_to_add % 2 != 0) + pixels_to_add // 2, size[int(not axis_to_add)], 4)
)
            new_img = np.concatenate((new_block, img, new_block_2), axis=int(axis_to_add))
            assert new_img.shape[0] == new_img.shape[1]
            new_img = tf.image.resize(new_img, batch_shape[:2])

        image_arr = np.array(new_img)
        root = "/media/beta/mitko-beps/heather/TestSeg/" + image_name + "_" + str(cell) + ".npy"
        np.save(root, image_arr)
    
    return max(set(cell_mask.flatten())), cell_masks

In [2]:
import base64
import numpy as np
import os 
import pandas as pd
from pycocotools import _mask as coco_mask
import typing as t
import zlib


def encode_binary_mask(mask: np.ndarray) -> t.Text:
    """Converts a binary mask into OID challenge encoding ascii text."""

    # check input mask --
    if mask.dtype != np.bool:
        raise ValueError(
           "encode_binary_mask expects a binary mask, received dtype == %s" %
           mask.dtype)

    mask = np.squeeze(mask)
    if len(mask.shape) != 2:
        raise ValueError(
           "encode_binary_mask expects a 2d mask, received shape == %s" %
           mask.shape)

     # convert input mask to expected COCO API input --
    mask_to_encode = mask.reshape(mask.shape[0], mask.shape[1], 1)
    mask_to_encode = mask_to_encode.astype(np.uint8)
    mask_to_encode = np.asfortranarray(mask_to_encode)

    # RLE encode mask --
    encoded_mask = coco_mask.encode(mask_to_encode)[0]["counts"]

    # compress and base64 encoding --
    binary_str = zlib.compress(encoded_mask, zlib.Z_BEST_COMPRESSION)
    base64_str = base64.b64encode(binary_str)
    return base64_str.decode("utf-8")

In [3]:
test_df = pd.read_csv("/media/beta/mitko-beps/data/sample_submission.csv")
test_df["n_cells"] = 0
test_df["Encoded_Masks"] = ""

c = len(test_df)
all_enc_masks = []
for n, i in enumerate(test_df['ID']):
    start_time = time.time()
    try: 
        i = i.split('/')[-1]
        cells, cell_masks = segment_picture(i)
        test_df.loc[test_df['ID'] == i, 'n_cells'] = cells
    except:
        print('error in ' + i)
        with open('/media/beta/mitko-beps/heather/errors_1.txt', 'a') as f:
            f.write(i + '\n')
        cells = -1
    
    encoded_masks = []
                
    for mask in cell_masks: 
        encoded_mask = encode_binary_mask(mask)
        encoded_masks.append(encoded_mask)
    
    all_enc_masks.append(encoded_masks)
    #test_df.iat[n, test_df.columns.get_loc('Encoded_Masks')] = encoded_masks
    #test_df.loc[test_df["ID"] == i, "Encoded_Masks"] == encoded_masks
    
    print("{} out of {}, {:.2f}%.  {} cells segmented".format(n + 1, c, 100 * n / c, cells))
    print("took {:.2f}s".format(time.time()-start_time))
    print()
    
        

test_df.to_csv('/media/beta/mitko-beps/heather/test_with_masks_1.csv')
np.save("/media/beta/mitko-beps/heather/encoded_masks_1.npy", all_enc_masks)

1 out of 559, 0.00%.  15 cells segmented
took 23.06s

2 out of 559, 0.18%.  12 cells segmented
took 21.60s

3 out of 559, 0.36%.  36 cells segmented
took 25.24s

4 out of 559, 0.54%.  22 cells segmented
took 21.73s

5 out of 559, 0.72%.  34 cells segmented
took 54.42s

6 out of 559, 0.89%.  21 cells segmented
took 22.12s

error in 020a29cf-2c24-478b-8603-c22a90dc3e31
7 out of 559, 1.07%.  -1 cells segmented
took 3.34s

8 out of 559, 1.25%.  16 cells segmented
took 23.25s

9 out of 559, 1.43%.  32 cells segmented
took 26.29s

10 out of 559, 1.61%.  22 cells segmented
took 21.85s

11 out of 559, 1.79%.  12 cells segmented
took 22.84s

error in 02b3c5aa-d70c-49d1-b3e5-3f5cc10375ca
12 out of 559, 1.97%.  -1 cells segmented
took 3.17s

error in 02f9ee97-bf2e-4d9d-bdfd-b903ec2e79c0
13 out of 559, 2.15%.  -1 cells segmented
took 2.98s

14 out of 559, 2.33%.  30 cells segmented
took 23.95s

15 out of 559, 2.50%.  20 cells segmented
took 22.83s

16 out of 559, 2.68%.  16 cells segmented
took 21

132 out of 559, 23.43%.  19 cells segmented
took 24.89s

133 out of 559, 23.61%.  19 cells segmented
took 23.31s

134 out of 559, 23.79%.  15 cells segmented
took 23.04s

135 out of 559, 23.97%.  38 cells segmented
took 25.25s

136 out of 559, 24.15%.  87 cells segmented
took 29.60s

137 out of 559, 24.33%.  20 cells segmented
took 23.55s

138 out of 559, 24.51%.  9 cells segmented
took 20.78s

139 out of 559, 24.69%.  9 cells segmented
took 21.19s

140 out of 559, 24.87%.  20 cells segmented
took 24.45s

141 out of 559, 25.04%.  29 cells segmented
took 25.38s

142 out of 559, 25.22%.  17 cells segmented
took 23.60s

143 out of 559, 25.40%.  20 cells segmented
took 24.63s

144 out of 559, 25.58%.  15 cells segmented
took 22.62s

145 out of 559, 25.76%.  19 cells segmented
took 24.83s

146 out of 559, 25.94%.  17 cells segmented
took 24.46s

147 out of 559, 26.12%.  18 cells segmented
took 21.70s

148 out of 559, 26.30%.  7 cells segmented
took 19.71s

149 out of 559, 26.48%.  13 cells 

265 out of 559, 47.23%.  30 cells segmented
took 25.75s

266 out of 559, 47.41%.  17 cells segmented
took 21.97s

267 out of 559, 47.58%.  14 cells segmented
took 21.91s

268 out of 559, 47.76%.  102 cells segmented
took 32.47s

269 out of 559, 47.94%.  20 cells segmented
took 21.36s

270 out of 559, 48.12%.  16 cells segmented
took 21.93s

271 out of 559, 48.30%.  14 cells segmented
took 25.21s

272 out of 559, 48.48%.  30 cells segmented
took 50.87s

273 out of 559, 48.66%.  42 cells segmented
took 27.89s

274 out of 559, 48.84%.  66 cells segmented
took 26.86s

275 out of 559, 49.02%.  21 cells segmented
took 20.83s

276 out of 559, 49.19%.  21 cells segmented
took 24.52s

error in 828927e2-84f2-42bf-b62b-be90b7eacd6f
277 out of 559, 49.37%.  -1 cells segmented
took 3.44s

278 out of 559, 49.55%.  21 cells segmented
took 22.24s

279 out of 559, 49.73%.  35 cells segmented
took 23.80s

280 out of 559, 49.91%.  24 cells segmented
took 24.36s

281 out of 559, 50.09%.  15 cells segmente

398 out of 559, 71.02%.  17 cells segmented
took 24.80s

399 out of 559, 71.20%.  12 cells segmented
took 20.71s

400 out of 559, 71.38%.  35 cells segmented
took 24.30s

401 out of 559, 71.56%.  17 cells segmented
took 24.75s

402 out of 559, 71.74%.  24 cells segmented
took 22.04s

403 out of 559, 71.91%.  16 cells segmented
took 22.35s

404 out of 559, 72.09%.  22 cells segmented
took 23.59s

405 out of 559, 72.27%.  34 cells segmented
took 26.02s

406 out of 559, 72.45%.  11 cells segmented
took 21.36s

407 out of 559, 72.63%.  15 cells segmented
took 24.84s

error in bd661fb6-95bf-46fa-a884-fcc56c24e993
408 out of 559, 72.81%.  -1 cells segmented
took 3.09s

409 out of 559, 72.99%.  23 cells segmented
took 25.57s

410 out of 559, 73.17%.  17 cells segmented
took 23.42s

411 out of 559, 73.35%.  24 cells segmented
took 24.18s

412 out of 559, 73.52%.  22 cells segmented
took 25.02s

413 out of 559, 73.70%.  38 cells segmented
took 31.38s

414 out of 559, 73.88%.  13 cells segmented

533 out of 559, 95.17%.  39 cells segmented
took 27.68s

error in fa1c891d-7660-461d-8299-afd1aa2d6609
534 out of 559, 95.35%.  -1 cells segmented
took 3.86s

535 out of 559, 95.53%.  42 cells segmented
took 57.93s

536 out of 559, 95.71%.  7 cells segmented
took 24.13s

error in fb18d293-0c98-4ace-8c1b-fe9f17e9878e
537 out of 559, 95.89%.  -1 cells segmented
took 6.39s

538 out of 559, 96.06%.  26 cells segmented
took 26.10s

error in fb614e7d-3c4d-432c-8452-2d708acf5eb6
539 out of 559, 96.24%.  -1 cells segmented
took 3.15s

error in fc2319d7-6118-4677-a560-e40f05140317
540 out of 559, 96.42%.  -1 cells segmented
took 3.46s

541 out of 559, 96.60%.  14 cells segmented
took 28.26s

542 out of 559, 96.78%.  23 cells segmented
took 27.44s

543 out of 559, 96.96%.  28 cells segmented
took 27.57s

544 out of 559, 97.14%.  15 cells segmented
took 27.96s

545 out of 559, 97.32%.  13 cells segmented
took 25.17s

546 out of 559, 97.50%.  57 cells segmented
took 33.70s

547 out of 559, 97.67%.

  return array(a, dtype, copy=False, order=order, subok=True)


In [65]:
# This doesn't work
array_masks = np.load("/media/beta/mitko-beps/heather/encoded_masks.npy",  allow_pickle=True)

In [10]:
masks_flat_list = [item for sublist in all_enc_masks for item in sublist]
d = {"enc_masks": masks_flat_list}
df_enc = pd.DataFrame(data=d)
df_enc.to_csv("/media/beta/mitko-beps/heather/enc_masks.csv", index=False)

In [16]:
print(len(df_enc))
df_enc[0:10]

11179


Unnamed: 0,enc_masks
0,eNq1Vtty4jAM/SUUe+lLp53Z7swOtVGAUBwn5EIgN275/7...
1,eNqdUm1PwkAM/kvt3RgYDEaDkZd7QUKiCyIDCRgx8P+/eW...
2,eNoLCIgJMjbOzTc09AdB48gAP7OgfENbMM/IDyzma+4JoS...
3,eNqNUsuOwjAM/CXHrliQKg5oWV5NfIDLapG2qAgkLvz/jd...
4,eNp9UV2PmzAQ/Eus7SRVE6k9tbkmtVm+aiA0EAjxBUiA//...
5,eNqFVG1zojAQ/ktZoMrpjHfcqEwZWPRSlJdiqSIFLsr//3...
6,eNqtVemSojAQfiUazExt7QNMTVY6qyMqRziDiIq+/79NE0...
7,eNqNVG1z2jAM/kuWE9re3u52rDtoEpnRQC4NKSzQpKWs/P...
8,eNqVUk1vwjAM/UtxQrrDBJdtB7I0mcSIWAejlI8pwLT/f5...
9,eNqFVVl3okoQ/ksU6HjPHeZlzJxMjBREhGZfGpBF1P//dq...


In [6]:
""""
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.preprocessing import MultiLabelBinarizer
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Input, GlobalAveragePooling2D, Dropout, Dense, BatchNormalization
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard
from efficientnet import inject_tfkeras_modules, init_tfkeras_custom_objects
from efficientnet import model

# Import EfficientNetB0
EfficientNetB0 = inject_tfkeras_modules(model.EfficientNetB0)

# Load architecture
def build_model(num_classes):
    input_shape = (224, 224, 4)
    inputs = Input(input_shape)
    base_model = EfficientNetB0(include_top=False, input_tensor = inputs, input_shape = input_shape, weights=None)

    # Rebuild top
    x = GlobalAveragePooling2D()(base_model.output)
    x = Dropout(0.2)(x)
    x = Dense(num_classes, activation='sigmoid')(x)
    
    # Load the weights 
    model = tf.keras.models.Model(inputs, x)
    model.load_weights("/media/beta/mitko-beps/heather/Pretrained_model/EfficientNetB0_weights.hdf5")
    
    # Compile the model 
    optimizer = tf.keras.optimizers.Adam(learning_rate=1e-5)
    model.compile(
            optimizer=optimizer, loss="binary_crossentropy", metrics=["accuracy"]
            )

    
    return model
"""

In [7]:
#model = build_model(19)



In [26]:
print((all_enc_masks[0][0]))

eNq1Vtty4jAM/SUUe+lLp53Z7swOtVGAUBwn5EIgN275/7e1EsKtTaHtrE2SY0uWbelYJvw1YqMd9Opi11+wm/ex1jJAQtbQwjPJmdymcfWPau9k4n1/VzmpXCoCWqZCs4iP9O16UacdnJZVv7vsHs18NHnbbnd51t/Ybaaw4XZtnVvrf2Hf7T7Mg9a5/Gp/V23bav3V4Zejvc/lBh/k1/68jEfHtpt9NYs+G1/bPsgPrLkgm+nptePgM/3m7Q4Cexpi0tsvILJHIT47CfzeBPg0S+ClDPDRTeCvrzC3FIuHcQo7K/JwMkxSqKzAw9FwlYLDtIdbWaTgs7GHKznKIGCVxqUcZxCyvcaFdDJI2FpjIFUGJUs0KhllsGaxxpmMM9iwSOObXGSwZYHGqVxlsGdaoyNzQkrjRK4zqNhU41juMpjwvYuVcHJw+NrFrXBzeOOZi2sxz0HxpYuFCHPQPHYxF4scPB64mIlVDj73XExFnsOcK0IloSmhHaExoYpQpXAlJgUEfKcwEdMCIr5RGItZAQteKIyELiDhKSGf0FJhKMIClnyhcC7iAlIeKvTFsoCM+4RSQppQTkgp9MSa0JRGbGnEhNC+gBUfKwxERWg3M3M4pZljTUgRymdG6pdGupyZEWFprEQzYy8poeTuDLUoSwh/Qcf5vtm+Ok839E7HtstOl737ynW++a7+LTuf57XrUr0a50csMI6WJSETmlhuCL0RosAxh6QmmAtmgjmXb4ZYzATdlyb8KSsL8KQmlKyhma72T31ie/Ux/igEbao6Jq1DarIOSaitzc3T6h3SAp5CdSE/Ji84zns9j3lesoGhq9c3dM0GjiFzf2eIOzDEDQgtCYV9Q+FkYHYd9StCxhNJf1LCQ9f9cFbZ8OT9e+P3VX7c5uO95+Mn/PnOun42vvuc3jvf/1lXUzziSdSfl4aieCDzkdxfyl/v/05cyOsbsjkSF/e61d6dtZQNuaTv45/my4aNxPQTRZHh

In [27]:
import os
import tensorflow as tf
import pandas as pd 
import numpy as np

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = ""

In [28]:
#load the model 
reload_model = tf.keras.models.load_model('/media/beta/mitko-beps/heather/MODEL_FT_HEATHER')
#reload_model = model

In [44]:
#
# load dataframe 
test = pd.read_csv('/media/beta/mitko-beps/heather/test_with_masks.csv')

sum(test["n_cells"])


9853

In [49]:
sample_submission = pd.read_csv("/media/beta/mitko-beps/data/sample_submission.csv")
sample_submission.head()
df.head()

Unnamed: 0,ImageWidth,ImageHeight,ID,PredictionString
0,2048,2048,0040581b-f1f2-4fbe-b043-b6bfea5404bb,0 1 eNoLCAgIMAEABJkBdQ==
1,2048,2048,004a270d-34a2-4d60-bbe4-365fca868193,0 1 eNoLCAgIMAEABJkBdQ==
2,2048,2048,00537262-883c-4b37-a3a1-a4931b6faea5,0 1 eNoLCAgIMAEABJkBdQ==
3,2048,2048,00c9a1c9-2f06-476f-8b0d-6d01032874a2,0 1 eNoLCAgIMAEABJkBdQ==
4,3072,3072,0173029a-161d-40ef-af28-2342915b22fb,0 1 eNoLCAgIsAQABJ4Beg==


In [20]:
imgs_flat_list = [item for sublist in test_images for item in sublist]
len(imgs_flat_list)

9853

In [32]:
prediction_list = []
confidence_list = []
classes_list = []

for one_list_with_images in (test_images): 
    pred = []
    cls = []    
    confid = []
    for image in one_list_with_images: 
        img = np.expand_dims(image, axis = 0)
        prediction = reload_model.predict(img) 
        pred.append(prediction)
        classes = np.argmax(prediction)
        cls.append(classes)
    for x in pred:
        confs = []
        
        for m in x: 
            confi = (max(m))
            confs.append(confi)
        
        confid.append(confs)
    
    flat_list = [item for sublist in confid for item in sublist]
    prediction_list.append(pred)
    classes_list.append(cls)
    confidence_list.append(flat_list)

In [16]:
flat_list = [item for sublist in classes_list for item in sublist]
print(flat_list.count(0))

5054


In [17]:
predString =[]
for i in range(len(classes_list)):
    string = []
    #print(len(confidence_list[i]))
    for x in range(len(classes_list[i])): 
        conf = confidence_list[i][x]
        cls = classes_list[i][x] 
        masks = all_enc_masks[i][x]
        string.append(str(cls) +  " " + str(conf) + " "+ str(masks)+ " ")
    
    
    x = "".join(string) # this is prediction string!!
    x = x[:-1]
    predString.append(x)
    

In [26]:
ids = []
heig = []
wid = []
for i, row in test_df.iterrows(): 
    ID = row.ID
    ids.append(ID)
    height = row.ImageHeight
    heig.append(height)
    width = row.ImageWidth 
    wid.append(width)

d = {'ID': ids, 'ImageWidth': wid, 'ImageHeight': heig, 'PredictionString': predString}
df = pd.DataFrame(data=d)

In [19]:
""""
ids = []
heig = []
wid = []
for i, row in test_df.iterrows(): 
    ID = row.ID
    ids.append(ID)

d = {'ID': ids, 'PredictionString': all_enc_masks}
df_mask = pd.DataFrame(data=d)
""""

In [29]:
df.head()

Unnamed: 0,ID,ImageWidth,ImageHeight,PredictionString
0,0040581b-f1f2-4fbe-b043-b6bfea5404bb,2048,2048,0 0.3772697 eNq1Vtty4jAM/SUUe+lLp53Z7swOtVGAUB...
1,004a270d-34a2-4d60-bbe4-365fca868193,2048,2048,16 0.45604905 eNoLCEhJMLDIyDc09Df0N/bNNwj0h7Ds...
2,00537262-883c-4b37-a3a1-a4931b6faea5,2048,2048,14 0.5436441 eNp1kn9PwjAQhr9SrwPcMFE0GYjQaxAYj...
3,00c9a1c9-2f06-476f-8b0d-6d01032874a2,2048,2048,12 0.3162971 eNrtVG1TwjAM/ktNN9npd7kx1lQBW1RA3...
4,0173029a-161d-40ef-af28-2342915b22fb,3072,3072,16 0.63141745 eNq1VNuSojAQ/aXQqAU8bhWKI028zYAB...


In [28]:
df.to_csv("/media/beta/mitko-beps/heather/FineTuneResults.csv", index=False)

In [33]:
df["PredictionString"][0]

'0 0.93544114 eNq1Vtty4jAM/SUUe+lLp53Z7swOtVGAUBwn5EIgN275/7e1EsKtTaHtrE2SY0uWbelYJvw1YqMd9Opi11+wm/ex1jJAQtbQwjPJmdymcfWPau9k4n1/VzmpXCoCWqZCs4iP9O16UacdnJZVv7vsHs18NHnbbnd51t/Ybaaw4XZtnVvrf2Hf7T7Mg9a5/Gp/V23bav3V4Zejvc/lBh/k1/68jEfHtpt9NYs+G1/bPsgPrLkgm+nptePgM/3m7Q4Cexpi0tsvILJHIT47CfzeBPg0S+ClDPDRTeCvrzC3FIuHcQo7K/JwMkxSqKzAw9FwlYLDtIdbWaTgs7GHKznKIGCVxqUcZxCyvcaFdDJI2FpjIFUGJUs0KhllsGaxxpmMM9iwSOObXGSwZYHGqVxlsGdaoyNzQkrjRK4zqNhU41juMpjwvYuVcHJw+NrFrXBzeOOZi2sxz0HxpYuFCHPQPHYxF4scPB64mIlVDj73XExFnsOcK0IloSmhHaExoYpQpXAlJgUEfKcwEdMCIr5RGItZAQteKIyELiDhKSGf0FJhKMIClnyhcC7iAlIeKvTFsoCM+4RSQppQTkgp9MSa0JRGbGnEhNC+gBUfKwxERWg3M3M4pZljTUgRymdG6pdGupyZEWFprEQzYy8poeTuDLUoSwh/Qcf5vtm+Ok839E7HtstOl737ynW++a7+LTuf57XrUr0a50csMI6WJSETmlhuCL0RosAxh6QmmAtmgjmXb4ZYzATdlyb8KSsL8KQmlKyhma72T31ie/Ux/igEbao6Jq1DarIOSaitzc3T6h3SAp5CdSE/Ji84zns9j3lesoGhq9c3dM0GjiFzf2eIOzDEDQgtCYV9Q+FkYHYd9StCxhNJf1LCQ9f9cFbZ8OT9e+P3VX7c5uO95+Mn/PnOun42vvuc3jvf/1lXUzziSdSfl4aieCDzkdxfyl/v/05cyOsbsjkSF/e61d6dtZQNuaTv45

In [25]:
df.to_csv("/media/beta/mitko-beps/heather/PRETRAIN.csv", index=False)

In [24]:
df = pd.read_csv("/media/beta/mitko-beps/heather/PRETRAIN_Kaggle.csv")

FileNotFoundError: [Errno 2] No such file or directory: '/media/beta/mitko-beps/heather/PRETRAIN_Kaggle.csv'

In [53]:
df["PredictionString"][0]

'16 0.2981982 eNq1Vtty4jAM/SUUe+lLp53Z7swOtVGAUBwn5EIgN275/7e1EsKtTaHtrE2SY0uWbelYJvw1YqMd9Opi11+wm/ex1jJAQtbQwjPJmdymcfWPau9k4n1/VzmpXCoCWqZCs4iP9O16UacdnJZVv7vsHs18NHnbbnd51t/Ybaaw4XZtnVvrf2Hf7T7Mg9a5/Gp/V23bav3V4Zejvc/lBh/k1/68jEfHtpt9NYs+G1/bPsgPrLkgm+nptePgM/3m7Q4Cexpi0tsvILJHIT47CfzeBPg0S+ClDPDRTeCvrzC3FIuHcQo7K/JwMkxSqKzAw9FwlYLDtIdbWaTgs7GHKznKIGCVxqUcZxCyvcaFdDJI2FpjIFUGJUs0KhllsGaxxpmMM9iwSOObXGSwZYHGqVxlsGdaoyNzQkrjRK4zqNhU41juMpjwvYuVcHJw+NrFrXBzeOOZi2sxz0HxpYuFCHPQPHYxF4scPB64mIlVDj73XExFnsOcK0IloSmhHaExoYpQpXAlJgUEfKcwEdMCIr5RGItZAQteKIyELiDhKSGf0FJhKMIClnyhcC7iAlIeKvTFsoCM+4RSQppQTkgp9MSa0JRGbGnEhNC+gBUfKwxERWg3M3M4pZljTUgRymdG6pdGupyZEWFprEQzYy8poeTuDLUoSwh/Qcf5vtm+Ok839E7HtstOl737ynW++a7+LTuf57XrUr0a50csMI6WJSETmlhuCL0RosAxh6QmmAtmgjmXb4ZYzATdlyb8KSsL8KQmlKyhma72T31ie/Ux/igEbao6Jq1DarIOSaitzc3T6h3SAp5CdSE/Ji84zns9j3lesoGhq9c3dM0GjiFzf2eIOzDEDQgtCYV9Q+FkYHYd9StCxhNJf1LCQ9f9cFbZ8OT9e+P3VX7c5uO95+Mn/PnOun42vvuc3jvf/1lXUzziSdSfl4aieCDzkdxfyl/v/05cyOsbsjkSF/e61d6dtZQNuaTv45