# Import Libraries

In [9]:
import os
import requests
import zipfile
import numpy as np
import io
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt

In [None]:
# Identifier of the CKAN dataset (package) to download.
CKAN_DATASET_ID = "b987cf48-1b6c-44c7-981a-d25da07f1cd3"
# Folder holding the unpacked data: <cwd>/<dataset id>/Retina Blood Vessel Segmentation.
# Built with os.path.join so the separators are right on every OS; the
# previous hard-coded "\\" only produced a usable path on Windows.
DATASET_DIRECTORY = os.path.join(
    os.getcwd(), CKAN_DATASET_ID, "Retina Blood Vessel Segmentation"
)

# Get Dataset From CKAN With URL

In [None]:
# Extracting zip file
def extract_zip(ckan_dataset_id: str):
    """Unpack ``<ckan_dataset_id>.zip`` into a folder named ``<ckan_dataset_id>``.

    Both the archive and the target folder are resolved relative to the
    current working directory.
    """
    archive_path = f'{ckan_dataset_id}.zip'
    target_dir = f'{ckan_dataset_id}'
    with zipfile.ZipFile(archive_path, 'r') as archive:
        archive.extractall(target_dir)

# {{CKAN_API_URL}} is the CKAN base-URL placeholder used in load_data, e.g. http://192.168.1.15:30020
# Example dataset id: b987cf48-1b6c-44c7-981a-d25da07f1cd3
# Loading dataset
def load_data(ckan_dataset_id: str, *, timeout: float = 60):
    """Download the dataset's zip resource from CKAN and unpack it locally.

    Queries the CKAN ``package_show`` action for ``ckan_dataset_id``, picks
    the first resource whose format is ``zip``, downloads it to
    ``<ckan_dataset_id>.zip`` in the current working directory, extracts it
    with ``extract_zip`` and finally deletes the downloaded archive.

    Args:
        ckan_dataset_id: Identifier of the CKAN dataset (package).
        timeout: Seconds to wait on each HTTP request before giving up.

    Raises:
        ValueError: If the dataset has no resource with format ``zip``.
        requests.HTTPError: If CKAN or the file server answers with an
            error status.
    """
    url = "{{CKAN_API_URL}}" + f"/api/3/action/package_show?id={ckan_dataset_id}"
    # NOTE(review): verify=False disables TLS certificate checking for the
    # metadata request; kept as-is, confirm this is intended for the
    # deployment this notebook targets.
    response = requests.get(url, verify=False, timeout=timeout)
    # Fail early on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()
    resources = response.json()["result"]["resources"]

    # First resource declared as a zip archive; tolerate a missing/null format.
    zip_resource = next(
        (
            resource
            for resource in resources
            if (resource.get('format') or '').lower() == 'zip'
        ),
        None,
    )
    if zip_resource is None:
        raise ValueError("No zip file found in the dataset")

    zip_path = f'{ckan_dataset_id}.zip'
    try:
        # Stream to disk in chunks so a large archive is never held in memory.
        with requests.get(zip_resource['url'], stream=True, timeout=timeout) as zip_response:
            zip_response.raise_for_status()
            with open(zip_path, 'wb') as f:
                for chunk in zip_response.iter_content(chunk_size=1 << 20):
                    f.write(chunk)

        extract_zip(ckan_dataset_id)
    finally:
        # Always clean up the temporary archive, even if download or
        # extraction failed part-way.
        if os.path.exists(zip_path):
            os.remove(zip_path)

In [None]:
# Fetch the dataset's zip archive from CKAN and unpack it under the current working directory.
load_data(CKAN_DATASET_ID)

# Read Data

In [None]:
def read_data(image_dir, mask_dir):
    """Load every ``.png`` image and its same-named mask into two arrays.

    Files in ``image_dir`` are visited in sorted name order; for each one the
    mask is read from ``mask_dir`` under the identical filename.

    Returns:
        A tuple ``(images, masks)`` of NumPy arrays in matching order.
    """
    png_names = [
        name for name in sorted(os.listdir(image_dir)) if name.endswith('.png')
    ]

    loaded_images = []
    loaded_masks = []
    for name in png_names:
        # Read the picture first, then the mask stored under the same name.
        loaded_images.append(plt.imread(os.path.join(image_dir, name)))
        loaded_masks.append(plt.imread(os.path.join(mask_dir, name)))

    return np.array(loaded_images), np.array(loaded_masks)

# Define directories for images and masks.
# os.path.join picks the right separator for the host OS; the previous
# hard-coded "\\" suffix only produced a usable path on Windows.
image_dir = os.path.join(DATASET_DIRECTORY, "image")
mask_dir = os.path.join(DATASET_DIRECTORY, "mask")

# Load all image/mask pairs into two arrays
images, masks = read_data(image_dir, mask_dir)

# Split Dataset

In [13]:
# Hold out 20% of the image/mask pairs for testing; the fixed seed keeps the split reproducible
train_images, test_images, train_masks, test_masks = train_test_split(
    images,
    masks,
    test_size=0.2,
    random_state=42,
)

# Train Model


In [14]:
# Small convolutional encoder/decoder assembled layer by layer.
model = keras.Sequential()
# Input shape is taken from the first training image.
model.add(layers.Input(shape=train_images[0].shape))
# Encoder: two convolution + pooling stages.
model.add(layers.Conv2D(32, 3, activation='relu', padding='same'))
model.add(layers.MaxPooling2D())
model.add(layers.Conv2D(64, 3, activation='relu', padding='same'))
model.add(layers.MaxPooling2D())
# Bottleneck convolution.
model.add(layers.Conv2D(128, 3, activation='relu', padding='same'))
# Decoder: two upsampling stages mirroring the two pooling stages.
model.add(layers.UpSampling2D())
model.add(layers.Conv2D(64, 3, activation='relu', padding='same'))
model.add(layers.UpSampling2D())
# Single-channel sigmoid output.
model.add(layers.Conv2D(1, 3, activation='sigmoid', padding='same'))

In [15]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for 10 epochs in batches of 32, holding out 10% of the training data for validation.
model.fit(
    train_images,
    train_masks,
    epochs=10,
    batch_size=32,
    validation_split=0.1,
)

# Evaluate Model Performance

In [None]:
# Score the trained model on the held-out test split and report both values.
test_loss, test_accuracy = model.evaluate(
    test_images,
    test_masks,
)
print(f'Test Loss: {test_loss}, Test Accuracy: {test_accuracy}')

In [None]:
import cloudpickle

# Serialize the trained model and persist exactly those bytes.
# The previous version computed the pickle but then wrote the raw output of
# model.predict(test_masks) to the file, so predict.pkl was not a pickle at
# all (and the prediction was run on masks rather than images).
# NOTE(review): whether a Keras model pickles cleanly depends on the
# installed Keras/TensorFlow version - confirm, or prefer model.save().
predict_pkl = cloudpickle.dumps(model)
with open("predict.pkl", "wb") as f:
    f.write(predict_pkl)