In [None]:
import os, sys
from tqdm import tqdm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import zipfile
import joblib
import time
import shutil
import collections
from pathlib import Path

In [None]:
import warnings
warnings.filterwarnings('ignore')

### Loading packages

In [None]:
import sys
from pathlib import Path

# Put the directory two levels above the working directory on sys.path so the
# project-local `py.*` imports in the following cells can be resolved.
# NOTE(review): assumes the notebook is started from a folder two levels below
# the repository root — confirm against the repo layout.
here_path = Path().resolve()  # absolute path of the current working directory
repo_path = here_path.parents[1]

# Guarded so that re-running this cell does not pile up duplicate entries.
if str(repo_path) not in sys.path:
    sys.path.append(str(repo_path))

In [None]:
from py.utils import verifyDir, verifyFile, verifyType

In [None]:
from py.config import Config

# Configuration object read by the cells below (paths, names, metric, seed).
cfg = Config()

# Seed NumPy's global random generator from the configured random state.
np.random.seed(cfg.RANDOM_STATE)

# Show the configured data and model locations as the cell output.
(cfg.DATA_PATH, cfg.MODEL_PATH)

In [None]:
# All locations are built by plain string concatenation, so each value is
# written with a trailing "/".
# NOTE(review): this relies on cfg.DATA_PATH itself ending with "/" — confirm in py.config.

# Directory from which scores.csv is read.
QSCORE_PATH = f"{cfg.DATA_PATH}pp2/{cfg.SCORING_METHOD}/{cfg.PLACE_LEVEL}/"
# Prefix prepended to the image paths listed in scores.csv.
IMAGES_PATH = f"{cfg.DATA_PATH}pp2/images/"

# Presumably the folder of the segmentation dataset itself — verify usage.
SEG_DATASET_DIR = f"{cfg.DATA_PATH}{cfg.DATASET_SEG_NAME}/"
# Backward-compatible alias for the original, misspelled name.
SEG_DATASER_DIR = SEG_DATASET_DIR

# Output root for the per-city artefacts and the aggregated segmentations.csv.
FEATURES_PATH = f"{cfg.DATA_PATH}pp2/segmentations/{cfg.DATASET_SEG_NAME}/{cfg.MODEL_SEG_NAME}/"

In [None]:
# NOTE(review): presumably creates FEATURES_PATH when it does not exist yet —
# confirm against py.utils.verifyDir.
verifyDir(FEATURES_PATH)

### Loading data

In [None]:
%%time
data_df = pd.read_csv(f"{QSCORE_PATH}scores.csv", sep=";", low_memory=False)
if cfg.CITY_STUDIED.lower() != "all":
    data_df = data_df[data_df["city"]==cfg.CITY_STUDIED].copy()
data_df["image_path"] = f"{IMAGES_PATH}" + data_df["image_path"]
data_df.sort_values(by=[cfg.PERCEPTION_METRIC], ascending=False, inplace=True)
data_df

### Feature Extraction

In [None]:
# Column-oriented container: image ids and image paths copied from data_df,
# plus an empty list stored under the segmentation dataset's name.
# NOTE(review): nothing in the visible notebook reads features_dict afterwards —
# confirm it is still needed.
features_dict = {
    column: data_df[column].tolist() for column in ("image_id", "image_path")
}
features_dict[cfg.DATASET_SEG_NAME] = []

In [None]:
from py.models.datasets.transformations import ImageTransforms

# Request the "test" transform; it is later handed to seg_model.extract_masks
# as `transforms_list` for every image.
image_transform = ImageTransforms().get(
    type_transform="test",
)
image_transform

In [None]:
from py.models.segmentation.cnn import ConvMaskClassifier

# Dataset and backbone names both come from the shared configuration.
seg_model = ConvMaskClassifier(
    dataset=cfg.DATASET_SEG_NAME,
    backbone=cfg.MODEL_SEG_NAME,
)
# NOTE(review): presumably builds/loads the network — confirm in
# py.models.segmentation.cnn. Its return value is shown as the cell output.
seg_model.get_model()

In [None]:
%%time

seg_dict = { k[1]: [] for k in seg_model._get_color_dict()}
seg_dict["image_id"] = []

for i, current_city in enumerate(data_df["city"].unique()):
    print(f"{i+1}, Evaluating city {current_city}")
    city_df = data_df[data_df["city"]==current_city].copy()
    OUT_DIR = f"{FEATURES_PATH}/{current_city}/"
    verifyDir(OUT_DIR)
    verifyDir(f"{OUT_DIR}/masks/")
    verifyDir(f"{OUT_DIR}/segmented_images/")
    verifyDir(f"{OUT_DIR}/segmented_images_overlay/")
    for idx, row in tqdm(city_df.iterrows()):
        image_path = row["image_path"]
        image_id = row["image_id"]
        mask_matrix, mask_image, mask_overlay_image = seg_model.extract_masks(image_path, transforms_list=image_transform)
        mask_features = seg_model._get_objects_ratio(mask_matrix, factor=100)
        # masks
        joblib.dump(mask_matrix, f"{OUT_DIR}/masks/{image_id}.pkl")
        # seg image
        mask_image.save(f"{OUT_DIR}/segmented_images/{image_id}.png")
        # seg overlay image
        mask_overlay_image.save(f"{OUT_DIR}/segmented_images_overlay/{image_id}.png")
        
        seg_dict["image_id"].append(image_id)
        for k in seg_dict.keys():
            if k == "image_id":
                continue
            if k in mask_features:
                seg_dict[k].append(mask_features[k])
            else:
                seg_dict[k].append(0.0)

In [None]:
# One row per processed image: a column for every key of seg_dict
# (the segmentation classes plus image_id).
segmentations_df = pd.DataFrame(seg_dict)
segmentations_df

### Saving features

In [None]:
# Write the feature table as a ';'-separated CSV, without the index column,
# directly under FEATURES_PATH.
segmentations_df.to_csv(f"{FEATURES_PATH}segmentations.csv", sep=";", index=False)