In [None]:
import os, sys
from tqdm import tqdm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import zipfile
import shutil
from pathlib import Path

In [None]:
import warnings
warnings.filterwarnings('ignore')

### Loading packages

In [None]:
import sys
from pathlib import Path

# Make the project's `py.*` modules importable from this notebook.
# repo_path is the grandparent of the current working directory.
# NOTE(review): assumes the notebook is run from two levels below the repo root — confirm.
here_path = Path().resolve()
repo_path = here_path.parents[1]
# Add the path only once, so re-running this cell does not pile up
# duplicate sys.path entries.
if str(repo_path) not in sys.path:
    sys.path.append(str(repo_path))

In [None]:
from py.utils import verifyDir, verifyFile, verifyType

In [None]:
from py.config import Config

# Instantiate the project configuration and seed NumPy's global RNG from it.
cfg = Config()
np.random.seed(seed=cfg.RANDOM_STATE)

# Display the configured data and model paths as this cell's output.
(cfg.DATA_PATH, cfg.MODEL_PATH)

In [None]:
# Locations used by this notebook, derived from the configured paths.
# Each value ends with "/" because the cells below append file names to it
# by plain string formatting.
QSCORE_PATH = f"{cfg.DATA_PATH}pp1/Qscores/"
IMAGES_PATH = f"{cfg.DATA_PATH}pp1/images/"
FEATURES_PATH = f"{cfg.MODEL_PATH}pp1/{cfg.YEAR_STUDIED}/features/"

In [None]:
# Run the project helper on the features output directory before anything is written there.
# NOTE(review): presumably verifyDir creates the directory when it is missing — confirm in py.utils.
verifyDir(FEATURES_PATH)

### Loading data

In [None]:
%%time
# Read the semicolon-separated score table, prefix every image path with the
# studied year's image folder, and order the rows from the highest to the
# lowest value of the configured perception metric.
image_root = f"{IMAGES_PATH}{cfg.YEAR_STUDIED}/"
data_df = (
    pd.read_csv(f"{QSCORE_PATH}scores.csv", sep=";", low_memory=False)
    .assign(image_path=lambda frame: image_root + frame["image_path"])
    .sort_values(by=[cfg.PERCEPTION_METRIC], ascending=False)
)
data_df

### Feature Extraction

In [None]:
# Container for everything this notebook saves at the end. The identifiers and
# paths are copied from the score table; the descriptor slots start empty and
# are filled in by the cells below.
score_ids = data_df["image_id"].tolist()
score_paths = data_df["image_path"].tolist()
features_dict = {
    "image_id": score_ids,
    "image_path": score_paths,
    **{slot: [] for slot in ("gist", "sift", "fisher")},
}

#### GIST

In [None]:
from py.features.gist import extract_gist_features

In [None]:
%%time
# Compute one GIST descriptor per image, in the same order as
# features_dict["image_path"], so list positions stay aligned with the images.
gist_features = []
for img_path in tqdm(features_dict["image_path"]):
    desc = extract_gist_features(img_path)
    # A None result is treated as a failed extraction; it is kept as a
    # placeholder so the list length still matches the number of images.
    gist_features.append(desc)
    if desc is None:
        # Do not inspect desc here: it is None, so len(desc) would raise TypeError.
        print(f"Not Extracted GIST descriptor from {img_path}")

In [None]:
# Store the GIST descriptors as plain Python lists. Converting image by image
# keeps the None placeholders of failed extractions intact; building a single
# np.array() from a mix of arrays and None is not a valid rectangular array.
features_dict["gist"] = [
    None if desc is None else np.asarray(desc).tolist() for desc in gist_features
]

In [None]:
# Sanity-check the GIST descriptors, leaving out failed (None) extractions so
# that a failure on the first image does not break the report.
valid_gist = [desc for desc in gist_features if desc is not None]
if valid_gist:
    print(f"GIST shape: {valid_gist[0].shape}")
    # NOTE(review): this norm is taken over all descriptors stacked together,
    # not over a single descriptor — confirm that is what is intended.
    print(f"GIST norm: {np.linalg.norm(valid_gist):.4f}")
else:
    print("GIST: no descriptors were extracted")

#### SIFT

In [None]:
from py.features.sift import extract_sift_descriptors

In [None]:
%%time
# Compute the SIFT descriptors of every image, in the same order as
# features_dict["image_path"], so list positions stay aligned with the images.
sift_descriptors = []
for img_path in tqdm(features_dict["image_path"]):
    desc = extract_sift_descriptors(img_path)
    # A None result is treated as a failed extraction; it is kept as a
    # placeholder so the list length still matches the number of images.
    sift_descriptors.append(desc)
    if desc is None:
        # Do not inspect desc here: it is None, so len(desc) would raise TypeError.
        print(f"Not Extracted SIFT descriptors from {img_path}")

In [None]:
# Store the SIFT results exactly as collected (no .tolist() conversion, unlike the GIST slot).
# NOTE(review): presumably one descriptor array per image, with varying row counts — confirm.
features_dict["sift"] = sift_descriptors

In [None]:
# Report the descriptor shape of the first image whose SIFT extraction
# succeeded; failed extractions are stored as None and have no shape.
first_sift = next((desc for desc in sift_descriptors if desc is not None), None)
print(f"SIFT shape: {None if first_sift is None else first_sift.shape}")

In [None]:
# Summarise the distinct shapes of the SIFT descriptor arrays.
# Failed extractions are stored as None and have no shape, so they are left out.
valid_sift = [a for a in sift_descriptors if a is not None]
shapes = [a.shape for a in valid_sift]
unique_shapes = sorted(set(shapes))
print("Num arrays:", len(valid_sift))
# Only the first 20 distinct shapes are printed; "..." marks a truncated list.
print("Unique shapes:", unique_shapes[:20], "..." if len(unique_shapes) > 20 else "")
print("How many unique shapes:", len(unique_shapes))

#### Fisher

In [None]:
from py.features.fisher import FisherVectorEncoder

In [None]:
%%time
# Fit the Fisher-vector encoder on the SIFT descriptors of the images whose
# extraction succeeded. None placeholders carry no descriptors, so they are
# dropped before fitting.
fit_descriptors = [desc for desc in features_dict["sift"] if desc is not None]
fv_encoder = FisherVectorEncoder(n_components=32)
fv_encoder.fit(fit_descriptors)
print(f"Trained GMM with {fv_encoder.n_components} components")

In [None]:
%%time
# Encode each image's SIFT descriptors with the fitted encoder, keeping exactly
# one entry per image (None where encoding failed) so positions stay aligned.
fisher_vectors = []
for img_path, descriptor in tqdm(
    zip(features_dict["image_path"], features_dict["sift"]),
    total=len(features_dict["sift"]),  # zip() has no length, so tell tqdm explicitly
):
    try:
        fisher_vectors.append(fv_encoder.encode(descriptor))
    except Exception as e:
        # Best effort: one image that cannot be encoded must not abort the run.
        # Name the image so the failure can be traced afterwards.
        fisher_vectors.append(None)
        print(f"Error {e} extracting Fisher vector from {img_path}")

In [None]:
# Store the Fisher vectors as plain Python lists. Converting image by image
# keeps the None placeholders of failed encodings intact; building a single
# np.array() from a mix of vectors and None is not a valid rectangular array.
features_dict["fisher"] = [
    None if vec is None else np.asarray(vec).tolist() for vec in fisher_vectors
]

In [None]:
# Sanity-check the Fisher vectors, leaving out failed (None) encodings so that
# a failure on the first image does not break the report.
valid_fisher = [vec for vec in fisher_vectors if vec is not None]
if valid_fisher:
    print(f"Fisher Vector shape: {len(valid_fisher[0])}")
    # NOTE(review): this norm is taken over all vectors stacked together,
    # not over a single vector — confirm that is what is intended.
    print(f"Fisher Vector norm: {np.linalg.norm(valid_fisher):.4f}")
else:
    print("Fisher Vector: no vectors were encoded")

### Saving features

In [None]:
%%time
import pickle

# Write the whole features dictionary to a single pickle file in the
# features directory.
features_file = f"{FEATURES_PATH}features.pkl"
with open(features_file, mode="wb") as pkl_out:
    pickle.dump(features_dict, file=pkl_out)