In [None]:
import pandas as pd 
import matplotlib.pyplot as plt
import os 
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler,  RobustScaler
import pickle
import tensorflow as tf
from tensorflow.keras.applications import EfficientNetV2M, ConvNeXtBase
import numpy as np
import gc
from tensorflow.keras.layers import GlobalAveragePooling2D, GlobalMaxPooling2D, Concatenate
from tensorflow.keras.models import Model


In [None]:
# Upstream study whose pickled train/test frames this notebook starts from.
study_name = '426_convnextbase_003_998_1'


def _read_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Test split first, then the training split; `pickle_file_path` is left
# pointing at the training file afterwards.
pickle_file_path = f'./data/test_{study_name}.pickle'
test_df = _read_pickle(pickle_file_path)

pickle_file_path = f'./data/train_{study_name}.pickle'
train_df = _read_pickle(pickle_file_path)

In [None]:
# Label for this run. None of the cells visible in this notebook read it.
this_study_name = '511_convnextbase_avgmax_3'

In [None]:
# Lift pandas' display limits so frames are shown with every row and column
# (None = no limit). The full option keys are used: an abbreviated key such as
# 'display.max_row' only works through pandas' pattern matching and stops
# working if another option ever matches the same pattern.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

In [None]:
# Preview the first rows of the training frame as loaded from disk.
train_df.head()

In [None]:
# ImageNet-pretrained ConvNeXt-Base without its classifier head and without
# built-in pooling; the weights are frozen because the network is only used
# to produce features.
base_model = ConvNeXtBase(include_top=False, weights='imagenet', pooling=None)
base_model.trainable = False

# Pool the backbone output two ways (global average and global max) and join
# both results into a single feature vector per image.
backbone_output = base_model.output
avg_pool = GlobalAveragePooling2D()(backbone_output)
max_pool = GlobalMaxPooling2D()(backbone_output)
con = Concatenate()([avg_pool, max_pool])

feature_model = Model(inputs=base_model.input, outputs=con)
feature_model.summary()

In [None]:

# Number of image paths handed to the model per step; also the stride of the
# batching loops further down.
batch_size = 32

# NOTE(review): TensorFlow has already been imported and a model has been built
# in earlier cells by the time this line runs. Presumably this variable is only
# read when the GPU is first initialised, in which case setting it here is too
# late to have any effect — confirm, and consider setting it before
# `import tensorflow`.
os.environ['TF_GPU_ALLOCATOR'] = 'cuda_malloc_async'


def load_and_preprocess_image(img_path, target_size=(480, 480)):
    """Read a JPEG file from disk and resize it for the feature extractor.

    Args:
        img_path: Path of the image file to read.
        target_size: ``(height, width)`` handed to ``tf.image.resize``.
            Defaults to ``(480, 480)``, the size that used to be hard-coded.

    Returns:
        The decoded 3-channel image resized to ``target_size``. Pixel values
        are not rescaled or normalised here.
    """
    raw_bytes = tf.io.read_file(img_path)
    # channels=3 asks the decoder for a 3-channel result regardless of how the
    # file itself is stored.
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    return tf.image.resize(decoded, target_size)


def extract_features_batch(image_paths):
    """Embed one batch of images with ``feature_model``.

    Args:
        image_paths: Non-empty sequence of image file paths. Every image is
            loaded with ``load_and_preprocess_image`` and the results are
            stacked into a single array, so the whole batch must fit in memory.

    Returns:
        NumPy array holding one feature vector per input path, in input order.
    """
    img_batch = np.stack([load_and_preprocess_image(img_path) for img_path in image_paths])
    # The batch is already small, so call the model directly instead of going
    # through `predict`: Keras documents `predict` as not intended for use
    # inside loops over small inputs and recommends `model(x, training=False)`.
    features = feature_model(img_batch, training=False)
    return np.asarray(features)

# Embed every training image, `batch_size` paths at a time.
image_paths = train_df['image_path'].values

features_list = []
for batch_no, start in enumerate(range(0, len(image_paths), batch_size), start=1):
    chunk = image_paths[start:start + batch_size]
    features_list.append(extract_features_batch(chunk))
    # After every 30th batch, reset Keras' global state and run the garbage
    # collector.
    if batch_no % 30 == 0:
        tf.keras.backend.clear_session()
        gc.collect()
        print(f'Clearing session')

# Stack the per-batch results and store one feature vector per row.
all_features = np.vstack(features_list)
train_df['features_avg'] = list(all_features)




In [None]:
# Preview again: the frame now carries the `features_avg` column added by the
# extraction cell.
train_df.head()

In [None]:
# Print a summary of the training frame's columns after feature extraction.
train_df.info()

In [None]:
# Persist the training frame, including its `features_avg` column.
# (`pickle` is already imported in the notebook's import cell, so it is not
# re-imported here.)
# NOTE(review): the file name does not include `this_study_name` — confirm this
# run is meant to write to a path that is not specific to it.
pickle_file_path = './data/train_convnextbase_df_003_998.pickle'

with open(pickle_file_path, 'wb') as f:
    pickle.dump(train_df, f)


In [None]:

# Embed the test images with the SAME `feature_model` (average + max pooled
# ConvNeXt features) and the same `extract_features_batch` helper that the
# training-extraction cell defines, so train and test feature vectors have an
# identical layout. Do not build a separate backbone or redefine the helpers in
# this cell: an average-pool-only model here would give the test set different
# features from the training set, and would change what a re-run of the
# training cell produces.
image_paths = test_df['image_path'].values

features_list = []
for batch_no, start in enumerate(range(0, len(image_paths), batch_size), start=1):
    batch_paths = image_paths[start:start + batch_size]
    features_list.append(extract_features_batch(batch_paths))
    # After every 30th batch, reset Keras' global state and run the garbage
    # collector, mirroring the training loop.
    if batch_no % 30 == 0:
        tf.keras.backend.clear_session()
        gc.collect()
        print(f'Clearing session')

# Stack the per-batch results and store one feature vector per row.
all_features = np.vstack(features_list)
test_df['features_avg'] = list(all_features)




In [None]:
# Plain-text check of the test frame after feature extraction: its shape,
# then its first rows.
print(test_df.shape)
print(test_df.head())

In [None]:
# Write the test frame, including its `features_avg` column, to disk.
pickle_file_path = './data/test_convnextbase_df_003_998.pickle'
with open(pickle_file_path, mode='wb') as out_file:
    pickle.dump(test_df, out_file)

# Final look at the frame that was just saved.
test_df.head()