# Inference with a pretrained model

In [2]:
from model_components.k_means import  CustomKMeans
import numpy as np
from model_components.CNN_model import Cplx_CustomCNN_1D
from model_components.Attention_Layer import  AttentionLayer
from model_components.classification import CustomClassifierModel
import tensorflow as tf

import re
import numpy as np
import pandas as pd

## DATA

In [3]:
# Load the data
df = pd.read_csv('/Users/thibaultgillard/Documents/EPF/git/Data/post_processed_data_train_100.csv')

def extract_multiple_arrays(string_repr):
    # Remove letters, parentheses, spaces, and unwanted sequences
    string_repr = re.sub('[a-df-zA-DF-Z\(\)\s]', '', string_repr)
    string_repr = re.sub(r',=32', '', string_repr)

    # Find all matches of arrays within the string
    arrays = re.findall(r'\[.*?\]', string_repr)

    # Convert each found array string into a NumPy array
    #np_arrays = [np.array(re.findall(r'[+-]?\d+(?:\.\d+)?', array), dtype=float) for array in arrays]
    np_arrays = [np.array(re.findall(r'[+-]?\d+(?:\.\d+)?(?:e[+-]?\d+)?', array), dtype=float) for array in arrays]
    return np_arrays

df['pixels_value'] = df['pixels_value'].apply(extract_multiple_arrays)
# pixels float to int
print(df.head())

   Unnamed: 0  segment_id  polygon_id  class_id  \
0           0      364025         3.0       1.0   
1           1      367183         4.0       2.0   
2           2      369134         4.0       2.0   
3           3      369135         4.0       2.0   
4           4      370405         4.0       2.0   

                                              pixels  \
0  [[1133, 2933], [1133, 2934], [1133, 2935], [11...   
1  [[1144, 2945], [1144, 2946], [1144, 2947], [11...   
2  [[1149, 2941], [1149, 2942], [1149, 2943], [11...   
3  [[1149, 2945], [1149, 2946], [1149, 2947], [11...   
4  [[1153, 2941], [1153, 2942], [1153, 2943], [11...   

                                    Perimeter Pixels  \
0  [[1133, 2933], [1134, 2933], [1135, 2933], [11...   
1  [[1144, 2945], [1145, 2945], [1146, 2945], [11...   
2  [[1149, 2941], [1150, 2941], [1151, 2941], [11...   
3  [[1149, 2945], [1150, 2945], [1151, 2945], [11...   
4  [[1153, 2941], [1154, 2941], [1155, 2941], [11...   

                   

## ALGORITHM

In [None]:
# K-means
stacked_arrays = []

# Stack the arrays for each cell
for i, cell_pixels in enumerate(df['pixels_value']):
    stacked_array = np.vstack(cell_pixels)
    stacked_arrays.append(stacked_array)
        
n_clusters=2
custom_kmeans = CustomKMeans(n_clusters=n_clusters)

# Fit the model to the data and get the cluster centers
clustered_data = []
clustered_labels = []
for i in range (len(stacked_arrays)):
    custom_kmeans.fit(stacked_arrays[i])
    clustered_data.append(custom_kmeans.get_cluster_centers())
    clustered_labels.append(custom_kmeans.get_cluster_labels())
    
clusters = np.array(clustered_data)
print(clusters.shape)

In [5]:
print(clusters.shape)

(2324, 2, 73, 10)


In [6]:
tf.config.run_functions_eagerly(True)

In [7]:
import tensorflow as tf
import numpy as np

class Custom_Model(tf.keras.Model):
    def __init__(self):
        super(Custom_Model, self).__init__()
        self.enc = Cplx_CustomCNN_1D()
        self.attn = AttentionLayer()
        self.classifier = CustomClassifierModel(num_classes=8, fc_units=64)
        
    def call(self, inputs):
        input_list = tf.unstack(inputs, axis=1)
        intermediate = [self.enc(input) for input in input_list]
        intermediate = tf.stack(intermediate, axis=1)        
        emb, alphas = self.attn(intermediate)
        return self.classifier(emb), alphas

### Initialize the model

In [8]:
# Initialize the model
model = Custom_Model()
input_shape = (2, 73, 10)

# Create a dummy input tensor for model initialization
dummy_input = tf.random.normal((1,) + input_shape)

# Pass the dummy input through the model to initialize the layers
model(dummy_input)

2024-01-09 01:01:32.303110: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2
2024-01-09 01:01:32.303168: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 8.00 GB
2024-01-09 01:01:32.303174: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 2.67 GB
2024-01-09 01:01:32.303242: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-01-09 01:01:32.303429: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


(<tf.Tensor: shape=(1, 8), dtype=float32, numpy=
 array([[0.12246784, 0.12367103, 0.12714456, 0.12489398, 0.12778844,
         0.12337242, 0.12485549, 0.12580626]], dtype=float32)>,
 <tf.Tensor: shape=(1, 2), dtype=float32, numpy=array([[0.49944344, 0.50055665]], dtype=float32)>)

In [9]:
# Load the weights
SAVE_PATH = 'saved_models/'

model.load_weights(SAVE_PATH + 'model_weights_300_epochs.h5')

## RESULTS

In [23]:
# Custom training loop to efficiently compute predictions and alphas
batch_size = 32
predictions = []
all_alphas = []

for i in range(0, len(clusters), batch_size):
    batch_clusters = clusters[i:i+batch_size]
    batch_predictions, batch_alphas = model(batch_clusters)
    predictions.append(batch_predictions)
    all_alphas.append(batch_alphas)

# Concatenate predictions and alphas
predictions = tf.concat(predictions, axis=0)
all_alphas = tf.concat(all_alphas, axis=0)
alphas = np.array(all_alphas)
df_inferring = df.iloc[:, 1:6]

In [24]:
import math


df_inferring = df.iloc[:, 1:6]

# Get the class with the highest probability
df_inferring['class_id'] = np.argmax(predictions, axis=1) + 1
df_inferring['class_id'] = df_inferring['class_id'].astype('int8')

# Pixels' cluster
df_inferring['pixels_cluster'] = clustered_labels


df_inferring['alphas'] = alphas.tolist()

# Function to round to the nearest hundredth
def round_to_hundredth(lst):
    return [math.ceil(num * 100) / 100 for num in lst]

df_inferring['alphas'] = df_inferring['alphas'].apply(round_to_hundredth)


# Function to replace the cluster labels with the corresponding alpha values, ideal to make a heatmap
def replace_values(row):
    return [row['alphas'][val] for val in row['pixels_cluster']]

df_inferring['heatmap'] = df_inferring.apply(replace_values, axis=1)

df_inferring.head()


Unnamed: 0,segment_id,polygon_id,class_id,pixels,Perimeter Pixels,pixels_cluster,alphas,heatmap
0,364025,3.0,1,"[[1133, 2933], [1133, 2934], [1133, 2935], [11...","[[1133, 2933], [1134, 2933], [1135, 2933], [11...","[1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]","[0.43, 0.58]","[0.58, 0.58, 0.58, 0.58, 0.58, 0.58, 0.58, 0.5..."
1,367183,4.0,2,"[[1144, 2945], [1144, 2946], [1144, 2947], [11...","[[1144, 2945], [1145, 2945], [1146, 2945], [11...","[1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, ...","[0.5, 0.51]","[0.51, 0.51, 0.51, 0.51, 0.51, 0.51, 0.51, 0.5..."
2,369134,4.0,2,"[[1149, 2941], [1149, 2942], [1149, 2943], [11...","[[1149, 2941], [1150, 2941], [1151, 2941], [11...","[0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]","[0.57, 0.44]","[0.57, 0.57, 0.57, 0.57, 0.57, 0.57, 0.57, 0.5..."
3,369135,4.0,2,"[[1149, 2945], [1149, 2946], [1149, 2947], [11...","[[1149, 2945], [1150, 2945], [1151, 2945], [11...","[1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1]","[0.75, 0.26]","[0.26, 0.26, 0.75, 0.75, 0.26, 0.26, 0.75, 0.7..."
4,370405,4.0,2,"[[1153, 2941], [1153, 2942], [1153, 2943], [11...","[[1153, 2941], [1154, 2941], [1155, 2941], [11...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1]","[0.55, 0.46]","[0.55, 0.55, 0.55, 0.55, 0.55, 0.55, 0.55, 0.5..."
