In [1]:
!nvidia-smi

Tue Nov 12 22:27:21 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.161.07             Driver Version: 535.161.07   CUDA Version: 12.3     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Quadro RTX 8000                Off | 00000000:23:00.0 Off |                  Off |
| 33%   37C    P5              27W / 260W |      6MiB / 49152MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [2]:
#%pip install --extra-index-url=https://pypi.nvidia.com cudf-cu12
#%pip install cucim-cu12 cupy-cuda12x

In [3]:
%pip install fastparquet

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3 -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [4]:
#%load_ext cudf.pandas

# To desable GPU usage
#import os
#os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

#from cucim.skimage.exposure import rescale_intensity
import tensorflow as tf
#import cupy as cp
#import torch
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import re
import seaborn as sns
import glob
from sklearn.model_selection import train_test_split

gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # memory limit 16GB (16 * 1024 MB = 16384 MB) 
        tf.config.experimental.set_virtual_device_configuration(
            gpus[0],
            [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=16384)] 
        )
        print("Set GPU memory limit to 16GB.")
    except RuntimeError as e:
        print("Error setting memory limit:", e)
else:
    print("No GPUs available.")

#print("Is torch using cuda? ",torch.cuda.is_available())
print("Is tensorflow using cuda? ",tf.test.is_built_with_cuda())
print("Is pandas using cuda? ",pd)


2024-11-12 22:27:24.087000: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-11-12 22:27:24.118966: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-11-12 22:27:24.128779: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-12 22:27:24.156396: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Set GPU memory limit to 16GB.
Is tensorflow using cuda?  True
Is pandas using cuda?  <module 'pandas' from '/usr/local/lib/python3.11/dist-packages/pandas/__init__.py'>


In [5]:
name_mapping = [
    "box",
    "circularTorus",
    "cone",
    "coneOffset",
    "cylinder",
    "cylinderSlope",
    "dish",
    "mesh",
    "pyramid",
    "rectangularTorus",
    "sphere"
]

In [6]:
def sort_by_number(texts:list[str]):
    def key(text:str):
        text = re.sub(r'.*photos_', '', text)
        text = re.sub(r'\.csv', '', text)
        text = re.sub(r'\D', '', text)
        return int(text)
    return sorted(texts, key=key)

In [7]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import glob
import gc

base_path = '/home/workspace/geometry-classifier/data/'
parquet_files = glob.glob(base_path + 'photos_v3_parquet/*.parquet')

def data_generator():
    counter = 0
    encoder = LabelEncoder()
    
    for file in parquet_files:
        df = pd.read_parquet(file)
        

        df = df.drop(columns=['id'], axis=1)
        df['name'] = encoder.fit_transform(df['name'])
        
        X_train, X_aux, y_train, y_aux = train_test_split(df.drop(columns=["name"]), df['name'], test_size=0.4, random_state=42)
        X_val, X_test, y_val, y_test = train_test_split(X_aux, y_aux, test_size=0.5, random_state=42)
        
        if len(X_train) != len(y_train) or len(X_val) != len(y_val):
            raise ValueError("Mismatch in number of samples between features and labels")
        
        if counter % 5 == 0:
            training_file = f"training_{counter // 5}.parquet"
            training_df = pd.DataFrame(X_train)
            training_df['name'] = y_train
            training_df.to_parquet(base_path + f"training/{training_file}")
        
        X = np.array([x.reshape(224, 224, 1).astype(np.uint8) for x in X_train.to_numpy()])
        X_val = np.array([x.reshape(224, 224, 1).astype(np.uint8) for x in X_val.to_numpy()])
        
        Y = y_train.values
        y_val = y_val.values

        yield X, Y, X_val, y_val
        
        del df, X_train, y_train, X_test, y_test, Y, X, X_val, y_val, X_aux, y_aux
        gc.collect()
        
        counter += 1


In [8]:
# 224 x 224

from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import ModelCheckpoint

model = models.Sequential([

    layers.Conv2D(8, (3, 3), activation='relu', input_shape=(224, 224, 1)),
    layers.MaxPooling2D((2, 2)),
    

    layers.Conv2D(16, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    

    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    

    layers.Flatten(),
    
    layers.Dense(32, activation='relu'),
    layers.Dropout(0.4),
    layers.Dense(10, activation='softmax') 
])

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2024-11-12 22:27:27.939941: I tensorflow/core/common_runtime/gpu/gpu_device.cc:2021] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 16384 MB memory:  -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:23:00.0, compute capability: 7.5


In [9]:
checkpoint_callback = ModelCheckpoint(base_path+'model.keras',save_best_only=True, save_weights_only=False, mode='min', verbose=1)

epochs = 100

for i in range(100):
    print(f"Epochs {i+1}/{epochs}")
    j = 0
    for X_train, Y_train, X_val, Y_val in data_generator():
        j+=1
        print(f"Datasets: {j}/1042")
        cnn = model.fit(X_train,Y_train, epochs=1, callbacks=[checkpoint_callback], batch_size=8, validation_data=(X_val, Y_val), verbose=1)



[1m72/77[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 3ms/step - accuracy: 1.0000 - loss: 0.0000e+00
Epoch 1: val_loss did not improve from 0.00000
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 1.0000 - loss: 0.0000e+00 - val_accuracy: 1.0000 - val_loss: 0.0000e+00
[1m71/77[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 3ms/step - accuracy: 1.0000 - loss: 0.0000e+00
Epoch 1: val_loss did not improve from 0.00000
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 1.0000 - loss: 0.0000e+00 - val_accuracy: 1.0000 - val_loss: 0.0000e+00
[1m73/77[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 3ms/step - accuracy: 1.0000 - loss: 0.0000e+00
Epoch 1: val_loss did not improve from 0.00000
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 1.0000 - loss: 0.0000e+00 - val_accuracy: 1.0000 - val_loss: 0.0000e+00
[1m72/77[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [

KeyboardInterrupt: 