In [1]:
import pandas as pd
import numpy as np

In [2]:
csv_file_path = '/mnt/d/data.csv'  
df = pd.read_csv(csv_file_path)

In [3]:
fraction = 0.5  # Use 10% of the data, adjust as needed
df_sampled = df.sample(frac=fraction, random_state=42)

In [4]:
pixel_columns = [f'pixel_{str(i).zfill(4)}' for i in range(1024)]

In [5]:
X = df_sampled[pixel_columns].astype(np.float32).values / 255.0

In [6]:
def process_batch(batch, img_height, img_width):
    batch = batch.reshape(-1, img_height, img_width, 1)
    return batch

In [7]:
from skimage.transform import resize

In [8]:
def resize_images(X, new_height, new_width):
    num_images = X.shape[0]
    X_resized = np.zeros((num_images, new_height, new_width))
    for i in range(num_images):
        X_resized[i] = resize(X[i].reshape(32, 32), (new_height, new_width), anti_aliasing=True)
    return X_resized

In [9]:
new_height, new_width = 16, 16
X_resized = resize_images(X, new_height, new_width)
X_resized = X_resized.reshape(-1, new_height, new_width, 1)

In [10]:
y = df_sampled['character'].values

In [11]:
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

2024-06-20 11:42:25.164716: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [12]:
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)
num_classes = len(label_encoder.classes_)
y = to_categorical(y, num_classes)

In [13]:
from sklearn.model_selection import train_test_split

In [14]:
X_train, X_val, y_train, y_val = train_test_split(X_resized, y, test_size=0.2, random_state=42)


In [15]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Flatten, Dense
from tensorflow.keras.models import Model

In [16]:
def create_model(input_shape, num_classes):
    inputs = Input(shape=input_shape)
    x = Flatten()(inputs)
    x = Dense(512, activation='relu')(x)
    x = Dense(256, activation='relu')(x)
    x = Dense(128, activation='relu')(x)
    outputs = Dense(num_classes, activation='softmax')(x)
    model = Model(inputs, outputs)
    return model

In [17]:
input_shape = (new_height, new_width, 1)
model = create_model(input_shape, num_classes)
model.summary()

2024-06-20 11:42:47.984077: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-06-20 11:42:47.985586: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2251] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [34]:
epochs = 3
batch_size = 32
learning_rate = 0.001


In [19]:
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
loss_fn = tf.keras.losses.CategoricalCrossentropy()

In [20]:
train_acc_metric = tf.keras.metrics.CategoricalAccuracy()
val_acc_metric = tf.keras.metrics.CategoricalAccuracy()

In [21]:
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(buffer_size=1024).batch(batch_size)
val_dataset = tf.data.Dataset.from_tensor_slices((X_val, y_val)).batch(batch_size)


2024-06-20 11:43:11.984785: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 18841600 exceeds 10% of free system memory.


In [35]:
for epoch in range(epochs):
    print(f'Start of epoch {epoch+1}')
    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
        with tf.GradientTape() as tape:
            logits = model(x_batch_train, training=True)
            loss_value = loss_fn(y_batch_train, logits)
        
        grads = tape.gradient(loss_value, model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_weights))

        train_acc_metric.update_state(y_batch_train, logits)

        if step % 100 == 0:
            print(f'Training loss at step {step}: {float(loss_value):.4f}')
            print(f'Training accuracy: {float(train_acc_metric.result()):.4f}')
            train_acc_metric.reset_state()

    # Validation loop
    for x_batch_val, y_batch_val in val_dataset:
        val_logits = model(x_batch_val, training=False)
        val_acc_metric.update_state(y_batch_val, val_logits)

    val_acc = val_acc_metric.result()
    print(f'Validation accuracy: {float(val_acc):.4f}')
    val_acc_metric.reset_state()

Start of epoch 1
Training loss at step 0: 0.0002
Training accuracy: 0.9933
Training loss at step 100: 0.0477
Training accuracy: 0.9922
Training loss at step 200: 0.0186
Training accuracy: 0.9878
Training loss at step 300: 0.0908
Training accuracy: 0.9831
Training loss at step 400: 0.0003
Training accuracy: 0.9828
Training loss at step 500: 0.0302
Training accuracy: 0.9847
Training loss at step 600: 0.0030
Training accuracy: 0.9856
Training loss at step 700: 0.0008
Training accuracy: 0.9841
Training loss at step 800: 0.0605
Training accuracy: 0.9856
Training loss at step 900: 0.0028
Training accuracy: 0.9881
Training loss at step 1000: 0.0033
Training accuracy: 0.9909
Training loss at step 1100: 0.0068
Training accuracy: 0.9866


2024-06-20 12:09:34.384597: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
2024-06-20 12:09:35.703218: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Validation accuracy: 0.9214
Start of epoch 2
Training loss at step 0: 0.0004
Training accuracy: 0.9925
Training loss at step 100: 0.0625
Training accuracy: 0.9891
Training loss at step 200: 0.0053
Training accuracy: 0.9887
Training loss at step 300: 0.0037
Training accuracy: 0.9881
Training loss at step 400: 0.0102
Training accuracy: 0.9875
Training loss at step 500: 0.0002
Training accuracy: 0.9906
Training loss at step 600: 0.0001
Training accuracy: 0.9925
Training loss at step 700: 0.0056
Training accuracy: 0.9934
Training loss at step 800: 0.0234
Training accuracy: 0.9887
Training loss at step 900: 0.0017
Training accuracy: 0.9947
Training loss at step 1000: 0.0023
Training accuracy: 0.9881
Training loss at step 1100: 0.0201
Training accuracy: 0.9887


2024-06-20 12:10:13.996706: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
2024-06-20 12:10:15.306898: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Validation accuracy: 0.9313
Start of epoch 3
Training loss at step 0: 0.0052
Training accuracy: 0.9919
Training loss at step 100: 0.1601
Training accuracy: 0.9894
Training loss at step 200: 0.1617
Training accuracy: 0.9847
Training loss at step 300: 0.0068
Training accuracy: 0.9850
Training loss at step 400: 0.0012
Training accuracy: 0.9831
Training loss at step 500: 0.0045
Training accuracy: 0.9809
Training loss at step 600: 0.1649
Training accuracy: 0.9841
Training loss at step 700: 0.0013
Training accuracy: 0.9903
Training loss at step 800: 0.0899
Training accuracy: 0.9884
Training loss at step 900: 0.0042
Training accuracy: 0.9866
Training loss at step 1000: 0.0041
Training accuracy: 0.9847
Training loss at step 1100: 0.1410
Training accuracy: 0.9819


2024-06-20 12:10:52.983554: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Validation accuracy: 0.9177


2024-06-20 12:10:54.326589: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
