In [None]:
# Colab-only setup: mount Google Drive at /content/drive so the dataset files
# stored there can be read by the cells below.
# NOTE(review): later cells use the relative path 'drive/MyDrive/...', which
# presumably relies on the working directory being /content — confirm.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Quietly (-q) extract the image archive from Drive into the current working directory.
# NOTE(review): preprocess_image reads files from 'cropped/', so the archive is
# presumably expected to unpack into a top-level 'cropped/' folder — confirm.
!unzip -q drive/MyDrive/data/cropped.zip

In [None]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import layers, models
from tensorflow.keras.layers import LSTM

In [None]:
# Load the metadata table. The code below reads two of its columns:
# 'image' (an image path, rows with NaN are dropped) and 'phone' (used as the class label).
data = pd.read_csv('drive/MyDrive/data/data.csv')

def preprocess_image(image_path, target_size=(28, 28), image_dir="cropped"):
    """Load one image as a normalised single-channel float array.

    Only the file name of ``image_path`` is used; any leading directories
    (split on '/') are discarded and the file is looked up in ``image_dir``.

    Args:
        image_path: Path string from the metadata table, or NaN/None when the
            row has no image.
        target_size: ``(height, width)`` the image is resized to on load.
        image_dir: Directory that holds the extracted image files.

    Returns:
        ``None`` when ``image_path`` is missing; otherwise a float32 array of
        shape ``(height, width, 1)`` (with Keras' default channels-last
        layout) whose values are scaled to the range [0, 1].
    """
    if pd.isna(image_path):
        return None

    file_name = image_path.split('/')[-1]

    # load_img defaults to RGB, which would produce three channels and break
    # the later reshape to (samples, 28, 28). Loading as grayscale yields a
    # single channel, and img_to_array already appends that channel axis, so
    # no extra expand_dims is needed.
    img = load_img(
        f"{image_dir}/{file_name}",
        target_size=target_size,
        color_mode="grayscale",
    )
    pixels = img_to_array(img)

    # Scale 8-bit intensities to [0, 1].
    return pixels.astype('float32') / 255.0

# Keep only rows that have an image path. The index of this Series is reused
# below so that images and labels stay aligned row-for-row.
valid_image_paths = data['image'].dropna()

# Every remaining path is non-NaN, so preprocess_image never returns None here
# and the array holds exactly one entry per valid row.
images = np.array([preprocess_image(path) for path in tqdm(valid_image_paths)])

label_encoder = LabelEncoder()
# Encode the 'phone' value of the same rows as integer class ids.
labels = label_encoder.fit_transform(data.loc[valid_image_paths.index, 'phone'])
# Take the class count from the encoder (i.e. from the rows actually used)
# rather than from the full table, so the one-hot width and the size of the
# output layer can never disagree.
num_classes = len(label_encoder.classes_)
labels = to_categorical(labels, num_classes=num_classes)  # One-hot encode the labels

assert len(images) == len(labels), "images and labels must stay aligned"

X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)

# The LSTM consumes each image as a sequence of rows: (samples, rows, features per row).
# Collapsing all trailing axes with -1 works for whatever channel layout
# preprocess_image returns, instead of assuming exactly 28 x 28 values per image.
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], -1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], -1))

model = models.Sequential([
    # Input shape is derived from the data: (timesteps, features).
    LSTM(128, input_shape=X_train.shape[1:]),
    layers.Dense(num_classes, activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

model.fit(X_train, y_train, epochs=10, batch_size=64)

test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"Test accuracy: {test_acc}")