In [1]:
sc

In [2]:
import cv2
import pandas as pd
import os
import matplotlib.pyplot as plt
import numpy as np

In [3]:
images_df= sc.binaryFiles("hdfs://172.17.41.8:9000/ADACA1/futbol1,hdfs://172.17.41.8:9000/ADACA1/Rugby")

In [4]:
images_df

org.apache.spark.api.java.JavaPairRDD@ef709b3

In [5]:
from PIL import Image
import io

image_data = images_df.collect()
image_pixels = []
image_labels = []

for image_path, image_byte in image_data:
    image = Image.open(io.BytesIO(image_byte))
    pixels = np.array(image)
    
    image_name = image_path.split('/')[-1].split('.')[0]
    
    image_pixels.append(pixels)
    image_labels.append(image_name)  

df = pd.DataFrame({'image': image_pixels, 'label': image_labels})

[Stage 0:>                                                          (0 + 1) / 1]                                                                                

In [6]:
df.head()

Unnamed: 0,image,label
0,"[[[193, 185, 148], [193, 185, 148], [194, 186,...",futbol_001
1,"[[[25, 33, 20], [22, 30, 17], [22, 30, 19], [2...",futbol_002
2,"[[[58, 64, 54], [67, 73, 63], [52, 55, 46], [5...",futbol_003
3,"[[[208, 196, 172], [208, 196, 172], [208, 196,...",futbol_005
4,"[[[255, 255, 255], [255, 255, 255], [255, 255,...",futbol_006


In [7]:
df.shape

(59, 2)

In [8]:
type(df)

pandas.core.frame.DataFrame

In [9]:
def resize_image(img):
    return cv2.resize(img, (150, 150))

df['image'] = df['image'].apply(resize_image)

In [10]:
df.head()

Unnamed: 0,image,label
0,"[[[193, 185, 148], [194, 186, 149], [195, 187,...",futbol_001
1,"[[[25, 33, 20], [22, 30, 18], [27, 35, 24], [2...",futbol_002
2,"[[[60, 66, 56], [54, 58, 48], [56, 59, 50], [5...",futbol_003
3,"[[[208, 196, 172], [208, 196, 172], [208, 196,...",futbol_005
4,"[[[255, 255, 255], [255, 255, 255], [255, 255,...",futbol_006


In [11]:
from sklearn.model_selection import train_test_split

X = df['image']
y = df['label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print("Training set shape:", X_train.shape, y_train.shape)
print("Testing set shape:", X_test.shape, y_test.shape)



Training set shape: (47,) (47,)
Testing set shape: (12,) (12,)


In [12]:
X_train=np.stack(X_train)
X_test=np.stack(X_test)

In [13]:
X_train.shape

(47, 150, 150, 3)

In [14]:
X_train1 = X_train.reshape(X_train.shape[0], 150, 150, 3).astype(np.float32)
X_test1 = X_test.reshape(X_test.shape[0], 150, 150, 3).astype(np.float32)
print("Training set shape for CNN:", X_train1.shape)
print("Testing set shape for CNN:", X_test1.shape)

Training set shape for CNN: (47, 150, 150, 3)
Testing set shape for CNN: (12, 150, 150, 3)


In [15]:
#normalize data
X_train1/=255
X_test1/=255

In [16]:
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

y_train_category = [label.split('_')[0] for label in y_train]
y_test_category = [label.split('_')[0] for label in y_test]

label_encoder = LabelEncoder()
all_categories = y_train_category + y_test_category
label_encoder.fit(all_categories)

y_train_encoded = label_encoder.transform(y_train_category)
y_test_encoded = label_encoder.transform(y_test_category)

num_classes = len(label_encoder.classes_)
y_train_encoded = to_categorical(y_train_encoded, num_classes=num_classes)
y_test_encoded = to_categorical(y_test_encoded, num_classes=num_classes)

print("Shape of y_train_encoded:", y_train_encoded.shape)
print("Shape of y_test_encoded:", y_test_encoded.shape)


2024-03-22 02:42:15.822582: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-03-22 02:42:15.854558: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Shape of y_train_encoded: (47, 3)
Shape of y_test_encoded: (12, 3)


In [17]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    MaxPooling2D((2, 2)),
    Dropout(0.3),  # Adjust dropout rate
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Dropout(0.4),  # Adjust dropout rate
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Dropout(0.5),  # Adjust dropout rate
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),  # Adjust dropout rate
    Dense(num_classes, activation='softmax')  
])

  super().__init__(
2024-03-22 02:42:16.920925: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-03-22 02:42:16.921214: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2251] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [18]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [19]:
model.fit(X_train, y_train_encoded, epochs=30, batch_size=32, validation_data=(X_test, y_test_encoded))


Epoch 1/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 229ms/step - accuracy: 0.3803 - loss: 125.0425 - val_accuracy: 0.5000 - val_loss: 20.2378
Epoch 2/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 114ms/step - accuracy: 0.4087 - loss: 181.4095 - val_accuracy: 0.5000 - val_loss: 9.5211
Epoch 3/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 114ms/step - accuracy: 0.4863 - loss: 57.0816 - val_accuracy: 0.5000 - val_loss: 1.4048
Epoch 4/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 116ms/step - accuracy: 0.4362 - loss: 14.2132 - val_accuracy: 0.5000 - val_loss: 0.7922
Epoch 5/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 114ms/step - accuracy: 0.5071 - loss: 4.9124 - val_accuracy: 0.5000 - val_loss: 0.5510
Epoch 6/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 116ms/step - accuracy: 0.4929 - loss: 1.6358 - val_accuracy: 0.4167 - val_loss: 0.6119
Epoch 7/30
[1m2/2[0m [32m━━━━━

<keras.src.callbacks.history.History at 0x7f8eed5b4df0>

In [20]:
test_loss, test_accuracy = model.evaluate(X_test, y_test_encoded)
print("Test accuracy:", test_accuracy)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.6667 - loss: 0.5430
Test accuracy: 0.6666666865348816
