# Explore here


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import set_random_seed
from sklearn.metrics import accuracy_score


In [None]:
# Side length, in pixels, of the square every image is resized to when loaded.
IMAGE_SIZE = 200

# Dataset locations. TRAIN_DIR is the folder whose files are listed and loaded
# as training images; TEST_DIR is presumably the unlabeled test set (it is not
# referenced by the cells visible here -- confirm before relying on it).
TRAIN_DIR = "/data/raw/train/"
TEST_DIR = "/data/raw/test1/"

In [None]:
import os
import numpy as np
import cv2
import pandas as pd
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

# Load every readable image in TRAIN_DIR into memory, together with its label.
#
# Produces (all index-aligned):
#   filenames - names of the files that were successfully decoded
#   labels    - 0 for files whose name starts with 'dog', 1 for everything else
#   images    - RGB float32 arrays of shape (IMAGE_SIZE, IMAGE_SIZE, 3) in [0, 1]
#   X, y      - stacked image array and one-hot encoded labels

# os.listdir returns entries in arbitrary order; sort so that the seeded
# train/validation split downstream sees the same order on every run.
candidates = sorted(os.listdir(TRAIN_DIR))

filenames = []
labels = []
images = []
for fname in tqdm(candidates, desc="Loading images"):
    path = os.path.join(TRAIN_DIR, fname)
    img = cv2.imread(path)
    if img is None:
        # cv2.imread signals an unreadable / non-image file by returning None
        # instead of raising; skip it rather than crash in cv2.resize.
        continue
    # OpenCV decodes to BGR; convert to RGB so matplotlib shows true colours.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE))
    # Normalize to [0, 1]; float32 halves the memory of the default float64.
    images.append(img.astype(np.float32) / 255.0)
    # NOTE: any file not prefixed with 'dog' is labelled as class 1 (cat).
    labels.append(0 if fname.startswith('dog') else 1)
    filenames.append(fname)

# Convert to NumPy arrays

X = np.array(images, dtype=np.float32)
y = to_categorical(np.array(labels), num_classes=2)


In [None]:
# Hold out 20% of the samples for validation. Stratifying on y keeps the class
# proportions the same in both parts; the fixed random_state seeds the shuffle.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)


In [None]:
import matplotlib.pyplot as plt

# Preview the first nine training images in a 3x3 grid, one per subplot,
# each titled with the class decoded from its one-hot label.
fig, axes = plt.subplots(nrows=3, ncols=3, figsize=(8, 8))
for i, ax in enumerate(axes.flat):
    ax.imshow(X_train[i])
    # Position 0 of the one-hot vector means dog; anything else is shown as cat.
    if np.argmax(y_train[i]) == 0:
        title = "Dog"
    else:
        title = "Cat"
    ax.set_title(title)
    ax.axis("off")
plt.tight_layout()
plt.show()


In [None]:
from tensorflow.keras.layers import Flatten
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Seed the random number generators so weight initialisation is repeatable.
set_random_seed(42)

# Simple fully connected classifier over the raw pixels.
# NOTE: the model is only defined here; it is not compiled or trained in this cell.
model = Sequential([
  # Flatten each IMAGE_SIZE x IMAGE_SIZE x 3 colour image into one vector of
  # IMAGE_SIZE * IMAGE_SIZE * 3 elements (must match the loaded image arrays)
  Flatten(input_shape = (IMAGE_SIZE, IMAGE_SIZE, 3)),
  # Dense hidden layer with 128 neurons and ReLU activation function
  Dense(128, activation = "relu"),
  # Output layer with 2 neurons (one for each category, 0 for dog, 1 for cat).
  # No activation is applied, so the outputs are raw scores (logits); pair it
  # with a loss configured for logits when compiling.
  Dense(2)
])

In [None]:

# Train and evaluate a small binary classifier on the tabular CSV dataset.
# NOTE(review): this cell rebinds X_train, y_train and model, replacing the
# image-based values created by the earlier cells.
set_random_seed(42)

# Load the dataset
# NOTE(review): assumes the CSV has a numeric 'image' feature column and a
# 0/1 'label' column -- TODO confirm against the actual file.
df = pd.read_csv('data/raw/dogs-vs-cats.csv')

# Split the dataset into training and testing sets.
# Select the feature with a list of column names so X stays two-dimensional:
# df['image'] would be a 1-D Series, and X_train.shape[1] below would then
# raise IndexError.
X_train, X_test, y_train, y_test = train_test_split(df[['image']], df['label'], test_size=0.2, random_state=42)

# Create the model: two hidden ReLU layers and a single sigmoid output unit
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(X_train.shape[1],)))
model.add(Dense(64, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=32)

# Evaluate the model: threshold the sigmoid output at 0.5 to get class labels
y_pred = model.predict(X_test)
y_pred = (y_pred > 0.5).astype(int)
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy)
