In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import cv2  # For image processing
import gc
import os
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.preprocessing.image import ImageDataGenerator  # For image augmentation
from tensorflow.keras.models import Sequential  # For building the neural network
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout  # For defining CNN layers
from tensorflow.keras.optimizers import Adam  # For the optimizer
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix  # For evaluation

In [None]:
image_path = '/kaggle/input/gender-dataset/Dataset/Train/Female/000001.jpg'

# Load the image using OpenCV.
# cv2.imread does not raise when the file is missing or unreadable; it
# returns None, which would otherwise surface as an opaque cvtColor error.
image = cv2.imread(image_path)
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

# Convert the image from BGR (OpenCV default) to RGB (Matplotlib default)
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Plot the image using Matplotlib
plt.imshow(image_rgb)
plt.axis('off')  # Turn off axis labels
plt.show()

In [None]:
# Folders holding the labelled Train and Validation splits
train_dir, val_dir = (
    '/kaggle/input/gender-dataset/Dataset/Train',
    '/kaggle/input/gender-dataset/Dataset/Validation',
)

def get_data(directory):
    """Collect ``(image_path, label)`` pairs from a split directory.

    Every entry found in ``directory/Male`` is labelled 0 and every entry
    found in ``directory/Female`` is labelled 1.

    Args:
        directory: Path of a folder containing ``Male`` and ``Female``
            sub-folders.

    Returns:
        A list of ``(path, label)`` tuples: all Male entries first, then all
        Female entries, each group in sorted filename order.

    Raises:
        OSError: If either class sub-folder cannot be listed.
    """
    data = []
    for label, gender in enumerate(['Male', 'Female']):
        gender_dir = os.path.join(directory, gender)
        # os.listdir returns names in arbitrary order; sorting makes the
        # result deterministic so that seeded shuffles and samples applied
        # to it are reproducible.
        for img_name in sorted(os.listdir(gender_dir)):
            data.append((os.path.join(gender_dir, img_name), label))
    return data

# Pool the records of both splits into a single list
train_data, val_data = get_data(train_dir), get_data(val_dir)
data = [*train_data, *val_data]

# Tabulate the records, then shuffle the rows with a fixed seed and
# renumber the index from zero
df = (
    pd.DataFrame(data, columns=['image_path', 'label'])
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

In [None]:
import os
import pandas as pd

# Locations of the two labelled splits that are pooled in this cell
train_dir, val_dir = (
    '/kaggle/input/gender-dataset/Dataset/Train',
    '/kaggle/input/gender-dataset/Dataset/Validation',
)

def get_data(directory):
    """Collect ``(image_path, label)`` pairs from a split directory.

    Every entry found in ``directory/Male`` is labelled 0 and every entry
    found in ``directory/Female`` is labelled 1.

    Args:
        directory: Path of a folder containing ``Male`` and ``Female``
            sub-folders.

    Returns:
        A list of ``(path, label)`` tuples: all Male entries first, then all
        Female entries, each group in sorted filename order.

    Raises:
        OSError: If either class sub-folder cannot be listed.
    """
    data = []
    # Map gender to numerical label: Male -> 0, Female -> 1
    for label, gender in enumerate(['Male', 'Female']):
        gender_dir = os.path.join(directory, gender)
        # os.listdir returns names in arbitrary order; sorting makes the
        # result deterministic so that seeded shuffles and samples applied
        # to it are reproducible.
        for img_name in sorted(os.listdir(gender_dir)):
            data.append((os.path.join(gender_dir, img_name), label))
    return data

# Pool the Train and Validation records into one list
train_data, val_data = get_data(train_dir), get_data(val_dir)
data = [*train_data, *val_data]

# The per-split lists are redundant once pooled
del train_data, val_data
gc.collect()

# Tabulate the pooled records
df = pd.DataFrame(data, columns=['image_path', 'label'])

# The record list is redundant once the frame exists
del data
gc.collect()

# Split the rows by class (label 0 = Male, label 1 = Female)
male_samples = df.loc[df['label'].eq(0)]
female_samples = df.loc[df['label'].eq(1)]

# Keep 40% of the smaller class, and the same count from the other class
n_samples = int(0.4 * min(len(male_samples), len(female_samples)))

# Draw the equal-sized random subsets with a fixed seed
male_samples = male_samples.sample(n=n_samples, random_state=42)
female_samples = female_samples.sample(n=n_samples, random_state=42)

# Stack the two subsets, shuffle the rows and renumber the index
df_balanced = (
    pd.concat([male_samples, female_samples])
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

# Release the per-class subsets
del male_samples, female_samples
gc.collect()

# Release the unbalanced frame
del df
gc.collect()

# Show a preview, the size and the class proportions of the result
print(df_balanced.head())
print("\nDataset shape:", df_balanced.shape)
print("\nClass distribution:")
print(df_balanced['label'].value_counts(normalize=True))

In [None]:
# Show the first 30 rows of the balanced, shuffled frame
print(df_balanced.head(30))

In [None]:
img_height, img_width = 64, 64  # Adjust as needed

# Function to load images and labels
def load_images_and_labels(df, target_size=None):
    """Load every image listed in ``df`` into one array, scaled to [0, 1].

    Args:
        df: DataFrame with an ``image_path`` column (files to read) and a
            ``label`` column.
        target_size: Optional ``(height, width)`` each image is resized to.
            Defaults to the module-level ``(img_height, img_width)``.

    Returns:
        A tuple ``(X, y)``: ``X`` is a float32 array of shape
        ``(len(df), height, width, 3)`` with pixel values divided by 255,
        and ``y`` is an array holding a copy of the ``label`` column. For an
        empty frame ``X`` has shape ``(0, height, width, 3)``.
    """
    if target_size is None:
        target_size = (img_height, img_width)
    total_images = len(df)

    # Pre-allocate the output instead of growing a list and copying it with
    # np.array at the end, which briefly holds every image twice in memory.
    # load_img decodes to RGB by default (3 channels) and img_to_array
    # yields float32 by default, so shape and dtype match the per-image data.
    X = np.empty((total_images, *target_size, 3), dtype=np.float32)
    y = df['label'].to_numpy(copy=True)

    # Iterate over the path column directly; iterrows would build a full
    # Series object for every row.
    for i, img_file in enumerate(df['image_path']):
        img = load_img(img_file, target_size=target_size)
        X[i] = img_to_array(img) / 255.0  # Normalize the image to [0, 1]

        # Print progress every 1000 images
        if (i + 1) % 1000 == 0:
            print(f"Processed {i + 1}/{total_images} images")

    print(f"Finished processing all {total_images} images")
    return X, y

In [None]:
# Hold out 20% of the rows, stratified on the label column, with a fixed seed
train_df, test_df = train_test_split(
    df_balanced,
    test_size=0.2,
    stratify=df_balanced['label'],
    random_state=42,
)

# The combined frame is not needed once it has been split
del df_balanced
gc.collect()

In [None]:
# Decode both partitions into in-memory arrays
X_train, y_train = load_images_and_labels(train_df)
X_test, y_test = load_images_and_labels(test_df)

# The path/label frames are redundant once the pixels are loaded
del train_df, test_df
gc.collect()

# Report the shape of each loaded array, one per line
print("\n".join(
    f"{caption} {loaded.shape}"
    for caption, loaded in (
        ("X_train shape:", X_train),
        ("y_train shape:", y_train),
        ("X_test shape:", X_test),
        ("y_test shape:", y_test),
    )
))

In [None]:
# Print the pixel array of the first training sample as a sanity check;
# load_images_and_labels divided the values by 255, so they should lie in [0, 1].
print(X_train[0])

In [None]:
from tensorflow.keras import models, layers

input_shape = (64, 64, 3)  # Input size of 64x64
n_classes = 1


def _conv_stage(filters, **conv_kwargs):
    """Return one conv stage as a list of layers.

    The stage is a 3x3 ReLU Conv2D with ``filters`` output channels, then
    BatchNormalization, then 2x2 MaxPooling2D. Extra keyword arguments are
    forwarded to the Conv2D layer.
    """
    return [
        layers.Conv2D(filters, kernel_size=(3, 3), activation='relu', **conv_kwargs),
        layers.BatchNormalization(),
        layers.MaxPooling2D((2, 2)),
    ]


# Four conv stages (32, 64, 128, 128 filters) followed by a dense head with
# a single sigmoid output unit
model = models.Sequential([
    *_conv_stage(32, input_shape=input_shape),
    *_conv_stage(64),
    *_conv_stage(128),
    *_conv_stage(128),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.BatchNormalization(),
    layers.Dense(n_classes, activation='sigmoid'),
])

# Show the layer-by-layer architecture
model.summary()


In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss for the
# single sigmoid output, and accuracy as the tracked metric
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Print the model summary again (it was already printed in the cell that
# built the model).
model.summary()

In [None]:
# Training hyper-parameters
batch_size, epochs = 8, 20

# Fit on the training arrays; the held-out arrays are evaluated after every
# epoch as validation data, and per-epoch progress is printed (verbose=1)
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(X_test, y_test),
)

In [None]:
from tensorflow.keras.preprocessing import image

target_size = (64, 64)
path_testmodel = "/kaggle/input/gender-dataset/Dataset/Test/Female/160029.jpg"
imge = image.load_img(path_testmodel, target_size=target_size)

# Scale pixels to [0, 1] exactly as load_images_and_labels does for the
# training data. Feeding raw 0-255 values to a model trained on 0-1 inputs
# puts the input far outside the range it was fitted on.
X = image.img_to_array(imge) / 255.0
X = np.expand_dims(X, axis=0)  # add the batch axis: (1, 64, 64, 3)

images = X  # already a batch of one image; no stacking required
classes = model.predict(images, batch_size=1)
print(classes[0])

# The sigmoid output is the score for label 1 (Female); label 0 is Male.
# Index down to the scalar so the comparison yields a plain boolean.
if classes[0][0] < 0.5:
    print("This is a male")
else:
    print( "This  is a female")
plt.imshow(imge)