<a href="https://colab.research.google.com/github/Dhruvp187/CNN/blob/main/Untitled.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install patool

Collecting patool
  Downloading patool-2.0.0-py2.py3-none-any.whl (93 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 93.7/93.7 kB 903.0 kB/s eta 0:00:00
Installing collected packages: patool
Successfully installed patool-2.0.0


In [None]:
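# Install necessary libraries (uncomment if they are missing).
# Note: the PyPI package is `patool`; `patoolib` is only its import name.
# !pip install tensorflow
# !pip install patool tqdm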

import os
import patoolib
import cv2
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from keras import layers, models
from tqdm import tqdm
import subprocess

# Locations on Google Drive: the RAR archive to unpack and the folder it is unpacked into
dataset_path = "/content/gdrive/MyDrive/imdb_crop.rar"
extract_path = "/content/gdrive/MyDrive/extracted_dataset"

# Attach Google Drive at /content/gdrive so the paths above resolve (Colab only)
from google.colab import drive
drive.mount('/content/gdrive')


In [None]:
# Run the patool CLI as a child process and stream its output.
# The command is an argument list (no shell), so paths containing spaces or quotes
# reach patool unchanged and nothing in them is interpreted by a shell.
os.makedirs(extract_path, exist_ok=True)  # make sure the target folder exists
command = ["patool", "extract", "--outdir", extract_path, dataset_path]

extraction_log = []  # everything patool printed, kept for the error message below
with subprocess.Popen(
    command,
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,  # merge stderr so a single loop sees all output
    universal_newlines=True,
) as process:
    # patool reports no percentage, so the bar simply counts output lines
    # (no fixed total) to show that the extraction is still alive.
    with tqdm(unit=' lines', desc='Extracting', position=0, leave=True) as pbar:
        for line in process.stdout:
            extraction_log.append(line.rstrip())
            pbar.update(1)

# Leaving the Popen context waits for the process, so returncode is final here.
# Fail loudly instead of letting later cells run on a missing/partial dataset.
if process.returncode != 0:
    log_tail = "\n".join(extraction_log[-20:])
    raise RuntimeError(
        f"patool exited with status {process.returncode} while extracting "
        f"{dataset_path!r}:\n{log_tail}"
    )



In [None]:
# Load and preprocess every image under extract_path.
#
# Result:
#   data   - uint8 array of shape (n_images, 224, 224, 3), pixels as returned by cv2
#   labels - int32 array of shape (n_images,), parsed from each filename
#
# Pixels are kept as raw 0-255 uint8 (as `data` always was): one image costs
# 224*224*3 bytes (~147 KiB), and a float copy would multiply that by 4 or 8.
IMAGE_SIZE = (224, 224)  # (width, height) handed to cv2.resize
MAX_IMAGES = None        # set to an int to load only the first N paths if RAM is tight

# Collect the image paths first: the progress total is then exact (only .jpg files)
# and the output arrays can be allocated once instead of growing Python lists.
# Sorting makes the order independent of os.walk's directory ordering.
image_paths = sorted(
    os.path.join(root, file)
    for root, _dirs, files in os.walk(extract_path)
    for file in files
    if file.endswith(".jpg")
)
if MAX_IMAGES is not None:
    image_paths = image_paths[:MAX_IMAGES]

data = np.empty((len(image_paths), IMAGE_SIZE[1], IMAGE_SIZE[0], 3), dtype=np.uint8)
labels = np.empty(len(image_paths), dtype=np.int32)
loaded = 0   # number of rows of data/labels actually filled
skipped = 0  # files ignored because they could not be parsed or decoded

with tqdm(total=len(image_paths), unit='files', desc='Loading and Preprocessing', position=0, leave=True) as pbar:
    for image_path in image_paths:
        pbar.update(1)

        # Filenames are expected to look like <name>_<image_number>_<gender>_<age>.jpg
        # NOTE(review): in the stock IMDB-WIKI "imdb_crop" archive the last two fields
        # are date of birth and the year the photo was taken, not gender and age.
        # If this archive is unmodified, the label parsed here is a year - confirm.
        try:
            _name, _image_number, _gender, age_field = os.path.basename(image_path).split("_")
            age = int(age_field.split(".")[0])
        except ValueError:
            skipped += 1  # wrong number of "_" fields or a non-numeric label
            continue

        image = cv2.imread(image_path)
        if image is None:
            skipped += 1  # cv2.imread returns None for unreadable/corrupt files
            continue

        data[loaded] = cv2.resize(image, IMAGE_SIZE)
        labels[loaded] = age
        loaded += 1

# Drop the unused tail left behind by skipped files
data = data[:loaded]
labels = labels[:loaded]
print(f"Loaded {loaded} images; skipped {skipped} (unreadable or unexpected filename)")

if loaded == 0:
    raise RuntimeError(f"No usable .jpg images found under {extract_path!r}")


In [None]:
# Custom tqdm callback
class tqdm_callback(tf.keras.callbacks.Callback):
    """Keras callback that advances a tqdm bar by one tick per processed batch.

    Works for both ``model.fit`` (ticks on every training batch) and
    ``model.evaluate`` (ticks on every test batch). Validation passes that run
    inside ``fit`` are not counted. When Keras reports the number of steps, the
    bar's total is reset to the real batch count, so the total passed to
    ``tqdm(...)`` by the caller is only a placeholder.

    Args:
        pbar: an open tqdm progress bar; it is updated but never closed here.
    """

    def __init__(self, pbar):
        super().__init__()
        self.pbar = pbar
        self._fitting = False  # True between on_train_begin and on_train_end

    def _set_total(self, total_batches):
        """Retarget the bar to ``total_batches`` ticks; no-op when unknown (None/0)."""
        if total_batches:
            self.pbar.total = total_batches
            self.pbar.refresh()

    def on_train_begin(self, logs=None):
        self._fitting = True
        params = self.params or {}
        epochs, steps = params.get('epochs'), params.get('steps')
        if epochs and steps:
            self._set_total(epochs * steps)

    def on_train_end(self, logs=None):
        self._fitting = False

    def on_train_batch_end(self, batch, logs=None):
        self.pbar.update(1)

    def on_test_begin(self, logs=None):
        # Only a standalone evaluate() owns the bar; validation inside fit() does not.
        if not self._fitting:
            self._set_total((self.params or {}).get('steps'))

    def on_test_batch_end(self, batch, logs=None):
        if not self._fitting:
            self.pbar.update(1)

# Split Data with Progress Bar
# Keras treats a Python list of arrays as "one array per model input", so the
# samples must be stacked into single ndarrays before splitting and training.
# np.asarray is a no-op when the inputs are already arrays.
images = np.asarray(data)
ages = np.asarray(labels)
if len(images) == 0:
    raise ValueError("No samples to split: `data` is empty - check the loading step")
if len(images) != len(ages):
    raise ValueError(f"Got {len(images)} images but {len(ages)} labels")

# The split itself is a single call; the bar only marks its completion.
with tqdm(total=len(images), unit=' samples', desc='Splitting Data', position=0, leave=True) as pbar:
    train_data, test_data, train_labels, test_labels = train_test_split(
        images, ages, test_size=0.2, random_state=42  # fixed seed for a repeatable split
    )
    pbar.update(len(images))

# Build AgeNet Model
# A small CNN that regresses a single number (the age label) from a 224x224 colour image.
IMAGE_SHAPE = (224, 224, 3)

model = models.Sequential([
    layers.Input(shape=IMAGE_SHAPE),
    # Scale raw 0-255 pixels to [0, 1] inside the model, so training, evaluation
    # and any later use of the saved model all see identically scaled inputs.
    layers.Rescaling(1.0 / 255),

    # Convolutional layers: three conv + max-pool stages with 32, 64, 128 filters
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),

    # Flatten layer
    layers.Flatten(),

    # Dense layers: one hidden layer with dropout, then a single linear output unit
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(1, activation='linear'),
])

# Compile Model: optimise mean squared error, report mean absolute error
model.compile(
    optimizer='adam',
    loss='mean_squared_error',
    metrics=['mae']
)

# Print model summary
model.summary()

# Training hyperparameters, named once so the progress totals below stay in sync with fit()
EPOCHS = 10
TRAIN_BATCH_SIZE = 32
VALIDATION_SPLIT = 0.2

# Train Model with Progress Bar
# The callback ticks once per batch, so the total is expressed in batches:
# Keras first holds out VALIDATION_SPLIT of the training samples, then batches the rest.
n_fit_samples = int(len(train_data) * (1 - VALIDATION_SPLIT))
train_batches = EPOCHS * int(np.ceil(n_fit_samples / TRAIN_BATCH_SIZE))
with tqdm(total=train_batches, unit=' batches', desc='Training Model', position=0, leave=True) as pbar:
    model.fit(
        train_data,
        train_labels,
        epochs=EPOCHS,
        batch_size=TRAIN_BATCH_SIZE,
        validation_split=VALIDATION_SPLIT,
        callbacks=[tqdm_callback(pbar)],
    )

# Evaluate Model with Progress Bar
eval_batches = int(np.ceil(len(test_data) / TRAIN_BATCH_SIZE))
with tqdm(total=eval_batches, unit=' batches', desc='Evaluating Model', position=0, leave=True) as pbar:
    test_loss, test_mae = model.evaluate(
        test_data,
        test_labels,
        batch_size=TRAIN_BATCH_SIZE,
        callbacks=[tqdm_callback(pbar)],
    )
print(f"Mean Absolute Error on Test Data: {test_mae}")

# Save Model with Progress Bar (saving is a single call; the bar only marks completion)
with tqdm(total=1, unit=' models', desc='Saving Model', position=0, leave=True) as pbar:
    model.save("/content/gdrive/MyDrive/age_prediction_model.h5")
    pbar.update(1)