In [1]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
import numpy as np
import json
import cv2
import os
from sklearn.model_selection import train_test_split

# Data loading and preprocessing
def load_data(image_dir, labels_file, target_size=(228, 228)):
    """
    Load images and their corresponding grasp points.

    Args:
        image_dir: Directory that contains the image files.
        labels_file: Path to a JSON file holding a list of records. Each
            record must provide an 'image_id' (file name relative to
            ``image_dir``) and a 'grasp_point' entry.
        target_size: ``(width, height)`` passed to ``cv2.resize``.

    Returns:
        Tuple ``(images, grasp_points)`` of NumPy arrays. ``images`` holds the
        resized pictures as float32 scaled to [0, 1], in the channel order
        produced by ``cv2.imread``. ``grasp_points`` holds the matching
        'grasp_point' values in the same order.
        NOTE(review): grasp points are presumably (x, y) pairs already
        normalised to [0, 1] -- confirm against the annotation file.

    Records whose image is missing on disk or cannot be decoded are skipped;
    a single warning reports how many were dropped.
    """
    import warnings

    # Load labels
    with open(labels_file, 'r') as f:
        labels_data = json.load(f)

    images = []
    grasp_points = []
    skipped = []

    for item in labels_data:
        img_path = os.path.join(image_dir, item['image_id'])
        if not os.path.exists(img_path):
            skipped.append(img_path)
            continue

        # cv2.imread signals an unreadable/corrupt file by returning None
        # instead of raising, which would otherwise crash cv2.resize below.
        img = cv2.imread(img_path)
        if img is None:
            skipped.append(img_path)
            continue

        img = cv2.resize(img, target_size)
        # float32 halves the memory of the default float64 division result
        # and is the dtype the network computes in anyway.
        images.append(img.astype(np.float32) / 255.0)
        grasp_points.append(item['grasp_point'])

    if skipped:
        warnings.warn(
            f"load_data: skipped {len(skipped)} of {len(labels_data)} records "
            f"(image missing or unreadable), e.g. {skipped[0]}",
            stacklevel=2,
        )

    return np.array(images), np.array(grasp_points)

# Create CNN model
def create_model(input_shape=(228, 228, 3)):
    """
    Create the (uncompiled) CNN model for grasp point prediction.

    Args:
        input_shape: Shape of one input image, handed to ``Input``. The
            default matches the 228x228 resize used by the loader in this
            cell.

    Returns:
        A Keras ``Model`` mapping an image to two sigmoid outputs, i.e. two
        values in (0, 1). Per the original comment these are the (x, y)
        coordinates of the grasp point.
    """
    inputs = Input(shape=input_shape)

    # Four identical conv -> max-pool stages that differ only in filter count.
    x = inputs
    for filters in (32, 64, 128, 256):
        x = Conv2D(filters, (3, 3), activation='relu', padding='same')(x)
        x = MaxPooling2D((2, 2))(x)

    # Flatten and dense layers
    x = Flatten()(x)
    x = Dense(512, activation='relu')(x)
    x = Dropout(0.5)(x)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.3)(x)

    # Sigmoid bounds both outputs to (0, 1), so training targets have to be
    # expressed on that scale as well.
    outputs = Dense(2, activation='sigmoid')(x)

    return Model(inputs=inputs, outputs=outputs)

# Training function
def train_model(image_dir, labels_file, epochs=50, batch_size=32,
                model_path='grasp_point_model.h5'):
    """
    Train the CNN model and save it to disk.

    Args:
        image_dir: Directory containing the training images.
        labels_file: JSON annotation file understood by ``load_data``.
        epochs: Maximum number of training epochs (early stopping may end
            training sooner).
        batch_size: Mini-batch size passed to ``model.fit``.
        model_path: Where the trained model is saved. Pass ``None`` or an
            empty string to skip saving.

    Returns:
        Tuple ``(model, history)``: the trained Keras model and the object
        returned by ``model.fit``.

    Raises:
        ValueError: If no usable image/label pair could be loaded.
    """
    import warnings

    # Load and preprocess data
    X, y = load_data(image_dir, labels_file)

    # Fail early with an actionable message instead of an opaque error from
    # the train/validation split further down.
    if len(X) == 0:
        raise ValueError(
            f"No training samples loaded from image_dir={image_dir!r} using "
            f"labels_file={labels_file!r}; check that the 'image_id' entries "
            f"match files in the image directory."
        )

    # The network ends in a sigmoid, so it can only emit values in (0, 1);
    # targets outside that range can never be fitted.
    labels = np.asarray(y)
    if (labels.size and np.issubdtype(labels.dtype, np.number)
            and (labels.min() < 0 or labels.max() > 1)):
        warnings.warn(
            "Grasp point labels fall outside [0, 1] but the model output is "
            "a sigmoid; normalise the labels before training.",
            stacklevel=2,
        )

    # Split data into train and validation sets
    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Create and compile model
    model = create_model()
    model.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='mse',
        metrics=['mae'],
    )

    # Stop once validation loss stalls (keeping the best weights) and halve
    # the learning rate on shorter plateaus.
    callbacks = [
        tf.keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True),
        tf.keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=5),
    ]

    # Train model
    history = model.fit(
        X_train, y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(X_val, y_val),
        callbacks=callbacks,
    )

    # Save model (the default path uses the .h5 format)
    if model_path:
        model.save(model_path)

    return model, history

# Prediction function
def predict_grasp_point(model, image_path, target_size=(228, 228)):
    """
    Predict grasp point for a single image.

    Args:
        model: Trained model exposing ``predict`` (e.g. the one returned by
            ``train_model``).
        image_path: Path of the image file to run inference on.
        target_size: ``(width, height)`` passed to ``cv2.resize``; it must
            match the spatial input size the model was built with.

    Returns:
        The first (and only) row of ``model.predict`` for the image.

    Raises:
        FileNotFoundError: If ``image_path`` does not point to a file.
        ValueError: If the file exists but cannot be decoded as an image.
    """
    if not os.path.isfile(image_path):
        raise FileNotFoundError(f"Image not found: {image_path}")

    # cv2.imread returns None rather than raising when decoding fails.
    img = cv2.imread(image_path)
    if img is None:
        raise ValueError(f"Could not decode image: {image_path}")

    # Same preprocessing as used for training: resize, then scale to [0, 1].
    img = cv2.resize(img, target_size)
    img = img.astype(np.float32) / 255.0

    # Add the batch axis for predict, then strip it from the result.
    prediction = model.predict(np.expand_dims(img, axis=0))[0]

    return prediction

# Usage example
if __name__ == "__main__":
    # Paths default to the original workstation locations; set the
    # GRASP_IMAGE_DIR / GRASP_LABELS_FILE / GRASP_TEST_IMAGE environment
    # variables to run elsewhere without editing this cell.
    IMAGE_DIR = os.environ.get("GRASP_IMAGE_DIR", r"U:\training_data")
    LABELS_FILE = os.environ.get(
        "GRASP_LABELS_FILE",
        r"C:\Users\ADMIN\Desktop\final\formatted1_annotations.json",
    )

    # Train model
    model, history = train_model(IMAGE_DIR, LABELS_FILE)

    # Example prediction on a single image
    test_image = os.environ.get("GRASP_TEST_IMAGE", r"U:\training_data\Z_23.png")
    grasp_point = predict_grasp_point(model, test_image)
    print(f"Predicted grasp point: {grasp_point}")

KeyboardInterrupt: 

In [5]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
import numpy as np
import json
import cv2
import os
from sklearn.model_selection import train_test_split

# Data loading and preprocessing
def load_data(image_dir, labels_file, target_size=(128, 128)):
    """
    Load images and their corresponding grasp points.

    Args:
        image_dir: Directory that contains the image files.
        labels_file: Path to a JSON file holding a list of records. Each
            record must provide an 'image_id' (file name relative to
            ``image_dir``) and a 'grasp_point' entry.
        target_size: ``(width, height)`` passed to ``cv2.resize``.

    Returns:
        Tuple ``(images, grasp_points)`` of NumPy arrays. ``images`` holds the
        resized pictures as float32 scaled to [0, 1], in the channel order
        produced by ``cv2.imread``. ``grasp_points`` holds the matching
        'grasp_point' values in the same order.
        NOTE(review): grasp points are presumably (x, y) pairs already
        normalised to [0, 1] -- confirm against the annotation file.

    Records whose image is missing on disk or cannot be decoded are skipped;
    a single warning reports how many were dropped.
    """
    import warnings

    # Load labels
    with open(labels_file, 'r') as f:
        labels_data = json.load(f)

    images = []
    grasp_points = []
    skipped = []

    for item in labels_data:
        img_path = os.path.join(image_dir, item['image_id'])
        if not os.path.exists(img_path):
            skipped.append(img_path)
            continue

        # cv2.imread signals an unreadable/corrupt file by returning None
        # instead of raising, which would otherwise crash cv2.resize below.
        img = cv2.imread(img_path)
        if img is None:
            skipped.append(img_path)
            continue

        img = cv2.resize(img, target_size)
        # float32 halves the memory of the default float64 division result
        # and is the dtype the network computes in anyway.
        images.append(img.astype(np.float32) / 255.0)
        grasp_points.append(item['grasp_point'])

    if skipped:
        warnings.warn(
            f"load_data: skipped {len(skipped)} of {len(labels_data)} records "
            f"(image missing or unreadable), e.g. {skipped[0]}",
            stacklevel=2,
        )

    return np.array(images), np.array(grasp_points)

# Create CNN model
def create_model(input_shape=(128, 128, 3)):
    """
    Create the (uncompiled) CNN model for grasp point prediction.

    Args:
        input_shape: Shape of one input image, handed to ``Input``. The
            default matches the 128x128 resize used by the loader in this
            cell.

    Returns:
        A Keras ``Model`` mapping an image to two sigmoid outputs, i.e. two
        values in (0, 1). Per the original comment these are the (x, y)
        coordinates of the grasp point.
    """
    inputs = Input(shape=input_shape)

    # Four identical conv -> max-pool stages that differ only in filter count.
    x = inputs
    for filters in (32, 64, 128, 256):
        x = Conv2D(filters, (3, 3), activation='relu', padding='same')(x)
        x = MaxPooling2D((2, 2))(x)

    # Flatten and dense layers
    x = Flatten()(x)
    x = Dense(512, activation='relu')(x)
    x = Dropout(0.5)(x)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.3)(x)

    # Sigmoid bounds both outputs to (0, 1), so training targets have to be
    # expressed on that scale as well.
    outputs = Dense(2, activation='sigmoid')(x)

    return Model(inputs=inputs, outputs=outputs)

# Training function
def train_model(image_dir, labels_file, epochs=50, batch_size=32,
                model_path='grasp_point_model_128.h5'):
    """
    Train the CNN model and save it to disk.

    Args:
        image_dir: Directory containing the training images.
        labels_file: JSON annotation file understood by ``load_data``.
        epochs: Maximum number of training epochs (early stopping may end
            training sooner).
        batch_size: Mini-batch size passed to ``model.fit``.
        model_path: Where the trained model is saved. Pass ``None`` or an
            empty string to skip saving.

    Returns:
        Tuple ``(model, history)``: the trained Keras model and the object
        returned by ``model.fit``.

    Raises:
        ValueError: If no usable image/label pair could be loaded.
    """
    import warnings

    # Load and preprocess data
    X, y = load_data(image_dir, labels_file)

    # Fail early with an actionable message instead of an opaque error from
    # the train/validation split further down.
    if len(X) == 0:
        raise ValueError(
            f"No training samples loaded from image_dir={image_dir!r} using "
            f"labels_file={labels_file!r}; check that the 'image_id' entries "
            f"match files in the image directory."
        )

    # The network ends in a sigmoid, so it can only emit values in (0, 1);
    # targets outside that range can never be fitted.
    labels = np.asarray(y)
    if (labels.size and np.issubdtype(labels.dtype, np.number)
            and (labels.min() < 0 or labels.max() > 1)):
        warnings.warn(
            "Grasp point labels fall outside [0, 1] but the model output is "
            "a sigmoid; normalise the labels before training.",
            stacklevel=2,
        )

    # Split data into train and validation sets
    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Create and compile model
    model = create_model()
    model.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='mse',
        metrics=['mae'],
    )

    # Stop once validation loss stalls (keeping the best weights) and halve
    # the learning rate on shorter plateaus.
    callbacks = [
        tf.keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True),
        tf.keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=5),
    ]

    # Train model
    history = model.fit(
        X_train, y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(X_val, y_val),
        callbacks=callbacks,
    )

    # Save model (the default path uses the .h5 format)
    if model_path:
        model.save(model_path)

    return model, history

# Prediction function
def predict_grasp_point(model, image_path, target_size=(128, 128)):
    """
    Predict grasp point for a single image.

    Args:
        model: Trained model exposing ``predict`` (e.g. the one returned by
            ``train_model``).
        image_path: Path of the image file to run inference on.
        target_size: ``(width, height)`` passed to ``cv2.resize``; it must
            match the spatial input size the model was built with.

    Returns:
        The first (and only) row of ``model.predict`` for the image.

    Raises:
        FileNotFoundError: If ``image_path`` does not point to a file.
        ValueError: If the file exists but cannot be decoded as an image.
    """
    if not os.path.isfile(image_path):
        raise FileNotFoundError(f"Image not found: {image_path}")

    # cv2.imread returns None rather than raising when decoding fails.
    img = cv2.imread(image_path)
    if img is None:
        raise ValueError(f"Could not decode image: {image_path}")

    # Same preprocessing as used for training: resize, then scale to [0, 1].
    img = cv2.resize(img, target_size)
    img = img.astype(np.float32) / 255.0

    # Add the batch axis for predict, then strip it from the result.
    prediction = model.predict(np.expand_dims(img, axis=0))[0]

    return prediction

# Usage example
if __name__ == "__main__":
    # Paths default to the original workstation locations; set the
    # GRASP_IMAGE_DIR / GRASP_LABELS_FILE / GRASP_TEST_IMAGE environment
    # variables to run elsewhere without editing this cell.
    IMAGE_DIR = os.environ.get("GRASP_IMAGE_DIR", r"U:\training_data")
    LABELS_FILE = os.environ.get(
        "GRASP_LABELS_FILE",
        r"C:\Users\ADMIN\Desktop\final_model\formatted1_annotations.json",
    )

    # Train model
    model, history = train_model(IMAGE_DIR, LABELS_FILE)

    # Example prediction on a single image
    test_image = os.environ.get("GRASP_TEST_IMAGE", r"U:\training_data\Z_23.png")
    grasp_point = predict_grasp_point(model, test_image)
    print(f"Predicted grasp point: {grasp_point}")

Epoch 1/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 705ms/step - loss: 0.0150 - mae: 0.1009 - val_loss: 0.0044 - val_mae: 0.0531 - learning_rate: 0.0010
Epoch 2/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 548ms/step - loss: 0.0056 - mae: 0.0615 - val_loss: 0.0044 - val_mae: 0.0535 - learning_rate: 0.0010
Epoch 3/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 541ms/step - loss: 0.0046 - mae: 0.0556 - val_loss: 0.0043 - val_mae: 0.0536 - learning_rate: 0.0010
Epoch 4/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 521ms/step - loss: 0.0049 - mae: 0.0583 - val_loss: 0.0042 - val_mae: 0.0525 - learning_rate: 0.0010
Epoch 5/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 514ms/step - loss: 0.0046 - mae: 0.0553 - val_loss: 0.0040 - val_mae: 0.0522 - learning_rate: 0.0010
Epoch 6/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 533ms/step - loss: 0.0047 - mae: 0.0577 - val_loss: 0.003



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 156ms/step
Predicted grasp point: [0.5534916  0.33896548]
