# Includes

In [None]:
import cv2
import mediapipe as mp
import pandas as pd
import os
import numpy as np
import tensorflow as tf
from PIL import Image
from io import BytesIO
import requests
import time
from sklearn.utils import class_weight
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt


# Definition
# Camera frame resolution constants (not referenced elsewhere in the visible code).
res_x = 1280
res_y = 720
#labels = ["closedFist", "DaumenUP", "fingerCircle", "fingerSymbols", "none", "openPalm", "point"]
# Class names. A sample's integer label is its index in this list, and the
# training loader only reads sub-folders of `training_folder` whose name is listed here.
labels = ["paper", "rock", "scissors"]
training_folder = 'training_images_rps'

# NOTE(review): the camera URL embeds a username and password in plain text —
# consider loading credentials from the environment instead of committing them.
url_cam = "http://admin:12345@10.100.91.200/image/jpeg.cgi"
# REST endpoint that control_shelly_light() POSTs the ON/OFF state to.
url_shelly = "http://10.100.91.43:8080/rest/items/ShellyLight_Betrieb"

In [None]:
import sys
# Report which interpreter backs this kernel so the pip installs below
# target the same environment.
print(f"Python version: {sys.version}")
print(f"Python executable: {sys.executable}")


# Install the notebook's dependencies into the running interpreter.
# NOTE(review): this cell sits after the import cell; on a fresh environment
# it has to be executed before those imports can succeed.
!{sys.executable} -m pip install --upgrade pip
!{sys.executable} -m pip install mediapipe pandas tensorflow scikit-learn matplotlib opencv-python pillow requests

# Building Training Set

In [None]:
# Accumulators filled by the training-set loop below: one flattened landmark
# vector per sample, and the matching class index (position in `labels`).
image_set = []
image_set_labels = []

# Short alias for the MediaPipe hands solution module.
mp_hands = mp.solutions.hands

# Landmark normalization and augmentation functions
def normalize_landmarks(landmarks_array):
    """Return wrist-centred, unit-scaled landmarks as a flat array.

    The flat input is viewed as rows of (x, y, z). Row 0 (the wrist) is
    subtracted from every row, then all rows are divided by the largest
    Euclidean distance from the wrist. When that distance is not positive
    (all points coincide) the division is skipped.
    """
    points = landmarks_array.reshape(-1, 3)
    centred = points - points[0].copy()

    scale = np.max(np.linalg.norm(centred, axis=1))
    if not scale > 0:
        return centred.flatten()
    return (centred / scale).flatten()


def rotate_landmarks_3d(landmarks_array, angle_x=0, angle_y=0, angle_z=0):
    """Rotate flat 3D landmarks about the X, then Y, then Z axis.

    Angles are in radians. The input is viewed as rows of (x, y, z) and the
    rotated points are returned flattened again.
    """
    cx, sx = np.cos(angle_x), np.sin(angle_x)
    cy, sy = np.cos(angle_y), np.sin(angle_y)
    cz, sz = np.cos(angle_z), np.sin(angle_z)

    rot_x = np.array([[1, 0, 0], [0, cx, -sx], [0, sx, cx]])
    rot_y = np.array([[cy, 0, sy], [0, 1, 0], [-sy, 0, cy]])
    rot_z = np.array([[cz, -sz, 0], [sz, cz, 0], [0, 0, 1]])

    points = landmarks_array.reshape(-1, 3)
    # Row vectors are multiplied by the transposes, so X is applied first.
    rotated = points @ rot_x.T @ rot_y.T @ rot_z.T
    return rotated.flatten()


def augment_landmarks(landmarks_array, num_augmentations=10):
    """Build an augmented list of samples from one flat landmark vector.

    The returned list holds, in order: the input itself; for each of the
    ``num_augmentations`` rounds a randomly rotated and noised copy followed
    by its X-mirrored twin; and finally the X-mirrored input. That is
    ``2 * num_augmentations + 2`` entries in total.
    """
    # Maximum |rotation| per axis in radians: ~20° about X/Y, ~28° about Z.
    max_angle = {'x': 0.35, 'y': 0.35, 'z': 0.5}

    def _flip_x(flat):
        # Mirror a flat (x, y, z, ...) vector by negating every x coordinate.
        flipped = flat.copy().reshape(-1, 3)
        flipped[:, 0] *= -1
        return flipped.flatten()

    samples = [landmarks_array]

    for _ in range(num_augmentations):
        ax = np.random.uniform(-max_angle['x'], max_angle['x'])
        ay = np.random.uniform(-max_angle['y'], max_angle['y'])
        az = np.random.uniform(-max_angle['z'], max_angle['z'])

        jittered = rotate_landmarks_3d(landmarks_array.copy(), ax, ay, az)
        # Gaussian jitter (sigma 0.02) on every coordinate.
        jittered = jittered + np.random.normal(0, 0.02, jittered.shape)

        samples.append(jittered)
        samples.append(_flip_x(jittered))

    samples.append(_flip_x(landmarks_array))
    return samples


def process_image_by_mediapipe(image, hands):
    """Run ``hands`` on ``image`` and return the first hand's normalized landmarks.

    Returns the flat output of ``normalize_landmarks`` built from the x, y, z
    values of every landmark of the first detected hand, or ``None`` when the
    result carries no ``multi_hand_landmarks``.
    """
    detected = hands.process(image).multi_hand_landmarks
    if not detected:
        return None

    coords = [
        value
        for point in detected[0].landmark
        for value in (point.x, point.y, point.z)
    ]
    return normalize_landmarks(np.array(coords))


# Build the dataset with a single MediaPipe Hands instance shared by all images.
with mp_hands.Hands(
    static_image_mode=True,
    max_num_hands=1,
    min_detection_confidence=0.5,  # Lower threshold to catch more distant hands
    min_tracking_confidence=0.5) as hands:

    for image_type in os.listdir(training_folder):
        # Only entries named after a known class contribute samples.
        if image_type not in labels:
            continue
        label_idx = labels.index(image_type)

        for img_file in os.listdir(f'{training_folder}/{image_type}'):
            image_path = os.path.join(training_folder, image_type, img_file)
            image = cv2.imread(image_path)
            if image is None:
                # cv2.imread returned nothing for this file; skip it silently.
                continue
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            try:
                hand_landmarks = process_image_by_mediapipe(image, hands)
                if hand_landmarks is None:
                    print(f"No hand detected in: {image_path}")
                    continue

                # Original sample first ...
                image_set.append(hand_landmarks)
                image_set_labels.append(label_idx)

                # ... then every augmented variant (entry 0 is the original again).
                augmented_samples = augment_landmarks(hand_landmarks, num_augmentations=10)
                extra_samples = augmented_samples[1:]
                image_set.extend(extra_samples)
                image_set_labels.extend([label_idx] * len(extra_samples))

                print(f"Processed: {image_path} - Generated {len(augmented_samples)} samples")
            except Exception as e:
                print(f"Error processing {image_path}: {e}")


# Convert to numpy arrays. The reshape pins the feature axis to 63
# (21 landmarks × 3 coordinates) so the array is 2-D even when no sample was
# collected; otherwise `image_set.shape[1]` below raises IndexError.
image_set = np.array(image_set, dtype=np.float32).reshape(-1, 63)
image_set_labels = np.array(image_set_labels, dtype=np.int32)

# Shuffle dataset to avoid chronological order (same permutation for both arrays)
if len(image_set) > 1:
    perm = np.random.permutation(len(image_set))
    image_set = image_set[perm]
    image_set_labels = image_set_labels[perm]
    print("Shuffled dataset to remove chronological ordering.")

if len(image_set) == 0:
    print(f"Warning: no samples were collected from '{training_folder}'.")

print(f"\nFinal dataset shape: {image_set.shape}")
print(f"Labels shape: {image_set_labels.shape}")
print(f"Each sample has {image_set.shape[1]} features (21 landmarks × 3 coordinates)")
print(f"Total samples including augmentation: {len(image_set)}")

# Quick preview of label distribution
unique, counts = np.unique(image_set_labels, return_counts=True)
print(dict(zip([labels[u] for u in unique], counts)))



In [None]:
import numpy as np
import cv2
from PIL import Image
from IPython.display import display
import mediapipe as mp

HAND_CONNECTIONS = mp.solutions.hands.HAND_CONNECTIONS  # (a, b) landmark-index pairs joined by a line

def visualize_landmarks(landmarks_flat, canvas_size=(400,400), circle_r=4, line_w=2):
    """Draw one flat landmark vector as a hand skeleton and display it inline.

    ``canvas_size`` is (width, height) in pixels, ``circle_r`` the landmark dot
    radius and ``line_w`` the connection line thickness. Only x and y are
    drawn; z is ignored.
    """
    width, height = canvas_size[0], canvas_size[1]
    points = np.array(landmarks_flat).reshape(-1, 3)  # one (x, y, z) row per landmark

    # The dataset landmarks are wrist-centred and scaled by max distance, so
    # x/y lie roughly in [-1, 1]; shift and scale them onto the canvas.
    pixels = (points[:, :2].copy() + 1.0) * 0.5
    pixels[:, 0] *= width
    pixels[:, 1] *= height
    pixels = pixels.astype(int)

    canvas = np.full((height, width, 3), 255, dtype=np.uint8)  # white background

    # Connections first, so the landmark dots are painted on top of the lines.
    for start, end in HAND_CONNECTIONS:
        x1, y1 = tuple(pixels[start])
        x2, y2 = tuple(pixels[end])
        cv2.line(canvas, (x1, y1), (x2, y2), (0, 200, 0), line_w)
    for (x, y) in pixels:
        cv2.circle(canvas, (x, y), circle_r, (0, 0, 255), -1)

    # The canvas was drawn with BGR colours; convert for PIL before displaying.
    display(Image.fromarray(cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB)))

# Example: show a random sample (if image_set contains landmarks)

# NOTE: with an empty image_set this call raises ValueError (the upper bound
# must be greater than the lower bound).
rand = np.random.randint(0, len(image_set))

# Draw the chosen sample and print its class name alongside.
visualize_landmarks(image_set[rand])
print(f"Label: {labels[image_set_labels[rand]]}")

# Training and Visualization

In [None]:
# Per-class sample counts and their share of the whole (augmented) dataset.
print("\n=== Class Distribution ===")
for i, label in enumerate(labels):
    # Labels are stored as indices into `labels`, so compare against i.
    count = np.sum(image_set_labels == i)
    print(f"{label}: {count} samples ({count/len(image_set_labels)*100:.1f}%)")

def print_training_results(history, model):
    """Plot the training curves and print classification metrics.

    ``history`` is the object returned by ``model.fit``; its ``history`` dict
    must contain 'accuracy', 'val_accuracy', 'loss' and 'val_loss'. The report
    and confusion matrix are computed over the whole module-level
    ``image_set``, which includes the samples the model was fitted on.
    """
    predicted_labels = np.argmax(model.predict(image_set), axis=1)

    # (training key, validation key, display name) for the two side-by-side panels.
    panels = (
        ('accuracy', 'val_accuracy', 'Accuracy'),
        ('loss', 'val_loss', 'Loss'),
    )
    plt.figure(figsize=(12, 4))
    for position, (train_key, val_key, name) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(history.history[train_key], label=f'Training {name}')
        plt.plot(history.history[val_key], label=f'Validation {name}')
        plt.title(f'Model {name}')
        plt.xlabel('Epoch')
        plt.ylabel(name)
        plt.legend()
    plt.show()

    print("\n=== Classification Report ===")
    print(classification_report(image_set_labels, predicted_labels, target_names=labels))

    print("\n=== Confusion Matrix ===")
    cm = confusion_matrix(image_set_labels, predicted_labels)
    print(cm)


# Balanced per-class weights, computed only for the classes actually present.
present_classes = np.unique(image_set_labels)
class_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=present_classes,
    y=image_set_labels
)
# Key each weight by its real label index. Enumerating the weights would
# attach them to the wrong class whenever a label has no samples, because the
# positions in `present_classes` then no longer match the label values.
class_weight_dict = {
    int(cls): float(weight)
    for cls, weight in zip(present_classes, class_weights)
}
print(f"\nClass weights: {class_weight_dict}")

def build_NN_model():
    """Create and compile the fully connected classifier.

    Takes 63 input features, runs four ReLU Dense layers (512, 256, 128, 64
    units) each followed by Dropout, and ends in a softmax over
    ``len(labels)`` classes. Compiled with Adam (lr 0.001) and sparse
    categorical cross-entropy.
    """
    keras = tf.keras
    # (units, dropout rate) of each hidden stage, in order.
    hidden_stages = ((512, 0.4), (256, 0.3), (128, 0.3), (64, 0.2))

    stack = [keras.Input(shape=(63,))]
    for units, drop_rate in hidden_stages:
        stack.append(keras.layers.Dense(units, activation='relu'))
        stack.append(keras.layers.Dropout(drop_rate))
    stack.append(keras.layers.Dense(len(labels), activation='softmax'))

    model = keras.models.Sequential(stack)
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=0.001),
        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
        metrics=['accuracy']
    )
    return model

def build_LSTM_model():
    """Create and compile the LSTM classifier.

    The 63 input features are reshaped to a (21, 3) sequence, passed through
    two stacked LSTM layers (128 then 64 units) with Dropout, a 64-unit ReLU
    Dense layer, and a softmax over ``len(labels)`` classes. Compiled with
    Adam (lr 0.001) and sparse categorical cross-entropy.
    """
    layers = tf.keras.layers

    stack = [
        tf.keras.Input(shape=(63,)),
        # Treat the 21 landmarks as time steps of 3 coordinates each.
        layers.Reshape((21, 3)),
        layers.LSTM(128, return_sequences=True),
        layers.Dropout(0.4),
        layers.LSTM(64),
        layers.Dropout(0.3),
        layers.Dense(64, activation='relu'),
        layers.Dropout(0.2),
        layers.Dense(len(labels), activation='softmax'),
    ]
    model = tf.keras.models.Sequential(stack)

    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
    model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])
    return model


# Train the LSTM variant; build_NN_model() is the dense alternative.
model = build_LSTM_model()
#model.summary()

# Stop once val_loss has not improved for 30 epochs and roll back to the best weights.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=30,
    restore_best_weights=True,
)

# NOTE(review): Keras documents validation_split as taking the last fraction
# of the arrays. image_set holds shuffled augmented variants of every source
# image, so validation samples likely share source images with the training
# part — confirm whether a per-image split is wanted.
history = model.fit(
    image_set, 
    image_set_labels, 
    epochs=100, 
    batch_size=8,
    validation_split=0.2,
    class_weight=class_weight_dict,
    callbacks=[early_stopping],
    verbose=1,
)
# Persist the trained model; the prediction cells load it again from this path.
model.save('model.keras')

print_training_results(history, model)


# Prediction

In [4]:
def get_picture_from_url(url = url_cam, timeout=5):
    """Download one image from ``url`` and return it as an RGB numpy array.

    Args:
        url: HTTP address that serves an image; defaults to the IP camera.
        timeout: seconds to wait for the server before giving up, so a
            stalled camera cannot block the caller forever.

    Returns:
        An array of shape (height, width, 3) in RGB channel order.

    Raises:
        requests.exceptions.RequestException: on connection errors, timeouts
            or a non-success HTTP status.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    image = Image.open(BytesIO(response.content))
    # PIL keeps the file's native mode (it may be grayscale, palette, RGBA...);
    # convert explicitly so callers always receive three RGB channels.
    return np.array(image.convert("RGB"))

def normalize_landmarks(landmarks_array):
    """Make a flat landmark vector translation and scale invariant.

    The wrist (first x, y, z triple) is moved to the origin and every point is
    divided by the largest distance from the wrist. The division is skipped
    when that distance is not positive. Returns a new flat array.
    """
    xyz = landmarks_array.reshape(-1, 3)

    # Wrist becomes the origin of the coordinate system.
    origin = xyz[0].copy()
    relative = xyz - origin

    # Reach of the hand: distance of the farthest landmark from the wrist.
    reach = np.linalg.norm(relative, axis=1).max()
    if reach > 0:
        relative = relative / reach

    return relative.flatten()

def switch_light(predicted_label):
    """Map a predicted gesture to a light command.

    "rock" switches the light ON; "paper" and "scissors" switch it OFF. Any
    other value is ignored.
    """
    if predicted_label == "rock":
        control_shelly_light(state="ON")
        return
    if predicted_label in ("paper", "scissors"):
        control_shelly_light(state="OFF")

def control_shelly_light(state="ON", auth_token="oh.ToggleLight.i994VVCmkKJgzwUanlkAFP1Pi86QpajtliS9OYdETG1vBh1c58DQTnZa0mjXp95MS8KBpYn7Fu5GtYRgbiaQ", url = url_shelly, timeout=5):
    """POST ``state`` as plain text to the light's REST endpoint.

    Args:
        state: command body to send, e.g. "ON" or "OFF".
        auth_token: bearer token; when empty/None no Authorization header is sent.
        url: endpoint that receives the command.
        timeout: seconds to wait for the server, so an unreachable endpoint
            cannot stall the frame loop that calls this on every prediction.

    Returns:
        The ``requests.Response`` (whatever its status code), or ``None`` when
        the request itself failed (connection error, timeout, ...).
    """
    # NOTE(review): the default bearer token is a secret committed in source.
    # It is kept only so existing callers keep working; rotate it and load it
    # from the environment or a secrets store.
    headers = {"Content-Type": "text/plain"}
    if auth_token:
        headers["Authorization"] = f"Bearer {auth_token}"

    try:
        response = requests.post(url, data=state, headers=headers, timeout=timeout)
    except requests.exceptions.RequestException as e:
        print(f"✗ Error occurred: {e}")
        return None

    if response.status_code == 200:
        print(f"✓ Light successfully set to {state}")
    else:
        print("✗ Failed to control light")
    # Status and body are reported for success and failure alike.
    print(f"Status Code: {response.status_code}")
    print(f"Response: {response.text}")
    return response

def process_image_by_mediapipe(image, hands):
    """Detect a hand in ``image`` and return its normalized landmark vector.

    Uses only the first hand reported by ``hands.process``. Returns the flat
    array produced by ``normalize_landmarks``, or ``None`` if no hand was found.
    """
    result = hands.process(image)
    if not result.multi_hand_landmarks:
        return None

    first_hand = result.multi_hand_landmarks[0]
    # One [x, y, z] row per landmark, flattened to a single vector.
    rows = [[point.x, point.y, point.z] for point in first_hand.landmark]
    return normalize_landmarks(np.array(rows).flatten())

# Not appended to anywhere in this cell's visible code.
list_hand_landmarks = []
# MediaPipe module aliases used for detection and for drawing the overlay.
mp_drawing_styles = mp.solutions.drawing_styles
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

# Hand detector for the live loop: detection needs 0.8 confidence, tracking 0.5.
hands = mp_hands.Hands(
        min_detection_confidence=0.8,
        min_tracking_confidence=0.5)

# Load the classifier written by the training cell.
model = tf.keras.models.load_model('model.keras')

# When False the loop below only prints predictions and switches the light;
# no OpenCV window is opened.
display_image = False 

# Poll the IP camera, classify the first detected hand and switch the light.
while True:
    frame = get_picture_from_url(url_cam)  # Returns RGB image
    image_rgb = cv2.flip(frame, 1)  # Flip horizontally, keep RGB

    # MediaPipe gets a read-only copy so image_rgb stays writable for drawing.
    image_rgb_processing = image_rgb.copy()
    image_rgb_processing.flags.writeable = False

    # Run detection once per frame; the same result feeds both the classifier
    # and the overlay, instead of processing the frame a second time.
    hand_results = hands.process(image_rgb_processing)

    landmarks = None
    if hand_results.multi_hand_landmarks:
        first_hand = hand_results.multi_hand_landmarks[0]
        landmarks = normalize_landmarks(np.array(
            [coord for point in first_hand.landmark
             for coord in (point.x, point.y, point.z)]
        ))

    if landmarks is None:
        if display_image:
            image_bgr = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2BGR)
            cv2.putText(image_bgr, "No hand detected", (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)
            cv2.imshow('MediaPipe Hands', image_bgr)
    else:
        prediction = model.predict(np.array([landmarks], dtype=np.float32), verbose=0)
        predicted_label = labels[np.argmax(prediction)]
        confidence = np.max(prediction)

        print(f"Prediction: {predicted_label} with confidence {confidence:.2f}")
        switch_light(predicted_label)
        if display_image:
            image_bgr = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2BGR)
            # multi_hand_landmarks is non-empty on this branch.
            for hand_landmarks in hand_results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(
                    image_bgr,
                    hand_landmarks,
                    mp_hands.HAND_CONNECTIONS,
                    mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                    mp_drawing.DrawingSpec(color=(250, 44, 250), thickness=2, circle_radius=2)
                )
            text = f"{predicted_label}: {confidence:.2%}"
            cv2.putText(image_bgr, text, (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
            cv2.imshow('MediaPipe Hands', image_bgr)

    # Mask the key code first and test the flag separately: `&` binds tighter
    # than `==`, so `key & 0xFF == ord('q') & display_image` compared the key
    # with `ord('q') & display_image` and pressing 'q' never quit.
    key = cv2.waitKey(10) & 0xFF
    if display_image and key == ord('q'):
        cv2.destroyAllWindows()
        break


Prediction: paper with confidence 1.00
✓ Light successfully set to OFF
Status Code: 200
Response: 
Prediction: rock with confidence 1.00
✓ Light successfully set to ON
Status Code: 200
Response: 
Prediction: scissors with confidence 1.00
✓ Light successfully set to OFF
Status Code: 200
Response: 
Prediction: scissors with confidence 0.71
✓ Light successfully set to OFF
Status Code: 200
Response: 
Prediction: rock with confidence 1.00
✓ Light successfully set to ON
Status Code: 200
Response: 
Prediction: rock with confidence 0.99
✓ Light successfully set to ON
Status Code: 200
Response: 
Prediction: rock with confidence 0.99
✓ Light successfully set to ON
Status Code: 200
Response: 
Prediction: paper with confidence 1.00
✓ Light successfully set to OFF
Status Code: 200
Response: 
Prediction: paper with confidence 1.00
✓ Light successfully set to OFF
Status Code: 200
Response: 
Prediction: rock with confidence 1.00
✓ Light successfully set to ON
Status Code: 200
Response: 
Prediction: ro

KeyboardInterrupt: 

# Just MediaPipe

In [5]:
import cv2
import mediapipe as mp
import pandas as pd


def process_landmarks_to_dataframe(all_frames_landmarks, landmark_type='hand'):
    """Flatten per-frame landmark detections into a long-format DataFrame.

    ``all_frames_landmarks`` holds one entry per frame; each truthy entry is an
    iterable of detected objects exposing a ``.landmark`` sequence of points
    with ``x``, ``y`` and ``z``. Falsy entries are skipped but still count
    towards the frame index. The result has one row per landmark with the
    columns 'frame', '<landmark_type>_id', 'landmark', 'x', 'y' and 'z'.
    """
    id_column = f'{landmark_type}_id'
    rows = [
        {
            'frame': frame_no,
            id_column: object_no,
            'landmark': point_no,
            'x': point.x,
            'y': point.y,
            'z': point.z,
        }
        for frame_no, detections in enumerate(all_frames_landmarks)
        if detections
        for object_no, detection in enumerate(detections)
        for point_no, point in enumerate(detection.landmark)
    ]
    return pd.DataFrame(rows)

def get_picture_from_url(url, timeout=5):
    """Download one image from ``url`` and return it as a BGR array for OpenCV.

    Args:
        url: HTTP address that serves an image.
        timeout: seconds to wait for the server before giving up, so a
            stalled source cannot block the caller forever.

    Returns:
        An array of shape (height, width, 3) in BGR channel order.

    Raises:
        requests.exceptions.RequestException: on connection errors, timeouts
            or a non-success HTTP status.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    # Normalise to 3-channel RGB first: PIL keeps the file's native mode, and
    # the RGB->BGR conversion below expects colour input.
    image = Image.open(BytesIO(response.content)).convert("RGB")
    return cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)


def process_image_by_mediapipe(image, hands):
    """Return the normalized landmark vector of the first hand in ``image``.

    ``hands`` must expose ``process(image)`` whose result has a
    ``multi_hand_landmarks`` attribute. Returns ``None`` if that attribute is
    empty or None.
    """
    hands_found = hands.process(image).multi_hand_landmarks
    if hands_found:
        xyz = []
        for point in hands_found[0].landmark:
            xyz += [point.x, point.y, point.z]
        # Wrist-centre and rescale before handing the vector to the model.
        return normalize_landmarks(np.array(xyz))
    return None
    
def normalize_landmarks(landmarks_array):
    """Centre landmarks on the wrist and scale them to unit reach.

    Input and output are flat arrays of consecutive (x, y, z) triples; the
    first triple is the wrist. Scaling is skipped when no point differs from
    the wrist.
    """
    pts = landmarks_array.reshape(-1, 3)
    # Subtract a copy of the wrist row so it ends up at the origin.
    pts = pts - pts[0].copy()
    farthest = np.linalg.norm(pts, axis=1).max()
    scaled = pts / farthest if farthest > 0 else pts
    return scaled.flatten()
    


###------------------------------------------------------------------------------------------



# Per-frame hand detections captured by the loop below, later turned into a DataFrame.
list_hand_landmarks = []
# The next two lists are not referenced again in the visible code.
list_face_landmarks = []
df_hand_landmarks = []


# MediaPipe module aliases: drawing helpers plus the hands and pose solutions
# (mp_holistic is not used in the visible code).
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands
mp_pose = mp.solutions.pose
mp_holistic = mp.solutions.holistic
mp_drawing_styles = mp.solutions.drawing_styles

# Load the classifier written by the training cell.
model = tf.keras.models.load_model('model.keras')


# Open capture device 0 as the frame source.
cap = cv2.VideoCapture(0)

# Drawing styles for a face mesh; no face mesh is created in the visible code,
# so these specs and REFINED_LANDMARKS appear to be unused.
tesselation_drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1, color=(100,150,100))
contours_drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1, color=(150,200,250))

REFINED_LANDMARKS = False


# Initialize the Hands and Pose models
hands = mp_hands.Hands(
        min_detection_confidence=0.8,
        min_tracking_confidence=0.5)
pose = mp_pose.Pose(
            model_complexity=1,
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5)


# Webcam loop: track hands and pose, classify the first hand, draw an overlay.
delay = 0.1  # seconds to pause between frames

try:
    if not cap.isOpened():
        raise RuntimeError("Could not open video capture device 0")

    while True:
        ret, frame = cap.read()
        if not ret:
            # A failed grab yields no image; skip it instead of crashing in cv2.flip.
            print("Ignoring empty camera frame.")
            time.sleep(delay)
            continue

        # Flip the image horizontally for a selfie-view display
        # and convert the BGR image to RGB.
        image_rgb = cv2.cvtColor(cv2.flip(frame, 1), cv2.COLOR_BGR2RGB)
        image_rgb.flags.writeable = False

        # Each model runs exactly once per frame.
        hand_results = hands.process(image_rgb)
        pose_results = pose.process(image_rgb)

        image_rgb.flags.writeable = True
        image_bgr = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2BGR)  # Convert back to BGR for OpenCV

        landmarks = None
        if hand_results.multi_hand_landmarks:
            # ------------- Data Capture -------------------
            list_hand_landmarks.append(hand_results.multi_hand_landmarks)

            # Classifier input: normalized x, y, z of the first hand, taken
            # from the detection above rather than re-processing the frame.
            first_hand = hand_results.multi_hand_landmarks[0]
            landmarks = normalize_landmarks(np.array(
                [coord for point in first_hand.landmark
                 for coord in (point.x, point.y, point.z)]
            ))

            # ------------- Drawing -------------------
            for hand_landmarks in hand_results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(
                    image_bgr,
                    hand_landmarks,
                    mp_hands.HAND_CONNECTIONS,
                    mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                    mp_drawing.DrawingSpec(color=(250, 44, 250), thickness=2, circle_radius=2)
                )

        # Draw pose landmarks
        if pose_results.pose_landmarks:
            mp_drawing.draw_landmarks(
                image_bgr,
                pose_results.pose_landmarks,
                mp_pose.POSE_CONNECTIONS,
                landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style()
            )

        if landmarks is None:
            # Display "No hand detected" on the image
            cv2.putText(image_bgr, "No hand detected", (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)
        else:
            prediction = model.predict(np.array([landmarks], dtype=np.float32), verbose=0)
            predicted_label = labels[np.argmax(prediction)]
            confidence = np.max(prediction)

            # Display prediction on the image
            text = f"{predicted_label}: {confidence:.2%}"
            cv2.putText(image_bgr, text, (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
            print(f"Prediction: {predicted_label} with confidence {confidence:.2f}")

        # ---------------- End ----------------------
        cv2.imshow('Body Tracking', image_bgr)
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

        time.sleep(delay)
finally:
    # Free the camera and close the window even if the loop raised or was interrupted.
    cap.release()
    cv2.destroyAllWindows()

# --- Convert the captured landmark data to DataFrames ---
# One row per landmark of every hand captured during the webcam loop.
hand_df = process_landmarks_to_dataframe(list_hand_landmarks, 'hand')


# --- Display the DataFrames ---
print("--- Hand Landmarks DataFrame ---")
# Using display() is great for notebooks like Jupyter/Dataspell.
# If running a standard .py script, you might prefer print(hand_df)
# NOTE: display is not imported in this cell; it relies on the notebook
# environment or on the earlier `from IPython.display import display`.
display(hand_df)



Prediction: rock with confidence 1.00
Prediction: rock with confidence 1.00
Prediction: rock with confidence 1.00
Prediction: rock with confidence 1.00
Prediction: rock with confidence 1.00
Prediction: rock with confidence 1.00
Prediction: rock with confidence 1.00
Prediction: rock with confidence 1.00
Prediction: rock with confidence 1.00
Prediction: rock with confidence 1.00
Prediction: rock with confidence 1.00
Prediction: paper with confidence 1.00
Prediction: paper with confidence 1.00
Prediction: paper with confidence 1.00
Prediction: paper with confidence 1.00
Prediction: paper with confidence 1.00
Prediction: paper with confidence 1.00
Prediction: paper with confidence 1.00
Prediction: paper with confidence 1.00
Prediction: paper with confidence 1.00
Prediction: paper with confidence 1.00
Prediction: paper with confidence 1.00
Prediction: paper with confidence 1.00
Prediction: paper with confidence 1.00
Prediction: paper with confidence 1.00
Prediction: paper with confidence 1.

Unnamed: 0,frame,hand_id,landmark,x,y,z
0,0,0,0,0.178537,0.669285,-5.491133e-07
1,0,0,1,0.192061,0.610426,1.678064e-02
2,0,0,2,0.227453,0.574999,2.045002e-02
3,0,0,3,0.255540,0.569960,2.023895e-02
4,0,0,4,0.267841,0.578728,1.864945e-02
...,...,...,...,...,...,...
3775,178,0,16,0.722239,0.470668,-6.580956e-02
3776,178,0,17,0.729762,0.533465,-4.327568e-02
3777,178,0,18,0.770903,0.478535,-6.234528e-02
3778,178,0,19,0.765163,0.482829,-6.125655e-02


In [None]:
from PIL import Image
from io import BytesIO
import requests
import time
import numpy as np
import cv2

# NOTE(review): same address and plain-text credentials as `url_cam` in the
# first cell — consider reusing that constant and moving the credentials out
# of the source.
url = "http://admin:12345@10.100.91.200/image/jpeg.cgi"

# Plain preview: fetch a frame, show it, and repeat until 'q' is pressed.
print("Press 'q' to quit...")
while True:
    try:
        # NOTE(review): stream=True looks unnecessary here, since the full
        # body is read right away through response.content — confirm.
        response = requests.get(url, stream=True, timeout=5)
        response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)
        image = Image.open(BytesIO(response.content))
        # PIL yields RGB; OpenCV's imshow expects BGR.
        image_bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
        cv2.imshow("Camera Feed", image_bgr)
    except requests.exceptions.RequestException as e:
        # Network/HTTP failures are reported and the loop simply tries again.
        print(f"Error fetching image: {e}")
    except Exception as e:
        # Decode/convert/display failures are reported without stopping the loop.
        print(f"Error processing image: {e}")
    
    if cv2.waitKey(1) & 0xFF == ord('q'):
        print("Quitting...")
        break
    
    time.sleep(0.01)  # Small delay to prevent busy waiting

cv2.destroyAllWindows()
