# In [None]:
import cv2
import json
import numpy as np
import torch
import yaml
from collections import OrderedDict, defaultdict
from st_gcn.net.st_gcn import Model
import pandas as pd

def load_skeleton_data(json_path, start_frame, num_frames=120, num_person_in=1, num_person_out=1,
                       num_joints=18):
    """Load a fixed-length window of skeleton frames from a JSON file.

    The JSON document is expected to hold a ``'data'`` list of frames. Each
    frame provides ``'frame_index'`` and a ``'skeleton'`` list whose entries
    carry ``'pose'`` (flattened x, y pairs) and ``'score'`` (one value per
    joint).

    Args:
        json_path: Path of the skeleton JSON file.
        start_frame: Position in the ``'data'`` list where the window starts;
            it is also subtracted from each ``frame_index`` to get the slot
            inside the window.
        num_frames: Window length (T).
        num_person_in: Maximum number of skeletons read per frame.
        num_person_out: Number of person slots in the output (M).
        num_joints: Number of joints per skeleton (V).

    Returns:
        ``numpy.ndarray`` of shape ``(3, num_frames, num_joints, num_person_out)``
        with channels (x, y, score). x and y are shifted by -0.5 and reset to
        0 wherever the score is 0; slots without data stay all-zero.
    """
    with open(json_path, 'r') as f:
        data = json.load(f)

    skeleton_data = np.zeros((3, num_frames, num_joints, num_person_out))  # C, T, V, M

    for frame in data['data'][start_frame:start_frame + num_frames]:
        frame_index = frame['frame_index'] - start_frame
        if frame_index < 0:
            # A negative index would silently wrap around and overwrite the
            # tail of the window, so frames before the window are ignored.
            continue
        if frame_index >= num_frames:
            break

        for m, skeleton in enumerate(frame['skeleton'][:num_person_in]):
            if m >= num_person_out:
                break
            pose = np.array(skeleton['pose']).reshape(-1, 2)
            score = np.array(skeleton['score'])

            skeleton_data[0, frame_index, :, m] = pose[:, 0]
            skeleton_data[1, frame_index, :, m] = pose[:, 1]
            skeleton_data[2, frame_index, :, m] = score

    # Shift coordinates by -0.5 (presumably centring normalised [0, 1]
    # coordinates -- confirm against the data producer), then blank out
    # joints that have no confidence.
    skeleton_data[0:2] = skeleton_data[0:2] - 0.5
    undetected = skeleton_data[2] == 0
    skeleton_data[0][undetected] = 0
    skeleton_data[1][undetected] = 0

    return skeleton_data

def predict_action(model, skeleton_data):
    """Run the model on one skeleton window and return class probabilities.

    Args:
        model: A torch module; the input is moved to the device of its first
            parameter.
        skeleton_data: Array-like window; a leading batch axis of size 1 is
            added before the forward pass.

    Returns:
        1-D numpy array holding the softmax (over dim 1) of the model output
        for the single batch element.
    """
    target_device = next(model.parameters()).device
    batch = torch.FloatTensor(skeleton_data).unsqueeze(0).to(target_device)

    # Inference only: no gradients are needed for the forward pass or softmax.
    with torch.no_grad():
        logits = model(batch)
        probs = torch.nn.functional.softmax(logits, dim=1)

    return probs[0].cpu().numpy()

# Joint indices along the joint (V) axis of the skeleton array.
# NOTE(review): presumably the OpenPose 18-keypoint ordering -- confirm
# against the pose extractor that produced the data.
RIGHT_SHOULDER_INDEX = 2
LEFT_SHOULDER_INDEX = 5
RIGHT_WRIST_INDEX = 4
LEFT_WRIST_INDEX = 7

class ArmExerciseCounter:
    """Count arm raise/lower repetitions in skeleton sequences.

    A repetition is one transition from the 'up' state back to the 'down'
    state. A state change needs ``min_frames_for_state`` accumulated frames
    in which the wrists are beyond ``threshold`` above (smaller y) or below
    (larger y) the mean shoulder height.

    Args:
        threshold: Required vertical margin between wrist and shoulder.
        min_frames_for_state: Frames needed before a state change is accepted.
    """

    def __init__(self, threshold=0.1, min_frames_for_state=6):
        self.is_arm_up = False  # not read by count_exercises; kept for existing users
        self.count = 0  # running total across all count_exercises calls
        self.threshold = threshold
        self.min_frames_for_state = min_frames_for_state

    def count_exercises(self, skeleton_data):
        """Add the repetitions found in ``skeleton_data`` to the running total.

        Args:
            skeleton_data: Array indexed as ``[channel, frame, joint, person]``
                with y in channel 1 and, when a third channel exists, the
                joint confidence in channel 2. Only person 0 is inspected.

        Returns:
            The cumulative repetition count over all calls on this instance.
        """
        total_frames = skeleton_data.shape[1]
        has_scores = skeleton_data.shape[0] > 2
        tracked_joints = (RIGHT_SHOULDER_INDEX, LEFT_SHOULDER_INDEX,
                          RIGHT_WRIST_INDEX, LEFT_WRIST_INDEX)
        up_frames = 0
        down_frames = 0
        current_state = 'down'  # every sequence is assumed to start with arms down
        count = 0

        for frame in range(total_frames):
            # Joints with zero confidence carry placeholder coordinates, so a
            # frame missing any tracked joint cannot be classified reliably.
            if has_scores and any(skeleton_data[2, frame, joint, 0] == 0
                                  for joint in tracked_joints):
                continue

            right_shoulder_y = skeleton_data[1, frame, RIGHT_SHOULDER_INDEX, 0]
            left_shoulder_y = skeleton_data[1, frame, LEFT_SHOULDER_INDEX, 0]
            right_wrist_y = skeleton_data[1, frame, RIGHT_WRIST_INDEX, 0]
            left_wrist_y = skeleton_data[1, frame, LEFT_WRIST_INDEX, 0]

            shoulder_y = (right_shoulder_y + left_shoulder_y) / 2
            # The larger y is used, so 'up' requires both wrists above the
            # shoulder line while 'down' needs only one wrist below it.
            wrist_y = max(right_wrist_y, left_wrist_y)

            if wrist_y < shoulder_y - self.threshold:
                up_frames += 1
                down_frames = 0
            elif wrist_y > shoulder_y + self.threshold:
                down_frames += 1
                up_frames = 0

            if current_state == 'down' and up_frames >= self.min_frames_for_state:
                current_state = 'up'
                up_frames = 0
            elif current_state == 'up' and down_frames >= self.min_frames_for_state:
                current_state = 'down'
                down_frames = 0
                count += 1  # one full up -> down cycle completed

        self.count += count
        return self.count

def draw_skeleton(frame, keypoints):
    """Draw joints and limbs of one skeleton onto ``frame``.

    Args:
        frame: Image passed to the cv2 drawing calls; it is drawn on in place.
        keypoints: Sequence of ``(x, y, prob)`` entries, one per joint.
            Entries with fewer than three values, and limb endpoints beyond
            the end of the sequence, are ignored instead of raising.

    Returns:
        The same ``frame`` object.
    """
    def is_visible(index):
        # A joint is drawable when it exists, is well-formed and confident enough.
        return (index < len(keypoints)
                and len(keypoints[index]) >= 3
                and keypoints[index][2] > 0.1)

    for index in range(len(keypoints)):
        if is_visible(index):
            x, y = keypoints[index][0], keypoints[index][1]
            cv2.circle(frame, (int(x), int(y)), 5, (0, 255, 255), thickness=-1, lineType=cv2.FILLED)

    # Joint index pairs that are connected by a line.
    pairs = [(1, 2), (1, 5), (2, 3), (3, 4), (5, 6), (6, 7), (1, 8), (8, 9), (9, 10),
             (1, 11), (11, 12), (12, 13), (1, 0), (0, 14), (14, 16), (0, 15), (15, 17)]
    for part_a, part_b in pairs:
        if is_visible(part_a) and is_visible(part_b):
            cv2.line(frame,
                     (int(keypoints[part_a][0]), int(keypoints[part_a][1])),
                     (int(keypoints[part_b][0]), int(keypoints[part_b][1])),
                     (0, 255, 255), 2)
    return frame

# Model setup: build the network from the YAML config and load its weights.
config_path = "../config/st_gcn/kinetics-skeleton-from-rawdata/test.yaml"

with open(config_path, 'r') as f:
    arg = yaml.safe_load(f)

model_args = arg['model_args']
model = Model(**model_args)

weights_path = "../model/half_80.pt"
# map_location="cpu" lets a checkpoint saved on a GPU load on CPU-only
# machines; the model is moved to the selected device further down.
# NOTE: torch.load unpickles the file -- only load checkpoints you trust.
weights = torch.load(weights_path, map_location="cpu")

# Drop the "module." key prefix (presumably left by a DataParallel wrapper
# at training time) so the keys match the bare model.
new_weights = OrderedDict()
for k, v in weights.items():
    new_weights[k.replace("module.", "")] = v

# strict=False tolerates key mismatches; report them so a partially loaded
# (and therefore partly untrained) model does not go unnoticed.
load_result = model.load_state_dict(new_weights, strict=False)
missing_keys = getattr(load_result, 'missing_keys', [])
unexpected_keys = getattr(load_result, 'unexpected_keys', [])
if missing_keys or unexpected_keys:
    print(f"Warning: checkpoint mismatch - missing keys: {list(missing_keys)}, "
          f"unexpected keys: {list(unexpected_keys)}")
model.eval()

device = torch.device(f"cuda:{arg['device']}" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Label for each model output index, in order.
class_labels = ['walk', 'stand', 'sit', 'armExe', 'lieDown', 'fall']

# Input video, its processed skeleton JSON (fed to the model) and the raw
# skeleton JSON (used for the on-screen overlay).
video_path = "../long_video/long (1).mp4"
json_path = "../data/Kinetics/kinetics-skeleton/long (1).json"
output_raw_path = "../data/Kinetics/kinetics-skeleton-raw/long (1)_raw.json"

results = []  # one entry per classified window
arm_exercise_frames = []  # skeleton windows of the current 'armExe' run
arm_exercise_counter = ArmExerciseCounter()
total_arm_exercise_count = 0

with open(json_path, 'r') as f:
    data = json.load(f)

with open(output_raw_path, 'r') as f:
    raw_data = json.load(f)

total_frames = len(data['data'])
window_size = 105  # frames per classified window
step_size = 90  # frames between the starts of consecutive windows

cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Without this check a bad path silently yields an empty video and CSVs.
    raise IOError(f"Could not open video: {video_path}")
fps = cap.get(cv2.CAP_PROP_FPS)
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter('output_video.mp4', fourcc, fps, (frame_width, frame_height))
if not out.isOpened():
    print("Warning: could not open 'output_video.mp4' for writing; no video will be saved.")

# Predictions at or below this confidence are reported as "None".
probability_threshold = 0.4

frame_count = 0
current_prediction = "None"
current_confidence = 0.0

# Consecutive windows share this many frames whenever window_size > step_size.
window_overlap = max(window_size - step_size, 0)

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Overlay the raw skeleton of the first person, if one was detected.
        if frame_count < len(raw_data['data']):
            skeletons = raw_data['data'][frame_count]['skeleton']
            if skeletons:
                keypoints = skeletons[0]['pose']
                # The flat pose list is split into groups of three values per joint.
                keypoints = [keypoints[i:i+3] for i in range(0, len(keypoints), 3)]
                frame = draw_skeleton(frame, keypoints)

        # Classify a new window every step_size frames, as long as a full
        # window still fits inside the skeleton data.
        if frame_count % step_size == 0:
            start_frame = frame_count
            if start_frame + window_size <= total_frames:
                skeleton_data = load_skeleton_data(json_path, start_frame, num_frames=window_size,
                                                   num_person_in=model_args['num_person'],
                                                   num_person_out=model_args['num_person'])
                prediction = predict_action(model, skeleton_data)
                predicted_class = np.argmax(prediction)
                confidence = prediction[predicted_class]

                if confidence > probability_threshold:
                    current_prediction = class_labels[predicted_class]
                    current_confidence = confidence
                else:
                    current_prediction = "None"
                    current_confidence = 0.0

                if current_prediction == 'armExe':
                    if arm_exercise_frames:
                        # The previous window of this run already holds the
                        # first window_overlap frames; appending them again
                        # would replay motion and distort the count.
                        arm_exercise_frames.append(skeleton_data[:, window_overlap:])
                    else:
                        arm_exercise_frames.append(skeleton_data)
                elif arm_exercise_frames:
                    # The run of 'armExe' windows ended: count it and reset.
                    combined_skeleton_data = np.concatenate(arm_exercise_frames, axis=1)
                    total_arm_exercise_count = arm_exercise_counter.count_exercises(combined_skeleton_data)
                    arm_exercise_frames = []

                results.append({
                    'start_time': start_frame / fps,
                    'end_time': (start_frame + window_size) / fps,
                    'predicted_action': current_prediction,
                    'confidence': current_confidence,
                    'total_arm_exercise_count': total_arm_exercise_count
                })

        text = f"{current_prediction} ({current_confidence:.2f}) - Total Count: {total_arm_exercise_count}"

        # Centre the status text horizontally near the top of the frame.
        text_size = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 1, 2)[0]
        text_x = (frame_width - text_size[0]) // 2
        text_y = 50

        cv2.putText(frame, text, (text_x, text_y), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

        cv2.imshow('Video', frame)
        out.write(frame)

        # Pressing 'q' stops processing early.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        frame_count += 1

    # Count an 'armExe' run that was still open when the video ended.
    if arm_exercise_frames:
        combined_skeleton_data = np.concatenate(arm_exercise_frames, axis=1)
        total_arm_exercise_count = arm_exercise_counter.count_exercises(combined_skeleton_data)

    print(f"Total Count: {total_arm_exercise_count}")
finally:
    # Release the capture, writer and windows even if processing failed.
    cap.release()
    out.release()
    cv2.destroyAllWindows()

# Save the per-window predictions.
df = pd.DataFrame(results)
df.to_csv('action_predictions.csv', index=False)

# Total time per action. Consecutive windows can overlap, so each window is
# only credited up to the start of the next one; otherwise the overlap would
# be counted twice and the totals could exceed the length of the video.
action_durations = defaultdict(float)
for position, result in enumerate(results):
    end_time = result['end_time']
    if position + 1 < len(results):
        end_time = min(end_time, results[position + 1]['start_time'])
    action_durations[result['predicted_action']] += end_time - result['start_time']

def format_time(seconds):
    """Format a duration in seconds as ``"<minutes>m <seconds>s"``.

    The value is truncated to an integer before it is split into minutes
    and seconds.
    """
    whole_seconds = int(seconds)
    return f"{whole_seconds // 60}m {whole_seconds % 60}s"

# Write one row per action with its formatted total duration.
action_summary = []
for action_name, total_seconds in action_durations.items():
    action_summary.append({"Action": action_name, "Total Duration": format_time(total_seconds)})
summary_frame = pd.DataFrame(action_summary)
summary_frame.to_csv('action_durations.csv', index=False)