In [6]:
!pip install ultralytics opencv-python scikit-image
!pip install -U ipywidgets



In [9]:
!cp -r /kaggle/input/trainable-yolo-cls/* /kaggle/working/

In [10]:
# !rm -r /kaggle/working/processed_data/test

In [None]:
data_yaml_content = """
train: /kaggle/working/processed_data/train
val: /kaggle/working/processed_data/val

nc: 10  # Number of classes
names: ['sweep', 'square_cut', 'hook', 'lofted', 'cover', 'late_cut', 'pull', 'defense', 'flick', 'straight']
"""


# Path where you want to save the data.yaml file
yaml_file_path = '/kaggle/working/data.yaml'

# Save the YAML content to the file
with open(yaml_file_path, 'w') as file:
    file.write(data_yaml_content)

# Confirm the file has been saved
print(f"data.yaml saved at: {yaml_file_path}")

In [None]:
# Read the YAML file content (optional)
with open(yaml_file_path, 'r') as file:
    content = file.read()
    print(content)

In [None]:
# Initialize Weights & Biases (W&B) in disabled mode.

import wandb
wandb.init(mode="disabled")

In [11]:
import pandas as pd
import os

# Base directory for the test videos
test_base_dir = '/kaggle/working/cricshot-split/test'  # Update this path to where your test videos are stored

# Prepare the ground truth DataFrame by scanning the test directory
ground_truth_data = []
for class_name in os.listdir(test_base_dir):
    class_dir = os.path.join(test_base_dir, class_name)
    if os.path.isdir(class_dir):
        for video_name in os.listdir(class_dir):
            if video_name.endswith('.mp4'):  # Assuming the videos are in .mp4 format
                ground_truth_data.append({
                    'video_name': video_name,
                    'true_class': class_name
                })

# Convert to a DataFrame
ground_truth_df = pd.DataFrame(ground_truth_data)
print(ground_truth_df)

           video_name true_class
0      cover_0112.mp4      cover
1      cover_0043.mp4      cover
2      cover_0131.mp4      cover
3      cover_0061.mp4      cover
4      cover_0160.mp4      cover
..                ...        ...
190  defense_0118.mp4    defense
191  defense_0004.mp4    defense
192  defense_0141.mp4    defense
193  defense_0164.mp4    defense
194  defense_0011.mp4    defense

[195 rows x 2 columns]


In [12]:
import cv2
import os
import pandas as pd
from ultralytics import YOLO
from collections import Counter, defaultdict

saved_model_path = '/kaggle/input/yolov8-l-cls-23-eps-v2/pytorch/default/1/best(1).pt'
# saved_model_path = '/kaggle/working/runs/classify/train/weights/best.pt'

# Load the trained YOLOv8 model
model = YOLO(saved_model_path)  # Update to your trained model path

# Initialize a list to store predictions
predictions = []

# Function to extract exactly 32 frames from a video
def extract_frames(video_path, num_frames=32, img_size=(640, 640)):
    cap = cv2.VideoCapture(video_path)
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    if frame_count < num_frames:
        raise ValueError(f"Video {video_path} has fewer frames ({frame_count}) than requested ({num_frames}).")
    
    interval = max(1, frame_count // num_frames)
    frames = []
    for i in range(num_frames):
        cap.set(cv2.CAP_PROP_POS_FRAMES, i * interval)
        ret, frame = cap.read()
        if not ret:
            break
        resized_frame = cv2.resize(frame, img_size)
        frames.append(resized_frame)
    cap.release()
    return frames

# Loop through the test directories and videos
for class_name in os.listdir(test_base_dir):
    class_dir = os.path.join(test_base_dir, class_name)
    if os.path.isdir(class_dir):
        for video_name in os.listdir(class_dir):
            if video_name.endswith('.mp4'):
                video_path = os.path.join(class_dir, video_name)
                
                try:
                    # Extract 32 frames from the video
                    frames = extract_frames(video_path, num_frames=32)
                    
                    # Predict on video frames
                    results = model.predict(frames)
#                     print(results[0].probs.top1)
                    # Initialize a dictionary to accumulate class probabilities
                    class_probabilities = defaultdict(list)

                    # Collect frame-level probabilities
                    for result in results:
                        top_class = result.names[result.probs.top1]
                        top_prob = result.probs.top1conf.item()
                        class_probabilities[top_class].append(top_prob)
                    
                    # Calculate average probability for each class
                    avg_class_probabilities = {cls: sum(probs) / len(results) for cls, probs in class_probabilities.items()}
                    
                    print(avg_class_probabilities)
                    
                    # Determine the class with the highest average probability
                    most_probable_class = max(avg_class_probabilities, key=avg_class_probabilities.get)
                    
                    print(f"Video: {video_name}, Most Probable Class: {most_probable_class}, Avg Probability: {avg_class_probabilities[most_probable_class]:.4f}")
                    
                    # Store the prediction
                    predictions.append({
                        'video_name': video_name,
                        'true_class': class_name,
                        'predicted_class': most_probable_class,
                        'avg_probability': avg_class_probabilities[most_probable_class]
                    })
                except ValueError as e:
                    print(f"Skipping {video_path}: {e}")
                    
                    
# Convert predictions to a DataFrame
predictions_df = pd.DataFrame(predictions)
print(predictions_df)

# Example: Print the predicted class for the first video
if not predictions_df.empty:
    first_prediction = predictions_df.iloc[0]
    print(f"Video: {first_prediction['video_name']}, Predicted Class: {first_prediction['predicted_class']}")


0: 640x640 pull 0.58, square_cut 0.14, hook 0.12, late_cut 0.06, flick 0.03, 10.8ms
1: 640x640 pull 0.58, hook 0.18, square_cut 0.12, flick 0.04, late_cut 0.04, 10.8ms
2: 640x640 square_cut 0.32, hook 0.26, pull 0.25, flick 0.07, cover 0.05, 10.8ms
3: 640x640 square_cut 0.56, pull 0.31, hook 0.07, cover 0.01, sweep 0.01, 10.8ms
4: 640x640 pull 0.51, square_cut 0.27, hook 0.10, late_cut 0.05, cover 0.02, 10.8ms
5: 640x640 pull 0.53, square_cut 0.14, hook 0.14, late_cut 0.10, lofted 0.05, 10.8ms
6: 640x640 pull 0.57, hook 0.17, late_cut 0.09, lofted 0.05, square_cut 0.04, 10.8ms
7: 640x640 pull 0.52, hook 0.18, square_cut 0.10, late_cut 0.08, flick 0.05, 10.8ms
8: 640x640 pull 0.64, hook 0.24, square_cut 0.07, lofted 0.02, flick 0.01, 10.8ms
9: 640x640 pull 0.59, hook 0.18, square_cut 0.12, flick 0.05, cover 0.03, 10.8ms
10: 640x640 square_cut 0.44, pull 0.38, flick 0.11, hook 0.05, lofted 0.01, 10.8ms
11: 640x640 pull 0.69, square_cut 0.09, hook 0.08, flick 0.06, lofted 0.04, 10.8ms
12

KeyboardInterrupt: 

In [None]:
# Merge predictions with ground truth for comparison
comparison_df = predictions_df

# comparison_df.to_csv("comperision_df.csv",index=False)
# Display the comparison DataFrame
comparison_df

In [None]:
import pandas as pd
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import seaborn as sns
import matplotlib.pyplot as plt

# Assume comparison_df is already created

# Calculate accuracy
accuracy = accuracy_score(comparison_df['true_class'], comparison_df['predicted_class'])
print(f"Accuracy: {accuracy:.2f}")

# Generate classification report
class_report = classification_report(comparison_df['true_class'], comparison_df['predicted_class'], target_names=os.listdir(test_base_dir))
print("Classification Report:\n", class_report)

# Generate confusion matrix
cm = confusion_matrix(comparison_df['true_class'], comparison_df['predicted_class'], labels=os.listdir(test_base_dir))

# Plot confusion matrix
plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=os.listdir(test_base_dir), yticklabels=os.listdir(test_base_dir))
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()

# Calculate classification errors
comparison_df['error'] = comparison_df['true_class'] != comparison_df['predicted_class']
error_count = comparison_df['error'].sum()
total_count = len(comparison_df)
error_rate = error_count / total_count

print(f"Total classification errors: {error_count}")
print(f"Error rate: {error_rate:.2f}")

In [None]:
import os
import pandas as pd
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import seaborn as sns
import matplotlib.pyplot as plt

# Assume comparison_df is already created

# Calculate accuracy
accuracy = accuracy_score(comparison_df['true_class'], comparison_df['predicted_class'])
accuracy_percentage = accuracy * 100
accuracy_formatted = f"{accuracy:.6f}"
accuracy_percentage_formatted = f"{accuracy_percentage:.4f}%"
print(f"Accuracy: {accuracy_formatted} ({accuracy_percentage_formatted})")

# Generate classification report
class_report = classification_report(comparison_df['true_class'], comparison_df['predicted_class'], target_names=os.listdir(test_base_dir), digits=4)
print("Classification Report:\n", class_report)

# Generate confusion matrix
cm = confusion_matrix(comparison_df['true_class'], comparison_df['predicted_class'], labels=os.listdir(test_base_dir))

# Plot confusion matrix
plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=os.listdir(test_base_dir), yticklabels=os.listdir(test_base_dir))
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.savefig('confusion_matrix.png')  # Save as PNG
plt.show()
plt.close()  # Close the plot to free memory

# Calculate classification errors
comparison_df['error'] = comparison_df['true_class'] != comparison_df['predicted_class']
error_count = comparison_df['error'].sum()
total_count = len(comparison_df)
error_rate = error_count / total_count
error_rate_percentage = error_rate * 100
error_rate_formatted = f"{error_rate:.4f}"
error_rate_percentage_formatted = f"{error_rate_percentage:.2f}%"

print(f"Total classification errors: {error_count}")
print(f"Error rate: {error_rate_formatted} ({error_rate_percentage_formatted})")

# Export the results
# 1. Save accuracy to a text file
with open('accuracy.txt', 'w') as f:
    f.write(f"Accuracy: {accuracy_formatted} ({accuracy_percentage_formatted})\n")

# 2. Save classification report to a text file
with open('classification_report.txt', 'w') as f:
    f.write("Classification Report:\n")
    f.write(class_report)

# 3. Save the comparison DataFrame including errors to a CSV file
comparison_df.to_csv('classification_errors.csv', index=False)

# 4. Save error summary to a text file
with open('error_summary.txt', 'w') as f:
    f.write(f"Total classification errors: {error_count}\n")
    f.write(f"Error rate: {error_rate_formatted} ({error_rate_percentage_formatted})\n")