# MLRD-20 Dataset Demo

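This notebook demonstrates how to explore and visualize the Marathi Lip Reading Dataset (MLRD-20). It does not include any model training. It expects a `videos/` directory and an `annotation.csv` file (with a `word_text` column) in the notebook's working directory.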

## 📁 Load and Explore Dataset Directory

In [None]:
import os

# Directory holding the dataset's video clips, relative to the notebook's
# working directory.
data_path = "videos"  # Adjust if needed

# os.listdir() returns every directory entry in arbitrary order, including
# hidden ones such as ".DS_Store" or ".ipynb_checkpoints". Dot-prefixed names
# sort ahead of letters and digits, so without this filter one of them could
# end up as video_files[0] and be handed to the video decoder further down.
visible_entries = [name for name in os.listdir(data_path) if not name.startswith(".")]

# Keep only the first five names (sorted) as a small preview sample.
video_files = sorted(visible_entries)[:5]
print("Sample video files:", video_files)

## 📄 Load Annotations

In [None]:
import pandas as pd

# Read the annotation table from the CSV in the working directory, then
# preview its first five rows as this cell's rich output.
annotations = pd.read_csv("annotation.csv")
annotations.head(n=5)

## 📊 Word Distribution

In [None]:
import matplotlib.pyplot as plt

# Tally how often each word label occurs, ordered from most to least frequent.
label_tally = annotations['word_text'].value_counts()
word_counts = label_tally.sort_values(ascending=False)

# Draw the tally as a bar chart on an explicitly created 10x5-inch figure.
fig, ax = plt.subplots(figsize=(10, 5))
word_counts.plot(kind='bar', ax=ax)
ax.set_title("Word Distribution in MLRD-20")
ax.set_xlabel("Word")
ax.set_ylabel("Frequency")

# Tilt the tick labels on the x axis to 45 degrees.
plt.xticks(rotation=45)
fig.tight_layout()
plt.show()

## 🎞️ Display a Sample Video Frame

In [None]:
import cv2
import matplotlib.pyplot as plt

# Preview the first frame of the first listed clip.
if not video_files:
    # Indexing an empty listing would raise IndexError; report it instead.
    print(f"No video files found in: {data_path}")
else:
    sample_video = os.path.join(data_path, video_files[0])
    cap = cv2.VideoCapture(sample_video)
    try:
        # read() returns a success flag together with the frame; the flag is
        # checked below before the frame is used.
        ret, frame = cap.read()
    finally:
        # Free the capture handle even if read() raises.
        cap.release()

    if ret:
        # Convert from BGR to RGB channel order before handing the frame to
        # matplotlib for display.
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        plt.imshow(frame_rgb)
        plt.title(f"Sample Frame from: {video_files[0]}")
        plt.axis('off')
        plt.show()
    else:
        print("Failed to load video.")