In [4]:
import warnings
warnings.filterwarnings("ignore")

In [5]:
from torchvision.io.video import read_video
from torchvision.models.video import r3d_18, R3D_18_Weights

vid, _, _ = read_video("v_SoccerJuggling_g24_c01.avi", output_format="TCHW")
vid = vid[:32]  # optionally shorten duration

# Step 1: Initialize model with the best available weights
weights = R3D_18_Weights.DEFAULT
model = r3d_18(weights=weights)
model.eval()

# Step 2: Initialize the inference transforms
preprocess = weights.transforms()

# Step 3: Apply inference preprocessing transforms
batch = preprocess(vid).unsqueeze(0)

# Step 4: Use the model and print the predicted category
prediction = model(batch).squeeze(0).softmax(0)
label = prediction.argmax().item()
score = prediction[label].item()
category_name = weights.meta["categories"][label]
print(f"{category_name}: {100 * score}%")

juggling soccer ball: 14.975011348724365%


In [6]:
from torchvision.io.video import read_video
from torchvision.models.video import r3d_18, R3D_18_Weights

vid, _, _ = read_video("v_SoccerJuggling_g23_c01.avi", output_format="TCHW")
# vid = vid[:32]  # optionally shorten duration

# Step 1: Initialize model with the best available weights
weights = R3D_18_Weights.DEFAULT
model = r3d_18(weights=weights)
model.eval()

# Step 2: Initialize the inference transforms
preprocess = weights.transforms()

# Step 3: Apply inference preprocessing transforms
batch = preprocess(vid).unsqueeze(0)

# Step 4: Use the model and print the predicted category
prediction = model(batch).squeeze(0).softmax(0)
label = prediction.argmax().item()
score = prediction[label].item()
category_name = weights.meta["categories"][label]
print(f"{category_name}: {100 * score}%")

juggling soccer ball: 99.91814494132996%


In [1]:
from torchvision.io.video import read_video
from torchvision.models.video import r3d_18, R3D_18_Weights

vid, _, _ = read_video("v_SoccerJuggling_test.mp4", output_format="TCHW")
print(len(vid))
vid = vid[:500]  # optionally shorten duration

# Step 1: Initialize model with the best available weights
weights = R3D_18_Weights.DEFAULT
model = r3d_18(weights=weights)
model.eval()

# Step 2: Initialize the inference transforms
preprocess = weights.transforms()

# Step 3: Apply inference preprocessing transforms
batch = preprocess(vid).unsqueeze(0)

# Step 4: Use the model and print the predicted category
prediction = model(batch).squeeze(0).softmax(0)
label = prediction.argmax().item()
score = prediction[label].item()
category_name = weights.meta["categories"][label]
print(f"{category_name}: {100 * score}%")



1094
juggling soccer ball: 57.67257213592529%


In [1]:
from torchvision.io.video import read_video
from torchvision.models.video import r3d_18, R3D_18_Weights

vid, _, _ = read_video("run_test.mp4", output_format="TCHW")
print(len(vid))
vid = vid[200:300]  # optionally shorten duration

# Step 1: Initialize model with the best available weights
weights = R3D_18_Weights.DEFAULT
model = r3d_18(weights=weights)
model.eval()

# Step 2: Initialize the inference transforms
preprocess = weights.transforms()

# Step 3: Apply inference preprocessing transforms
batch = preprocess(vid).unsqueeze(0)

# Step 4: Use the model and print the predicted category
prediction = model(batch).squeeze(0).softmax(0)
label = prediction.argmax().item()
score = prediction[label].item()
category_name = weights.meta["categories"][label]
print(f"{category_name}: {100 * score}%")



3301
hurdling: 57.0702850818634%


In [4]:
import torch
from torchvision.io.video import read_video
from torchvision.models.video import swin3d_b

# Read video
vid, _, _ = read_video("v_SoccerJuggling_g23_c01.avi", output_format="TCHW")
print(len(vid))

# Step 1: Initialize model with the best available weights
weights = "swin_tiny_patch4_window7_224_kinetics400"
model = create_swin3d(weights, num_classes=400, in_channels=3)
model.eval()

# Step 2: Apply inference preprocessing transforms
vid = vid.unsqueeze(0)
batch_size, num_frames, num_channels, height, width = vid.shape

# Step 3: Reshape and normalize the input video
vid = vid.reshape(batch_size * num_frames, num_channels, height, width)
vid = vid.float() / 255.0
mean = torch.tensor([0.485, 0.456, 0.406]).reshape(1, 3, 1, 1)
std = torch.tensor([0.229, 0.224, 0.225]).reshape(1, 3, 1, 1)
vid = (vid - mean) / std

# Step 4: Use the model and print the predicted category
with torch.no_grad():
    prediction = model(vid)
    prediction = torch.nn.functional.softmax(prediction, dim=1)
    max_prob, max_idx = torch.max(prediction, dim=1)
    category_name = weights.meta["categories"][max_idx.item()]

print(f"{category_name}: {max_prob.item() * 100}%")


ImportError: cannot import name 'create_swin3d' from 'torchvision.models.video' (/home/dmitry/.local/lib/python3.10/site-packages/torchvision/models/video/__init__.py)

In [37]:
from torchvision.io.video import read_video
from torchvision.models.video import s3d, S3D_Weights

vid, _, _ = read_video("v_SoccerJuggling_g23_c01.avi", output_format="TCHW")
print(len(vid))
# vid = vid[200:300]  # optionally shorten duration

# Step 1: Initialize model with the best available weights
weights = S3D_Weights.DEFAULT
model = s3d(weights=weights)
model.eval()

# Step 2: Initialize the inference transforms
preprocess = weights.transforms()

# Step 3: Apply inference preprocessing transforms
batch = preprocess(vid).unsqueeze(0)

# Step 4: Use the model and print the predicted category
prediction = model(batch).squeeze(0).softmax(0)
label = prediction.argmax().item()
score = prediction[label].item()
category_name = weights.meta["categories"][label]
print(f"{category_name}: {100 * score}%")

240
juggling soccer ball: 99.93283748626709%


In [1]:
from torchvision.io.video import read_video
from torchvision.models.video import s3d, S3D_Weights

vid, _, _ = read_video("v_SoccerJuggling_g24_c01.avi", output_format="TCHW")
print(len(vid))
vid = vid[:100]  # optionally shorten duration

# Step 1: Initialize model with the best available weights
weights = S3D_Weights.DEFAULT
model = s3d(weights=weights)
model.eval()

# Step 2: Initialize the inference transforms
preprocess = weights.transforms()

# Step 3: Apply inference preprocessing transforms
batch = preprocess(vid).unsqueeze(0)

# Step 4: Use the model and print the predicted category
prediction = model(batch).squeeze(0).softmax(0)
label = prediction.argmax().item()
score = prediction[label].item()
category_name = weights.meta["categories"][label]
print(f"{category_name}: {100 * score}%")



251
juggling soccer ball: 99.20050501823425%


In [2]:
from torchvision.io.video import read_video
from torchvision.models.video import s3d, S3D_Weights

vid, _, _ = read_video("run_test.mp4", output_format="TCHW")
print(len(vid))
vid = vid[200:300]  # optionally shorten duration

# Step 1: Initialize model with the best available weights
weights = S3D_Weights.DEFAULT
model = s3d(weights=weights)
model.eval()

# Step 2: Initialize the inference transforms
preprocess = weights.transforms()

# Step 3: Apply inference preprocessing transforms
batch = preprocess(vid).unsqueeze(0)

# Step 4: Use the model and print the predicted category
prediction = model(batch).squeeze(0).softmax(0)
label = prediction.argmax().item()
score = prediction[label].item()
category_name = weights.meta["categories"][label]
print(f"{category_name}: {100 * score}%")

3301
hurdling: 93.86360049247742%
