download model 

In [6]:
import requests
import os

ROOT_URL = 'http://moments.csail.mit.edu/moments_models'
weights = {
    'resnet50': 'moments_v2_RGB_resnet50_imagenetpretrained.pth.tar',
    'resnet3d50': 'moments_v2_RGB_imagenet_resnet3d50_segment16.pth.tar',
    'multi_resnet3d50': 'multi_moments_v2_RGB_imagenet_resnet3d50_segment16.pth.tar',
}

def download_model_files(root_url, weights_dict, download_dir):
    os.makedirs(download_dir, exist_ok=True)  # Ensure directory exists
    for key, filename in weights_dict.items():
        file_path = os.path.join(download_dir, filename)  # Full path to save the file
        url = f"{root_url}/{filename}"
        response = requests.get(url, stream=True)
        if response.status_code == 200:
            with open(file_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=1024):
                    if chunk:  # filter out keep-alive new chunks
                        f.write(chunk)
            print(f"Downloaded {filename} successfully.")
        else:
            print(f"Failed to download {filename}. Status code: {response.status_code}")

# Specify a directory within your home directory for example
home_dir = os.path.expanduser('~')
download_dir = os.path.join(home_dir, 'downloaded_models')

# Call the function to download the model files
download_model_files(ROOT_URL, weights, download_dir)


Downloaded moments_v2_RGB_resnet50_imagenetpretrained.pth.tar successfully.
Downloaded moments_v2_RGB_imagenet_resnet3d50_segment16.pth.tar successfully.
Downloaded multi_moments_v2_RGB_imagenet_resnet3d50_segment16.pth.tar successfully.


locate the model path

In [49]:
import os
import fnmatch

def find_files(directory, pattern):
    """
    Search recursively for files matching the given pattern.
    """
    for root, dirs, files in os.walk(directory):
        for basename in files:
            if fnmatch.fnmatch(basename, pattern):
                filename = os.path.join(root, basename)
                print(filename)

# Specify the directory to start the search from, e.g., the home directory or the entire filesystem
start_dir = '/'  # You can use '/' for entire filesystem on Unix or 'C:/' for Windows

# Model filename patterns from the download section
patterns = ['moments_v2_RGB_resnet50_imagenetpretrained.pth.tar', 
            'moments_v2_RGB_imagenet_resnet3d50_segment16.pth.tar', 
            'multi_moments_v2_RGB_imagenet_resnet3d50_segment16.pth.tar']

# Search for each pattern
for pattern in patterns:
    print(f"Searching for {pattern} in {start_dir}")
    find_files(start_dir, pattern)


Searching for moments_v2_RGB_resnet50_imagenetpretrained.pth.tar in /
/System/Volumes/Data/Users/wangjiji/moments_models/moments_v2_RGB_resnet50_imagenetpretrained.pth.tar
/System/Volumes/Data/Users/wangjiji/downloaded_models/moments_v2_RGB_resnet50_imagenetpretrained.pth.tar
/Users/wangjiji/moments_models/moments_v2_RGB_resnet50_imagenetpretrained.pth.tar
/Users/wangjiji/downloaded_models/moments_v2_RGB_resnet50_imagenetpretrained.pth.tar
Searching for moments_v2_RGB_imagenet_resnet3d50_segment16.pth.tar in /
/System/Volumes/Data/Users/wangjiji/moments_models/moments_v2_RGB_imagenet_resnet3d50_segment16.pth.tar


KeyboardInterrupt: 

label the video 2D model

In [70]:
import os
import cv2
import torch
import torchvision.transforms as transforms
from torch.autograd import Variable
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

def load_model(model_path):
    model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet50', pretrained=False)
    num_ftrs = model.fc.in_features
    model.fc = torch.nn.Linear(num_ftrs, 305)
    model.load_state_dict(torch.load(model_path)['state_dict'])
    model.eval()
    return model

def load_labels(label_files):
    labels = {}
    current_index = 0
    for file_path in label_files:
        with open(file_path, 'r') as file:
            for line in file:
                labels[current_index] = line.strip()
                current_index += 1
    return labels

transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

def label_video(video_path, model, labels):
    predictions = []
    cap = cv2.VideoCapture(video_path)
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        frame = transform(frame)
        frame = frame.unsqueeze(0)
        frame = Variable(frame)
        with torch.no_grad():
            outputs = model(frame)
            _, predicted = torch.max(outputs.data, 1)
            predicted_label = labels.get(predicted.item(), "Unknown Label")
            predictions.append(predicted_label)
    cap.release()
    return predictions

# Parameters
model_path = '/System/Volumes/Data/Users/wangjiji/moments_models/moments_v2_RGB_resnet50_imagenetpretrained.pth.tar'
label_files = [
    '/Users/wangjiji/Desktop/moments_models-2/category_momentsv2.txt',
    '/Users/wangjiji/Desktop/moments_models-2/category_multi_momentsv2.txt'
]
labels = load_labels(label_files)
model = load_model(model_path)
video_directory = '/Users/wangjiji/Desktop/Social touch deep learning project/STMP4/'
video_files = [f"{i:03}.mp4" for i in range(1, 72)]  # Generate video filenames from 001.mp4 to 071.mp4

# Process each video and collect results
all_predictions = {}
for video_file in video_files:
    video_path = os.path.join(video_directory, video_file)
    if os.path.exists(video_path):
        predictions = label_video(video_path, model, labels)
        all_predictions[video_file] = predictions
    else:
        print(f"File not found: {video_path}")

# Example: Saving results to a CSV file
df = pd.DataFrame.from_dict(all_predictions, orient='index')
df.to_csv('/Users/wangjiji/Desktop/predictions2D.csv')



Using cache found in /Users/wangjiji/.cache/torch/hub/pytorch_vision_v0.10.0


File not found: /Users/wangjiji/Desktop/Social touch deep learning project/STMP4/006.mp4
File not found: /Users/wangjiji/Desktop/Social touch deep learning project/STMP4/009.mp4
File not found: /Users/wangjiji/Desktop/Social touch deep learning project/STMP4/021.mp4
File not found: /Users/wangjiji/Desktop/Social touch deep learning project/STMP4/023.mp4
File not found: /Users/wangjiji/Desktop/Social touch deep learning project/STMP4/031.mp4
File not found: /Users/wangjiji/Desktop/Social touch deep learning project/STMP4/032.mp4
File not found: /Users/wangjiji/Desktop/Social touch deep learning project/STMP4/034.mp4
File not found: /Users/wangjiji/Desktop/Social touch deep learning project/STMP4/036.mp4
File not found: /Users/wangjiji/Desktop/Social touch deep learning project/STMP4/040.mp4
File not found: /Users/wangjiji/Desktop/Social touch deep learning project/STMP4/046.mp4
File not found: /Users/wangjiji/Desktop/Social touch deep learning project/STMP4/053.mp4
File not found: /User

label the video 3D model

In [73]:
import os
import cv2
import torch
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import fvcore
import pytorchvideo


def load_model(model_path):
    # Assuming model_path is correctly set to your pre-trained model
    model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet50', pretrained=False)
    num_ftrs = model.fc.in_features
    model.fc = torch.nn.Linear(num_ftrs, 305)  # Adjust according to the number of classes
    model.load_state_dict(torch.load(model_path)['state_dict'])
    model.eval()
    return model

def load_labels(label_files):
    labels = {}
    current_index = 0
    for file_path in label_files:
        with open(file_path, 'r') as file:
            for line in file:
                labels[current_index] = line.strip()
                current_index += 1
    return labels

def video_to_tensor(video_path, sequence_length=16):
    """Extract sequences from a video file and convert them to tensor format."""
    cap = cv2.VideoCapture(video_path)
    frames = []
    while len(frames) < sequence_length and cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frames.append(frame)
    cap.release()

    if len(frames) != sequence_length:
        return None

    transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
    
    frame_tensors = torch.stack([transform(image) for image in frames])
    frame_tensors = frame_tensors.unsqueeze(0)  # Add batch dimension
    return frame_tensors

def predict_video(model, video_tensor, labels):
    """Predict the label for a video tensor."""
    with torch.no_grad():
        outputs = model(video_tensor)
        _, predicted = torch.max(outputs, 1)
        predicted_label = labels.get(predicted.item(), "Unknown Label")
    return predicted_label

# Initialize model and labels
model_path = 'Users/wangjiji/moments_v2_RGB_imagenet_resnet3d50_segment16.pth.tar'  # Update this path
label_file_path =  [
    '/Users/wangjiji/Desktop/moments_models-2/category_momentsv2.txt',
    '/Users/wangjiji/Desktop/moments_models-2/category_multi_momentsv2.txt'
]
model = load_model(model_path)
labels = load_labels(label_file_path)

# Directory containing videos
video_directory = '/Users/wangjiji/Desktop/Social touch deep learning project/STMP4/'
video_files = [f"{i:03}.mp4" for i in range(1, 72)]

# Process videos and store predictions
predictions = {}
for video_file in video_files:
    video_path = os.path.join(video_directory, video_file)
    video_tensor = video_to_tensor(video_path)
    if video_tensor is not None:
        prediction = predict_video(model, video_tensor, labels)
        predictions[video_file] = prediction
    else:
        print(f"Skipped video {video_file}: insufficient frames")

# Save results to a CSV
df = pd.DataFrame.from_dict(predictions, orient='index', columns=['Prediction'])
df.to_csv('/path/to/output/predictions.csv')



Using cache found in /Users/wangjiji/.cache/torch/hub/pytorch_vision_v0.10.0


RuntimeError: Error(s) in loading state_dict for ResNet:
	Missing key(s) in state_dict: "fc.weight", "fc.bias". 
	Unexpected key(s) in state_dict: "last_linear.weight", "last_linear.bias". 
	size mismatch for conv1.weight: copying a param with shape torch.Size([64, 3, 7, 7, 7]) from checkpoint, the shape in current model is torch.Size([64, 3, 7, 7]).
	size mismatch for layer1.0.conv1.weight: copying a param with shape torch.Size([64, 64, 1, 1, 1]) from checkpoint, the shape in current model is torch.Size([64, 64, 1, 1]).
	size mismatch for layer1.0.conv2.weight: copying a param with shape torch.Size([64, 64, 3, 3, 3]) from checkpoint, the shape in current model is torch.Size([64, 64, 3, 3]).
	size mismatch for layer1.0.conv3.weight: copying a param with shape torch.Size([256, 64, 1, 1, 1]) from checkpoint, the shape in current model is torch.Size([256, 64, 1, 1]).
	size mismatch for layer1.0.downsample.0.weight: copying a param with shape torch.Size([256, 64, 1, 1, 1]) from checkpoint, the shape in current model is torch.Size([256, 64, 1, 1]).
	size mismatch for layer1.1.conv1.weight: copying a param with shape torch.Size([64, 256, 1, 1, 1]) from checkpoint, the shape in current model is torch.Size([64, 256, 1, 1]).
	size mismatch for layer1.1.conv2.weight: copying a param with shape torch.Size([64, 64, 3, 3, 3]) from checkpoint, the shape in current model is torch.Size([64, 64, 3, 3]).
	size mismatch for layer1.1.conv3.weight: copying a param with shape torch.Size([256, 64, 1, 1, 1]) from checkpoint, the shape in current model is torch.Size([256, 64, 1, 1]).
	size mismatch for layer1.2.conv1.weight: copying a param with shape torch.Size([64, 256, 1, 1, 1]) from checkpoint, the shape in current model is torch.Size([64, 256, 1, 1]).
	size mismatch for layer1.2.conv2.weight: copying a param with shape torch.Size([64, 64, 3, 3, 3]) from checkpoint, the shape in current model is torch.Size([64, 64, 3, 3]).
	size mismatch for layer1.2.conv3.weight: copying a param with shape torch.Size([256, 64, 1, 1, 1]) from checkpoint, the shape in current model is torch.Size([256, 64, 1, 1]).
	size mismatch for layer2.0.conv1.weight: copying a param with shape torch.Size([128, 256, 1, 1, 1]) from checkpoint, the shape in current model is torch.Size([128, 256, 1, 1]).
	size mismatch for layer2.0.conv2.weight: copying a param with shape torch.Size([128, 128, 3, 3, 3]) from checkpoint, the shape in current model is torch.Size([128, 128, 3, 3]).
	size mismatch for layer2.0.conv3.weight: copying a param with shape torch.Size([512, 128, 1, 1, 1]) from checkpoint, the shape in current model is torch.Size([512, 128, 1, 1]).
	size mismatch for layer2.0.downsample.0.weight: copying a param with shape torch.Size([512, 256, 1, 1, 1]) from checkpoint, the shape in current model is torch.Size([512, 256, 1, 1]).
	size mismatch for layer2.1.conv1.weight: copying a param with shape torch.Size([128, 512, 1, 1, 1]) from checkpoint, the shape in current model is torch.Size([128, 512, 1, 1]).
	size mismatch for layer2.1.conv2.weight: copying a param with shape torch.Size([128, 128, 3, 3, 3]) from checkpoint, the shape in current model is torch.Size([128, 128, 3, 3]).
	size mismatch for layer2.1.conv3.weight: copying a param with shape torch.Size([512, 128, 1, 1, 1]) from checkpoint, the shape in current model is torch.Size([512, 128, 1, 1]).
	size mismatch for layer2.2.conv1.weight: copying a param with shape torch.Size([128, 512, 1, 1, 1]) from checkpoint, the shape in current model is torch.Size([128, 512, 1, 1]).
	size mismatch for layer2.2.conv2.weight: copying a param with shape torch.Size([128, 128, 3, 3, 3]) from checkpoint, the shape in current model is torch.Size([128, 128, 3, 3]).
	size mismatch for layer2.2.conv3.weight: copying a param with shape torch.Size([512, 128, 1, 1, 1]) from checkpoint, the shape in current model is torch.Size([512, 128, 1, 1]).
	size mismatch for layer2.3.conv1.weight: copying a param with shape torch.Size([128, 512, 1, 1, 1]) from checkpoint, the shape in current model is torch.Size([128, 512, 1, 1]).
	size mismatch for layer2.3.conv2.weight: copying a param with shape torch.Size([128, 128, 3, 3, 3]) from checkpoint, the shape in current model is torch.Size([128, 128, 3, 3]).
	size mismatch for layer2.3.conv3.weight: copying a param with shape torch.Size([512, 128, 1, 1, 1]) from checkpoint, the shape in current model is torch.Size([512, 128, 1, 1]).
	size mismatch for layer3.0.conv1.weight: copying a param with shape torch.Size([256, 512, 1, 1, 1]) from checkpoint, the shape in current model is torch.Size([256, 512, 1, 1]).
	size mismatch for layer3.0.conv2.weight: copying a param with shape torch.Size([256, 256, 3, 3, 3]) from checkpoint, the shape in current model is torch.Size([256, 256, 3, 3]).
	size mismatch for layer3.0.conv3.weight: copying a param with shape torch.Size([1024, 256, 1, 1, 1]) from checkpoint, the shape in current model is torch.Size([1024, 256, 1, 1]).
	size mismatch for layer3.0.downsample.0.weight: copying a param with shape torch.Size([1024, 512, 1, 1, 1]) from checkpoint, the shape in current model is torch.Size([1024, 512, 1, 1]).
	size mismatch for layer3.1.conv1.weight: copying a param with shape torch.Size([256, 1024, 1, 1, 1]) from checkpoint, the shape in current model is torch.Size([256, 1024, 1, 1]).
	size mismatch for layer3.1.conv2.weight: copying a param with shape torch.Size([256, 256, 3, 3, 3]) from checkpoint, the shape in current model is torch.Size([256, 256, 3, 3]).
	size mismatch for layer3.1.conv3.weight: copying a param with shape torch.Size([1024, 256, 1, 1, 1]) from checkpoint, the shape in current model is torch.Size([1024, 256, 1, 1]).
	size mismatch for layer3.2.conv1.weight: copying a param with shape torch.Size([256, 1024, 1, 1, 1]) from checkpoint, the shape in current model is torch.Size([256, 1024, 1, 1]).
	size mismatch for layer3.2.conv2.weight: copying a param with shape torch.Size([256, 256, 3, 3, 3]) from checkpoint, the shape in current model is torch.Size([256, 256, 3, 3]).
	size mismatch for layer3.2.conv3.weight: copying a param with shape torch.Size([1024, 256, 1, 1, 1]) from checkpoint, the shape in current model is torch.Size([1024, 256, 1, 1]).
	size mismatch for layer3.3.conv1.weight: copying a param with shape torch.Size([256, 1024, 1, 1, 1]) from checkpoint, the shape in current model is torch.Size([256, 1024, 1, 1]).
	size mismatch for layer3.3.conv2.weight: copying a param with shape torch.Size([256, 256, 3, 3, 3]) from checkpoint, the shape in current model is torch.Size([256, 256, 3, 3]).
	size mismatch for layer3.3.conv3.weight: copying a param with shape torch.Size([1024, 256, 1, 1, 1]) from checkpoint, the shape in current model is torch.Size([1024, 256, 1, 1]).
	size mismatch for layer3.4.conv1.weight: copying a param with shape torch.Size([256, 1024, 1, 1, 1]) from checkpoint, the shape in current model is torch.Size([256, 1024, 1, 1]).
	size mismatch for layer3.4.conv2.weight: copying a param with shape torch.Size([256, 256, 3, 3, 3]) from checkpoint, the shape in current model is torch.Size([256, 256, 3, 3]).
	size mismatch for layer3.4.conv3.weight: copying a param with shape torch.Size([1024, 256, 1, 1, 1]) from checkpoint, the shape in current model is torch.Size([1024, 256, 1, 1]).
	size mismatch for layer3.5.conv1.weight: copying a param with shape torch.Size([256, 1024, 1, 1, 1]) from checkpoint, the shape in current model is torch.Size([256, 1024, 1, 1]).
	size mismatch for layer3.5.conv2.weight: copying a param with shape torch.Size([256, 256, 3, 3, 3]) from checkpoint, the shape in current model is torch.Size([256, 256, 3, 3]).
	size mismatch for layer3.5.conv3.weight: copying a param with shape torch.Size([1024, 256, 1, 1, 1]) from checkpoint, the shape in current model is torch.Size([1024, 256, 1, 1]).
	size mismatch for layer4.0.conv1.weight: copying a param with shape torch.Size([512, 1024, 1, 1, 1]) from checkpoint, the shape in current model is torch.Size([512, 1024, 1, 1]).
	size mismatch for layer4.0.conv2.weight: copying a param with shape torch.Size([512, 512, 3, 3, 3]) from checkpoint, the shape in current model is torch.Size([512, 512, 3, 3]).
	size mismatch for layer4.0.conv3.weight: copying a param with shape torch.Size([2048, 512, 1, 1, 1]) from checkpoint, the shape in current model is torch.Size([2048, 512, 1, 1]).
	size mismatch for layer4.0.downsample.0.weight: copying a param with shape torch.Size([2048, 1024, 1, 1, 1]) from checkpoint, the shape in current model is torch.Size([2048, 1024, 1, 1]).
	size mismatch for layer4.1.conv1.weight: copying a param with shape torch.Size([512, 2048, 1, 1, 1]) from checkpoint, the shape in current model is torch.Size([512, 2048, 1, 1]).
	size mismatch for layer4.1.conv2.weight: copying a param with shape torch.Size([512, 512, 3, 3, 3]) from checkpoint, the shape in current model is torch.Size([512, 512, 3, 3]).
	size mismatch for layer4.1.conv3.weight: copying a param with shape torch.Size([2048, 512, 1, 1, 1]) from checkpoint, the shape in current model is torch.Size([2048, 512, 1, 1]).
	size mismatch for layer4.2.conv1.weight: copying a param with shape torch.Size([512, 2048, 1, 1, 1]) from checkpoint, the shape in current model is torch.Size([512, 2048, 1, 1]).
	size mismatch for layer4.2.conv2.weight: copying a param with shape torch.Size([512, 512, 3, 3, 3]) from checkpoint, the shape in current model is torch.Size([512, 512, 3, 3]).
	size mismatch for layer4.2.conv3.weight: copying a param with shape torch.Size([2048, 512, 1, 1, 1]) from checkpoint, the shape in current model is torch.Size([2048, 512, 1, 1]).