In [12]:
import pandas as pd
import numpy as np
import os
from tqdm.auto import tqdm

from autogluon.tabular import TabularDataset, TabularPredictor

import glob

import torch
from torchvision import models, transforms
import cv2
from PIL import Image

In [12]:

rootdir = 'C:/Users/user/git/MiraeCity/gesture/labels/txt/'

# Get all txt files in root directory
txt_files = glob.glob(os.path.join(rootdir, '*.txt'))

for txt_file in txt_files:
    # Read txt file
    with open(txt_file, 'r') as f:
        data = f.readlines()
    
    # Split each line into index and label
    records = [line.split() for line in data]
    
    # Convert to DataFrame
    df = pd.DataFrame(records, columns=['index', 'label'])
    
    # Convert txt file name to csv
    csv_file = txt_file.replace('.txt', '.csv')
    
    # Save as csv
    df.to_csv(csv_file, index=False)

In [13]:
print(torch.cuda.is_available())

True


In [27]:
def extract_frames(video_dir, output_dir):
    
    # Get all the .mp4 files in the directory
    videos = glob.glob(os.path.join(video_dir, '*.mp4'))

    # Make sure the output directory exists
    os.makedirs(output_dir, exist_ok=True)

    # Iterate over each video
    for video_path in videos:
        
        # Get the video filename without the extension
        video_name = os.path.splitext(os.path.basename(video_path))[0]
        
        # Open the video file
        video = cv2.VideoCapture(video_path)

        # Initialize the frame count
        count = 0

        while True:
            # Read the next frame
            ret, frame = video.read()

            # If the frame is not valid, break the loop
            if not ret:
                break

            # Write the frame to a JPEG file
            frame_name = f"{video_name}_frame{count:04d}.jpg"
            cv2.imwrite(os.path.join(output_dir, frame_name), frame)

            # Increment the frame count
            count += 1

        # Release the video file
        video.release()

In [None]:
extract_frames('../gesture/data/test', '../gesture/data/test_img')
# extract_frames('../gesture/data/val', '../gesture/data/val_img')

In [31]:
##### data preprocessing
# Instantiate a pre-trained ResNet50 model
model = models.resnet50(pretrained=True)
model = torch.nn.Sequential(*(list(model.children())[:-1]))  # remove the last layer
model.eval()

# Transformation pipeline
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [30]:
# Function to extract features from an image
def extract_features(image_path):
    # Load the image
    image = Image.open(image_path)
    
    # Apply the transformation and add an extra dimension
    image = transform(image).unsqueeze(0)
    
    # Ensure we get the features from the correct device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    image = image.to(device)
    model.to(device)

    # Forward pass through the model
    with torch.no_grad():
        features = model(image)
    
    return features.cpu().numpy()

# Directory containing images
image_dir = 'path_to_your_images'

# Dataframe to hold features
df = pd.DataFrame()

# Loop through each image in the directory
for image_path in glob.glob(os.path.join(image_dir, '*.jpg')):
    # Extract features from the image
    features = extract_features(image_path)
    
    # Flatten the features and add them to the dataframe
    df = df.append(pd.Series(features.flatten()), ignore_index=True)

# Save the dataframe to a csv file
df.to_csv('test.csv', index=False)

In [33]:
extract_features('C:/Users/user/git/MiraeCity/gesture/data/test_img')
#extract_features('../gesture/data/val_img')

PermissionError: [Errno 13] Permission denied: 'C:/Users/user/git/MiraeCity/gesture/data/test_img'

In [None]:

# train_df = pd.read_csv('C:/Users/user/git/MiraeCity/gesture/labels/csv/train_ann.csv')
# test_df = pd.read_csv('C:/Users/user/git/MiraeCity/gesture/labels/csv/test_ann.csv')

train_data = TabularDataset(train_df)
test_data = TabularDataset(test_df)

In [14]:
train_data

Unnamed: 0,index,label
0,4-10_001-C01.mp4,0
1,4-10_001-C04.mp4,0
2,4-10_001-C05.mp4,0
3,4-10_001-C06.mp4,0
4,4-10_001-C07.mp4,0
...,...,...
7555,13-5_601-C10.mp4,4
7556,13-5_601-C11.mp4,4
7557,13-5_601-C12.mp4,4
7558,13-5_602-C01.mp4,4


In [None]:
#### autogluon
label = 'label'
eval_metric = 'accuracy'
time_limit = 3600 * 0.5 # hrs

predictor = TabularPredictor(
    label=label, eval_metric=eval_metric
).fit(train_data, presets='best_quality', time_limit=time_limit, ag_args_fit={'num_gpus': 0, 'num_cpus': 12})