In [24]:
import pandas as pd
import numpy as np
import os
from tqdm.auto import tqdm

from autogluon.tabular import TabularDataset, TabularPredictor

import glob

import torch
from torchvision import models, transforms
import cv2
from PIL import Image
from scipy.stats import skew
import shutil

In [12]:

rootdir = 'C:/Users/user/git/MiraeCity/gesture/labels/txt/'

# Get all txt files in root directory
txt_files = glob.glob(os.path.join(rootdir, '*.txt'))

for txt_file in txt_files:
    # Read txt file
    with open(txt_file, 'r') as f:
        data = f.readlines()
    
    # Split each line into index and label
    records = [line.split() for line in data]
    
    # Convert to DataFrame
    df = pd.DataFrame(records, columns=['index', 'label'])
    
    # Convert txt file name to csv
    csv_file = txt_file.replace('.txt', '.csv')
    
    # Save as csv
    df.to_csv(csv_file, index=False)

In [13]:
print(torch.cuda.is_available())

True


In [25]:
# Source directory
src_dir = 'C:/Users/user/git/MiraeCity/gesture/data/test'
# Destination directories
dst_dir1 = 'C:/Users/user/git/MiraeCity/gesture/data/train'
dst_dir2 = 'C:/Users/user/git/MiraeCity/gesture/data/validation'

# Create destination directories if they don't exist
os.makedirs(dst_dir1, exist_ok=True)
os.makedirs(dst_dir2, exist_ok=True)

# Get all file names in the source directory
files = os.listdir(src_dir)

# Randomly shuffle the file names
np.random.shuffle(files)

# Split files into two groups
split_point = int(len(files) / 10)
group1 = files[:split_point]
group2 = files[split_point:]

# Move files to destination directories
for file_name in group1:
    shutil.move(os.path.join(src_dir, file_name), os.path.join(dst_dir1, file_name))

for file_name in group2:
    shutil.move(os.path.join(src_dir, file_name), os.path.join(dst_dir2, file_name))

In [2]:
def extract_frames(video_dir, output_dir):
    
    # Get all the .mp4 files in the directory
    videos = glob.glob(os.path.join(video_dir, '*.mp4'))

    # Make sure the output directory exists
    os.makedirs(output_dir, exist_ok=True)

    # Iterate over each video
    for video_path in videos:
        
        # Get the video filename without the extension
        video_name = os.path.splitext(os.path.basename(video_path))[0]
        
        # Open the video file
        video = cv2.VideoCapture(video_path)

        # Initialize the frame count
        count = 0

        while True:
            # Read the next frame
            ret, frame = video.read()

            # If the frame is not valid, break the loop
            if not ret:
                break

            # Write the frame to a JPEG file
            frame_name = f"{video_name}_frame{count:04d}.jpg"
            cv2.imwrite(os.path.join(output_dir, frame_name), frame)

            # Increment the frame count
            count += 1

        # Release the video file
        video.release()

In [26]:
extract_frames('../gesture/data/train', '../gesture/data/train_img')
# extract_frames('../gesture/data/val', '../gesture/data/val_img')

In [3]:
##### data preprocessing
# Instantiate a pre-trained ResNet50 model
model = models.resnet50(pretrained=True)
model = torch.nn.Sequential(*(list(model.children())[:-1]))  # remove the last layer
model.eval()

# Transformation pipeline
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [28]:
# Function to extract features from an image
def extract_features(image_path):
    # Read the image
    image = cv2.imread(image_path)

    # Calculate the mean, std dev and skewness of each color channel
    features = []
    for channel in cv2.split(image):
        features.append(np.mean(channel))
        features.append(np.std(channel))
        features.append(skew(channel.ravel()))

    return features

# Directory containing images
image_dir = 'C:/Users/user/git/MiraeCity/gesture/data/train_img'

# Dataframe to hold features
df = pd.DataFrame()

# Loop through each image in the directory
for image_path in os.listdir(image_dir):
    # Get full image path
    full_path = os.path.join(image_dir, image_path)

    # Extract features from the image
    features = extract_features(full_path)

    # Add features to dataframe
    df = df.append(pd.Series(features), ignore_index=True)

# Add column names to the dataframe
df.columns = ['R_mean', 'R_std', 'R_skew', 'G_mean', 'G_std', 'G_skew', 'B_mean', 'B_std', 'B_skew']

# Save the dataframe to a csv file
df.to_csv('image_features.csv', index=False)

  df = df.append(pd.Series(features), ignore_index=True)
  df = df.append(pd.Series(features), ignore_index=True)


In [29]:
# Directory containing images
image_dir = 'C:/Users/user/git/MiraeCity/gesture/data/train_img'

# Read the existing CSV file
df = pd.read_csv('image_features.csv')

# Add an 'index' column with the file names from the image directory
df['index'] = [os.path.splitext(f)[0] for f in os.listdir(image_dir) if f.endswith('.jpg')]

# Reorder the columns to put 'index' first
df = df[['index', 'R_mean', 'R_std', 'R_skew', 'G_mean', 'G_std', 'G_skew', 'B_mean', 'B_std', 'B_skew']]

# Write the updated DataFrame to a new CSV file
df.to_csv('image_features_with_index.csv', index=False)

In [31]:
# Read the existing CSV file
df = pd.read_csv('image_features_with_index.csv')

# Create a new column 'index_mod' and remove the part after '_frame'
df['index_mod'] = df['index'].apply(lambda x: x.split('_frame')[0])

# Read the labels from the txt file
labels = pd.read_csv('C:/Users/user/git/MiraeCity/gesture/labels/txt/test_ann.txt', 
                     sep=" ", 
                     header=None, 
                     names=['index', 'label'])

# Create a new column 'index_mod' and remove the '.mp4' extension
labels['index_mod'] = labels['index'].str.replace('.mp4', '', regex=False)

# Merge the features and labels DataFrames on the 'index_mod' column
df = pd.merge(df, labels, on='index_mod', how='outer')

# Write the updated DataFrame to a new CSV file
df.to_csv('image_features_with_index_and_labels.csv', index=False)

In [32]:

train_df = pd.read_csv('C:/Users/user/git/MiraeCity/gesture/image_features_with_index_and_labels.csv')
# test_df = pd.read_csv('C:/Users/user/git/MiraeCity/gesture/labels/csv/test_ann.csv')

train_data = TabularDataset(train_df)
#test_data = TabularDataset(test_df)

In [33]:
#### autogluon
label = 'label'
eval_metric = 'accuracy'
time_limit = 3600 * 1 # hrs

predictor = TabularPredictor(
    label=label, eval_metric=eval_metric
).fit(train_data, presets='best_quality', time_limit=time_limit, ag_args_fit={'num_gpus': 0, 'num_cpus': 12})

No path specified. Models will be saved in: "AutogluonModels\ag-20230604_011209\"
Presets specified: ['best_quality']
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=8, num_bag_sets=20
Beginning AutoGluon training ... Time limit = 3600s
AutoGluon will save models to "AutogluonModels\ag-20230604_011209\"
AutoGluon Version:  0.7.0
Python Version:     3.9.13
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.22621
Train Data Rows:    51139
Train Data Columns: 12
Label Column: label
Preprocessing data ...
AutoGluon infers your prediction problem is: 'multiclass' (because dtype of label-column == int, but few unique label-values observed).
	5 unique label values:  [3, 4, 0, 1, 2]
	If 'multiclass' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])
Train Data Class Count: 5
Using Feature Generators to prepr

In [34]:
### leaderboard
predictor.leaderboard(silent=True)

Unnamed: 0,model,score_val,pred_time_val,fit_time,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,WeightedEnsemble_L3,0.972741,10.628632,2126.484684,0.0,3.440681,3,True,26
1,CatBoost_BAG_L2,0.972585,4.720982,1225.468858,0.166535,228.091681,2,True,20
2,LightGBMXT_BAG_L2,0.972154,4.760229,1057.426557,0.205783,60.04938,2,True,16
3,XGBoost_BAG_L2,0.972017,4.988222,1081.100321,0.433776,83.723145,2,True,23
4,LightGBM_BAG_L2,0.97147,4.739654,1056.760814,0.185207,59.383637,2,True,17
5,RandomForestGini_BAG_L2,0.970903,5.817471,999.098166,1.263025,1.720989,2,True,18
6,ExtraTreesGini_BAG_L2,0.970473,5.986602,998.171059,1.432155,0.793883,2,True,21
7,RandomForestEntr_BAG_L2,0.970414,5.958028,999.634418,1.403582,2.257242,2,True,19
8,ExtraTreesEntr_BAG_L2,0.970336,5.926689,998.178229,1.372243,0.801052,2,True,22
9,LightGBMLarge_BAG_L2,0.970082,4.815897,1090.884175,0.261451,93.506999,2,True,25


In [None]:
#### inference 
model_to_use = predictor.get_model_best()
model_pred = predictor.predict(train_data, model=model_to_use)

In [None]:
#### result
result = pd.DataFrame()

result['place'] = model_pred
result.to_csv('result.csv', index=False)