## Data Augmentation

## Oversampling the Asian dataset using SMOTE

In [2]:
import pandas as pd
from imblearn.over_sampling import SMOTE

DATASET_PATH = r'D:\StudySpace\Nam4_KyI\cap1_ai_feature\data\final_dataset_asian.csv'

# Read the Asian dataset and separate the numeric features from the target label.
df = pd.read_csv(DATASET_PATH)
feature_columns = ['Weight', 'Height', 'Age']
target_column = 'Exercise Recommendation Plan'
X = df[feature_columns]
y = df[target_column]

# Oversample with SMOTE; the fixed random_state keeps the synthetic rows
# reproducible between runs.
smote = SMOTE(random_state=42, sampling_strategy='auto')
X_resampled, y_resampled = smote.fit_resample(X, y)

# Report the per-class row counts after resampling.
class_counts = pd.Series(y_resampled).value_counts()
print(class_counts)

# Re-attach the target to the resampled features and summarize the result.
# Nothing is written to disk in this cell; the frame is only inspected.
updated_df = pd.concat([X_resampled, y_resampled], axis=1)
summary = updated_df.describe()
print(summary)

Exercise Recommendation Plan
6    1527
5    1527
4    1527
7    1527
1    1527
3    1527
2    1527
Name: count, dtype: int64
             Weight        Height           Age  Exercise Recommendation Plan
count  10689.000000  10689.000000  10689.000000                  10689.000000
mean      73.096537      1.792563     41.347554                      4.000000
std       21.167430      0.125538     13.746923                      2.000094
min       50.000000      1.390171     18.000000                      1.000000
25%       55.956581      1.731472     29.000000                      2.000000
50%       65.728249      1.805061     41.000000                      4.000000
75%       87.356168      1.891175     53.000000                      6.000000
max      160.000000      1.990000     65.000000                      7.000000


In [3]:
import pandas as pd
from imblearn.over_sampling import SMOTE

DATASET_PATH = r'D:\StudySpace\Nam4_KyI\cap1_ai_feature\data\final_dataset_asian.csv'
OUTPUT_PATH = r'D:\StudySpace\Nam4_KyI\cap1_ai_feature\data\oversampled_dataset.csv'

df = pd.read_csv(DATASET_PATH)

# Select the features and the target label
X = df[['Weight', 'Height', 'BMI', 'Gender', 'Age', 'BMIcase']]
y = df['Exercise Recommendation Plan']

# Transform the categorical columns into one-hot encoded columns so that SMOTE
# receives purely numeric input.
X_encoded = pd.get_dummies(X, columns=['Gender', 'BMIcase'], drop_first=True)

# drop_first=True removes one BMIcase level from the encoding; rows of that level
# are represented by "all BMIcase_* columns are zero". Remember which level was
# dropped so it can be restored when decoding after resampling.
bmicase_baseline = next(
    level
    for level in X['BMIcase'].dropna().unique()
    if f'BMIcase_{level}' not in X_encoded.columns
)

smote = SMOTE(sampling_strategy='auto', random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_encoded, y)

# Transform the resampled features back to DataFrame
X_resampled = pd.DataFrame(X_resampled, columns=X_encoded.columns)

# Process Gender column: decode the single dummy column with a vectorized
# comparison. (The previous row-wise `x[0]` lookup relied on positional access
# through a label index, which pandas deprecates with a FutureWarning.)
X_resampled['Gender'] = X_resampled['Gender_Male'].eq(1).map({True: 'Male', False: 'Female'})
X_resampled = X_resampled.drop(columns=['Gender_Male'])

# Process the BMIcase columns
bmicase_columns = [col for col in X_resampled.columns if col.startswith('BMIcase_')]
bmicase_dummies = X_resampled[bmicase_columns]
# A row with no active dummy belongs to the dropped baseline level; idxmax alone
# would mislabel it as the first remaining level.
has_encoded_level = bmicase_dummies.astype(bool).any(axis=1)
X_resampled['BMIcase'] = (
    bmicase_dummies.idxmax(axis=1)
    # Split only on the first underscore so level names containing '_' survive.
    .str.split('_', n=1).str[1]
    .where(has_encoded_level, bmicase_baseline)
)
X_resampled = X_resampled.drop(columns=bmicase_columns)

# Combine features and target into a single DataFrame
resampled_df = pd.concat([X_resampled, pd.Series(y_resampled, name='Exercise Recommendation Plan')], axis=1)

resampled_df.to_csv(OUTPUT_PATH, index=False)

print("Dataset after oversampling:")
print(resampled_df['Exercise Recommendation Plan'].value_counts())


  X_resampled['Gender'] = X_resampled[['Gender_Male']].apply(lambda x: 'Male' if x[0] == 1 else 'Female', axis=1)


Dataset after oversampling:
Exercise Recommendation Plan
6    1527
5    1527
4    1527
7    1527
1    1527
3    1527
2    1527
Name: count, dtype: int64


## Oversampling the European dataset using SMOTE

In [1]:
import pandas as pd
from imblearn.over_sampling import SMOTE

# Paths to the original dataset and to the balanced output file
DATASET_PATH = r'D:\StudySpace\Nam4_KyI\cap1_ai_feature\data\final_dataset.csv'
OUTPUT_PATH = r'D:\StudySpace\Nam4_KyI\cap1_ai_feature\data\balanced_dataset_european.csv'

df = pd.read_csv(DATASET_PATH)

# Select the features and the target label
X = df[['Weight', 'Height', 'BMI', 'Gender', 'Age', 'BMIcase']]
y = df['Exercise Recommendation Plan']

# Transform the categorical columns into one-hot encoded columns so that SMOTE
# receives purely numeric input.
X_encoded = pd.get_dummies(X, columns=['Gender', 'BMIcase'], drop_first=True)

# drop_first=True removes one BMIcase level from the encoding; rows of that level
# are represented by "all BMIcase_* columns are zero". Remember which level was
# dropped so it can be restored when decoding after resampling.
bmicase_baseline = next(
    level
    for level in X['BMIcase'].dropna().unique()
    if f'BMIcase_{level}' not in X_encoded.columns
)

smote = SMOTE(sampling_strategy='auto', random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_encoded, y)

# Transform the resampled features back to DataFrame
X_resampled = pd.DataFrame(X_resampled, columns=X_encoded.columns)

# Process Gender column: decode the single dummy column with a vectorized
# comparison. (The previous row-wise `x[0]` lookup relied on positional access
# through a label index, which pandas deprecates with a FutureWarning.)
X_resampled['Gender'] = X_resampled['Gender_Male'].eq(1).map({True: 'Male', False: 'Female'})
X_resampled = X_resampled.drop(columns=['Gender_Male'])

# Process the BMIcase columns
bmicase_columns = [col for col in X_resampled.columns if col.startswith('BMIcase_')]
bmicase_dummies = X_resampled[bmicase_columns]
# A row with no active dummy belongs to the dropped baseline level; idxmax alone
# would mislabel it as the first remaining level.
has_encoded_level = bmicase_dummies.astype(bool).any(axis=1)
X_resampled['BMIcase'] = (
    bmicase_dummies.idxmax(axis=1)
    # Split only on the first underscore so level names containing '_' survive.
    .str.split('_', n=1).str[1]
    .where(has_encoded_level, bmicase_baseline)
)
X_resampled = X_resampled.drop(columns=bmicase_columns)

# Combine features and target into a single DataFrame
resampled_df = pd.concat([X_resampled, pd.Series(y_resampled, name='Exercise Recommendation Plan')], axis=1)

resampled_df.to_csv(OUTPUT_PATH, index=False)

print("Dataset after oversampling:")
print(resampled_df['Exercise Recommendation Plan'].value_counts())


  X_resampled['Gender'] = X_resampled[['Gender_Male']].apply(lambda x: 'Male' if x[0] == 1 else 'Female', axis=1)


Dataset after oversampling:
Exercise Recommendation Plan
5    1467
4    1467
6    1467
7    1467
1    1467
3    1467
2    1467
Name: count, dtype: int64


# Posture Checking Test

In [5]:
import cv2
# Show the configuration the installed OpenCV binary was built with
# (version, platform and CPU/HW feature sections appear in the output).
opencv_build_info = cv2.getBuildInformation()
print(opencv_build_info)


  Version control:               4.11.0

  Platform:
    Timestamp:                   2025-01-16T09:54:51Z
    Host:                        Windows 10.0.17763 AMD64
    CMake:                       3.24.2
    CMake generator:             Visual Studio 14 2015
    CMake build tool:            MSBuild.exe
    MSVC:                        1900
    Configuration:               Debug Release
    Algorithm Hint:              ALGO_HINT_ACCURATE

  CPU/HW features:
    Baseline:                    SSE SSE2 SSE3
      requested:                 SSE3
    Dispatched code generation:  SSE4_1 SSE4_2 AVX FP16 AVX2
      requested:                 SSE4_1 SSE4_2 AVX FP16 AVX2 AVX512_SKX
      SSE4_1 (16 files):         + SSSE3 SSE4_1
      SSE4_2 (1 files):          + SSSE3 SSE4_1 POPCNT SSE4_2
      AVX (8 files):             + SSSE3 SSE4_1 POPCNT SSE4_2 AVX
      FP16 (0 files):            + SSSE3 SSE4_1 POPCNT SSE4_2 AVX FP16
      AVX2 (36 files):           + SSSE3 SSE4_1 POPCNT SSE4_2 AVX FP16 A