In [1]:
import numpy as np
import pandas as pd
import scipy as sp

from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.metrics import f1_score, r2_score, roc_auc_score, accuracy_score
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier

In [2]:
# Remote CSV locations for the training set, the test set and the
# submission template. Plain string literals: nothing is interpolated.
trainData = 'https://raw.githubusercontent.com/Datamanim/datarepo/main/muscle/train.csv'
testData = 'https://raw.githubusercontent.com/Datamanim/datarepo/main/muscle/test.csv'
subData = 'https://raw.githubusercontent.com/Datamanim/datarepo/main/muscle/submission.csv'

In [3]:
# Download the three CSV files (train, test, submission template) into
# DataFrames, in that order.
train, test, sub = (
    pd.read_csv(url) for url in (trainData, testData, subData)
)

In [4]:
# Target is the 'pose' column; every other column is used as a feature.
y = train['pose']

# Standardise the features (zero mean, unit variance per column).
# NOTE(review): the scaler is fitted on the full training frame before the
# hold-out split below, so the validation rows influence the scaling statistics.
X = StandardScaler().fit_transform(train.drop(columns=['pose']))

In [5]:
# Hold out 20% of the labelled rows for validation; the seed fixes the split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [6]:
# Random forest scored on the hold-out split.
# random_state is pinned (same seed as the split and the other seeded models)
# so the F1 score, and the submission later generated from `rf`, are
# reproducible after a kernel restart.
rf = RandomForestClassifier(random_state=42).fit(X_train, y_train)
y_pred = rf.predict(X_test)
f1_score(y_test, y_pred)

0.9936708860759494

In [7]:
# AdaBoost scored on the hold-out split.
# random_state is pinned so the reported F1 score is reproducible and
# comparable with the other seeded models in this notebook.
ab = AdaBoostClassifier(random_state=42).fit(X_train, y_train)
y_pred = ab.predict(X_test)
f1_score(y_test, y_pred)

0.9957894736842106

In [8]:
# Gradient boosting scored on the hold-out split.
# random_state is pinned so the reported F1 score is reproducible and
# comparable with the other seeded models in this notebook.
gb = GradientBoostingClassifier(random_state=42).fit(X_train, y_train)
y_pred = gb.predict(X_test)
f1_score(y_test, y_pred)

0.9957983193277311

In [9]:
# Logistic-regression baseline scored on the hold-out split.
lr = LogisticRegression(random_state=42)
lr.fit(X_train, y_train)

y_pred = lr.predict(X_test)
f1_score(y_test, y_pred)

0.6322067594433399

In [10]:
# XGBoost scored on the hold-out split.
# The target is scored with f1_score's default binary averaging, i.e. it is a
# two-class problem, so the evaluation metric is the binary 'logloss' rather
# than the multiclass 'mlogloss'.
# use_label_encoder=False is kept for compatibility with the xgboost release
# this notebook was written against.
xg = XGBClassifier(
    random_state=42,
    eval_metric='logloss',
    use_label_encoder=False,
).fit(X_train, y_train)
y_pred = xg.predict(X_test)
f1_score(y_test, y_pred)

0.9978991596638656

In [12]:
# Scale the test features with statistics learned from the TRAINING features.
# Fitting a fresh StandardScaler on the test set would standardise it with a
# different mean/std than the data the models were trained on.
# The scaler is refitted here on the same training columns that produced `X`,
# so its statistics are identical to the ones used for training.
feature_columns = train.columns.drop('pose')
test_scaler = StandardScaler().fit(train[feature_columns])

# Selecting by name keeps the training column order and ignores any extra
# column (such as 'pose') present in the test frame.
# The isinstance guard makes the cell safe to re-run: once `test` has been
# replaced by the scaled array it is not scaled a second time.
if isinstance(test, pd.DataFrame):
    test = test_scaler.transform(test[feature_columns])

In [13]:
# Predict the test set with the random forest and store the predictions in
# column '0' of the submission frame.
# NOTE(review): presumably '0' is the prediction column of the submission
# template -- confirm against submission.csv.
test_pred = rf.predict(test)
sub['0'] = test_pred

# Write the submission file without the index column.
sub.to_csv(path_or_buf='18375.csv', index=False)