In [1]:
# Dependencies
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd 
from sklearn import tree

## Objective

Predict body performance by using body performance data from https://www.kaggle.com/datasets/kukuroo3/body-performance-data
class : A,B,C,D ( A: best) 

In [2]:
body_perf_dc = pd.read_csv(r'../data/bodyPerformance.csv')
body_perf_dc.head()

Unnamed: 0,age,gender,height_cm,weight_kg,body fat_%,diastolic,systolic,gripForce,sit and bend forward_cm,sit-ups counts,broad jump_cm,class
0,27.0,M,172.3,75.24,21.3,80.0,130.0,54.9,18.4,60.0,217.0,C
1,25.0,M,165.0,55.8,15.7,77.0,126.0,36.4,16.3,53.0,229.0,A
2,31.0,M,179.6,78.0,20.1,92.0,152.0,44.8,12.0,49.0,181.0,C
3,32.0,M,174.5,71.1,18.4,76.0,147.0,41.4,15.2,53.0,219.0,B
4,28.0,M,173.8,67.7,17.1,70.0,127.0,43.5,27.1,45.0,217.0,B


In [3]:
body_perf_dc = body_perf_dc.replace({'gender':{'M':0 , 'F':1}, \
                                    'class':{'A':1, 'B':2, 'C':3, 'D':4}})
body_perf_dc.head()

Unnamed: 0,age,gender,height_cm,weight_kg,body fat_%,diastolic,systolic,gripForce,sit and bend forward_cm,sit-ups counts,broad jump_cm,class
0,27.0,0,172.3,75.24,21.3,80.0,130.0,54.9,18.4,60.0,217.0,3
1,25.0,0,165.0,55.8,15.7,77.0,126.0,36.4,16.3,53.0,229.0,1
2,31.0,0,179.6,78.0,20.1,92.0,152.0,44.8,12.0,49.0,181.0,3
3,32.0,0,174.5,71.1,18.4,76.0,147.0,41.4,15.2,53.0,219.0,2
4,28.0,0,173.8,67.7,17.1,70.0,127.0,43.5,27.1,45.0,217.0,2


In [4]:
target = body_perf_dc["class"]
target_names = ["1", "2", "3", "4"]

In [5]:
data = body_perf_dc.drop("class", axis=1)
feature_names = data.columns
data.head()

Unnamed: 0,age,gender,height_cm,weight_kg,body fat_%,diastolic,systolic,gripForce,sit and bend forward_cm,sit-ups counts,broad jump_cm
0,27.0,0,172.3,75.24,21.3,80.0,130.0,54.9,18.4,60.0,217.0
1,25.0,0,165.0,55.8,15.7,77.0,126.0,36.4,16.3,53.0,229.0
2,31.0,0,179.6,78.0,20.1,92.0,152.0,44.8,12.0,49.0,181.0
3,32.0,0,174.5,71.1,18.4,76.0,147.0,41.4,15.2,53.0,219.0
4,28.0,0,173.8,67.7,17.1,70.0,127.0,43.5,27.1,45.0,217.0


In [6]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(data, target, random_state=42)

In [7]:
# Support vector machine linear classifier
from sklearn.svm import SVC 
model = SVC(kernel='linear')

In [8]:
# Create the GridSearch estimator along with a parameter object containing the values to adjust
from sklearn.model_selection import GridSearchCV
param_grid = {'C': [1, 5, 10],
              'gamma': [0.0001, 0.001, 0.01]}
grid = GridSearchCV(model, param_grid, verbose=3)

In [9]:
# Fit the model using the grid search estimator. 
# This will take the SVC model and try each combination of parameters
grid.fit(X_train, y_train)

Fitting 5 folds for each of 9 candidates, totalling 45 fits
[CV 1/5] END .................C=1, gamma=0.0001;, score=0.632 total time= 1.1min
[CV 2/5] END .................C=1, gamma=0.0001;, score=0.629 total time= 1.1min
[CV 3/5] END .................C=1, gamma=0.0001;, score=0.629 total time= 1.0min
[CV 4/5] END .................C=1, gamma=0.0001;, score=0.598 total time= 1.0min
[CV 5/5] END .................C=1, gamma=0.0001;, score=0.617 total time= 1.1min
[CV 1/5] END ..................C=1, gamma=0.001;, score=0.632 total time= 1.1min
[CV 2/5] END ..................C=1, gamma=0.001;, score=0.629 total time= 1.1min
[CV 3/5] END ..................C=1, gamma=0.001;, score=0.629 total time= 1.1min
[CV 4/5] END ..................C=1, gamma=0.001;, score=0.598 total time= 1.0min
[CV 5/5] END ..................C=1, gamma=0.001;, score=0.617 total time= 1.1min
[CV 1/5] END ...................C=1, gamma=0.01;, score=0.632 total time= 1.1min
[CV 2/5] END ...................C=1, gamma=0.01;,

GridSearchCV(estimator=SVC(kernel='linear'),
             param_grid={'C': [1, 5, 10], 'gamma': [0.0001, 0.001, 0.01]},
             verbose=3)

In [10]:
# List the best parameters for this dataset
print(grid.best_params_)

{'C': 1, 'gamma': 0.0001}


In [15]:
# Make predictions with the hypertuned model
predictions = grid.predict(X_test)

In [17]:

# test = np.expand_dims(X_test[5], axis = 0)

# model.predict(test)

In [22]:
# Create pickle file
file = open('../models/body_performance_prediction2', 'wb') 

# Save the trained model to file
import pickle
pickle.dump(grid, file)
file.close()

In [26]:
# from keras.models import load_model
# # File path
# filepath = '../models/body_performance_prediction2'


# # Load the model
# model = load_model(filepath, compile = True)

# # random samples
# use_samples = [25,0,165,55.8,15.7,77,126,36.4,16.3,53,229]
# samples_to_predict = []

# # Convert into Numpy array
# samples_to_predict = np.array(samples_to_predict)

# # Generate predictions for samples
# predictions = model.predict(samples_to_predict)
# print(predictions)