In [1]:
#Import Dependencies

%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import os
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import session
from sqlalchemy import create_engine, func
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import tensorflow as tf

In [2]:
#SOURCE: https://www.tutorialspoint.com/python_data_access/python_postgresql_database_connection.htm
import psycopg2

#establishing the connection
conn = psycopg2.connect(
   database="Body_Performance", user='postgres', password='Cheese6132', host='localhost', port= '5432'
)
#Creating a cursor object using the cursor() method
cursor = conn.cursor()

#Executing an MYSQL function using the execute() method
cursor.execute("select version()")

# Fetch a single row using fetchone() method.
data = cursor.fetchone()
print("Connection established to: ",data)



Connection established to:  ('PostgreSQL 12.13, compiled by Visual C++ build 1914, 64-bit',)


In [3]:
body_df = pd.read_sql('select * from body_performance', con=conn)
body_df.head()

Unnamed: 0,age,gender,height_cm,weight_kg,body_fat_pct,diastolic,systolic,gripforce,sit_and_bend_forward_cm,sit_ups_counts,broad_jump_cm,ranking
0,27,M,172.3,75.24,21.3,80.0,130.0,54.9,18.4,60.0,217.0,C
1,25,M,165.0,55.8,15.7,77.0,126.0,36.4,16.3,53.0,229.0,A
2,31,M,179.6,78.0,20.1,92.0,152.0,44.8,12.0,49.0,181.0,C
3,32,M,174.5,71.1,18.4,76.0,147.0,41.4,15.2,53.0,219.0,B
4,28,M,173.8,67.7,17.1,70.0,127.0,43.5,27.1,45.0,217.0,B


In [4]:
#Closing the connection
conn.close()
print("Closed connection established to: ",data)


Closed connection established to:  ('PostgreSQL 12.13, compiled by Visual C++ build 1914, 64-bit',)


In [5]:
# OneHotEncoder or Get_dummies on Gender
from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder(sparse=False)

encode1 = pd.DataFrame(enc.fit_transform(body_df.gender.values.reshape(-1,1)))

encode1.columns = enc.get_feature_names_out(['gender'])
encode1.head()


Unnamed: 0,gender_F,gender_M
0,0.0,1.0
1,0.0,1.0
2,0.0,1.0
3,0.0,1.0
4,0.0,1.0


In [6]:
#If keeping class/level in one column as Y, Encode with dictionary values
ranking_num = {"A":0, "B":1, "C":2, "D":3}
body_df["ranking_num"] = body_df["ranking"].apply(lambda x: ranking_num[x])
body_df.head()


Unnamed: 0,age,gender,height_cm,weight_kg,body_fat_pct,diastolic,systolic,gripforce,sit_and_bend_forward_cm,sit_ups_counts,broad_jump_cm,ranking,ranking_num
0,27,M,172.3,75.24,21.3,80.0,130.0,54.9,18.4,60.0,217.0,C,2
1,25,M,165.0,55.8,15.7,77.0,126.0,36.4,16.3,53.0,229.0,A,0
2,31,M,179.6,78.0,20.1,92.0,152.0,44.8,12.0,49.0,181.0,C,2
3,32,M,174.5,71.1,18.4,76.0,147.0,41.4,15.2,53.0,219.0,B,1
4,28,M,173.8,67.7,17.1,70.0,127.0,43.5,27.1,45.0,217.0,B,1


In [None]:
#Using oneHotEncoder for class into 4 separate

#encode2 = pd.DataFrame(enc.fit_transform(body_df.ranking.values.reshape(-1,1)))

#encode2.columns = enc.get_feature_names_out(["ranking"])
#encode2.head()

In [7]:
#Drop columns and merge encoded df
#Ignoring encode2 for now
body_df = body_df.drop(["gender", "ranking"], axis=1)

body_df2 = body_df.merge(encode1, left_index=True, right_index=True)
body_df2.head()


Unnamed: 0,age,height_cm,weight_kg,body_fat_pct,diastolic,systolic,gripforce,sit_and_bend_forward_cm,sit_ups_counts,broad_jump_cm,ranking_num,gender_F,gender_M
0,27,172.3,75.24,21.3,80.0,130.0,54.9,18.4,60.0,217.0,2,0.0,1.0
1,25,165.0,55.8,15.7,77.0,126.0,36.4,16.3,53.0,229.0,0,0.0,1.0
2,31,179.6,78.0,20.1,92.0,152.0,44.8,12.0,49.0,181.0,2,0.0,1.0
3,32,174.5,71.1,18.4,76.0,147.0,41.4,15.2,53.0,219.0,1,0.0,1.0
4,28,173.8,67.7,17.1,70.0,127.0,43.5,27.1,45.0,217.0,1,0.0,1.0


In [8]:
#Set X, y 
y = body_df2["ranking_num"].values
X = body_df2.drop("ranking_num", axis=1).values
    #X = X.drop("level_num", axis=1)

print(X.shape, y.shape)



(13393, 12) (13393,)


In [9]:
y[:10]

array([2, 0, 2, 1, 1, 1, 3, 1, 2, 1], dtype=int64)

In [10]:
#Split train/test data and scale using Standard Scaler at this time (mean of 0).  #Should we use MinMax(0-1)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

scaler = StandardScaler().fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)
X_train_scaled

array([[-0.7892366 , -0.91838934, -1.3348929 , ..., -0.56381633,
         1.3150467 , -1.3150467 ],
       [ 1.71061167, -0.50110528, -0.31377541, ..., -1.21956778,
        -0.76042927,  0.76042927],
       [-0.86276155,  0.16654921,  0.95843653, ...,  1.37821682,
        -0.76042927,  0.76042927],
       ...,
       [-1.00981144, -0.23881245,  1.05887432, ...,  0.49547448,
        -0.76042927,  0.76042927],
       [-1.15686134,  0.07117   , -0.21333763, ..., -0.41248907,
         1.3150467 , -1.3150467 ],
       [ 1.78413662, -1.20452697, -0.58160951, ..., -0.4881527 ,
        -0.76042927,  0.76042927]])

In [14]:
# Define the model - deep neural net...length of outputs/inputs
num_input_features = len(X_train_scaled[0])
num_output_features = 1  #len(y_train[0])
print(num_input_features)
print(num_output_features)

12
1


In [21]:
# Define the model - deep neural net
# Try with 2 hidden layers first but will try reduced. Look at different activation functions?
hidden_nodes_layer1 =  24
hidden_nodes_layer2 = 12

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(tf.keras.layers.Dense(
    units=hidden_nodes_layer1, input_dim=num_input_features,
    activation="relu"))

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))

# Output layer  #Will need to round or should this be sigmoid?
nn.add(tf.keras.layers.Dense(units=num_output_features, activation="softmax"))

# Check the structure of the model
nn.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_6 (Dense)             (None, 24)                312       
                                                                 
 dense_7 (Dense)             (None, 12)                300       
                                                                 
 dense_8 (Dense)             (None, 1)                 13        
                                                                 
Total params: 625
Trainable params: 625
Non-trainable params: 0
_________________________________________________________________


In [22]:
# Compile the model binary vs categorical. Categorical for multi-output  --https://machinelearningmastery.com/deep-learning-models-for-multi-output-regression/
#nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
nn.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

In [23]:
# Train the model
fit_model = nn.fit(X_train_scaled,y_train,epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100


Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [75]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

105/105 - 0s - loss: 0.2931 - accuracy: 0.7342 - 170ms/epoch - 2ms/step
Loss: 0.29306167364120483, Accuracy: 0.7342490553855896


NameError: name 'model' is not defined

In [51]:
# Define the model - deep neural net
# Try with 1 hidden layer keeping activation functions and node#

nn2 = tf.keras.models.Sequential()

# First hidden layer
nn2.add(tf.keras.layers.Dense(
    units=hidden_nodes_layer1, input_dim=num_input_features,
    activation="relu"))

# Second hidden layer
#nn2.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))

# Output layer  #Will need to round or should this be sigmoid?
nn2.add(tf.keras.layers.Dense(units=4, activation="sigmoid"))

# Check the structure of the model
nn2.summary()

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_17 (Dense)            (None, 8)                 104       
                                                                 
 dense_18 (Dense)            (None, 4)                 36        
                                                                 
Total params: 140
Trainable params: 140
Non-trainable params: 0
_________________________________________________________________


In [52]:
# Compile the model
nn2.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [53]:
# Train the model
fit_model = nn2.fit(X_train_scaled,y_train,epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [54]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn2.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

#One Hidden Layer lowered accuracy and inreased loss. 

105/105 - 0s - loss: 0.3615 - accuracy: 0.6417 - 128ms/epoch - 1ms/step
Loss: 0.3614822030067444, Accuracy: 0.6416840553283691


In [None]:
feature_importances = rf_model.feature_importances_
sorted(zip(rf_model.feature_importances_, X.columns), reverse=True)

In [None]:
#Accuracy score and confusion Matrix
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

predictions = rf_model.predict(X_test_scaled)
acc_score = accuracy_score(y_test, predictions)
print(f"Accuracy Score : {acc_score}")

In [None]:
# printing confusion matrix but error:      Shape of passed values is (4, 4), indices imply (2, 2) Needs to be resolved
cm = confusion_matrix(y_test, predictions)
cm_df = pd.DataFrame(
    cm, index=["Actual 0", "Actual 1"],
    columns=["Predicted 0", "Predicted 1"])
display(cm_df)

In [None]:
#Classification report for random forest

print("Classification Report")
print(classification_report(y_test, predictions))

In [None]:
# Need to test feature importances. THis is one way but plot is not showing up how I'd like. Need to adjust.

features = sorted(zip(X.columns, rf_model.feature_importances_), key = lambda x: x[1])
cols = [f[0] for f in features]
width = [f[1] for f in features]

fig, ax = plt.subplots()

fig.set_size_inches(10,200)
plt.margins(y=0.001)

ax.barh(y=cols, width=width)

plt.show()

In [None]:
# Binning of Age? if so will need to also be encoded/scaled.

# Open in Google Collab or here and import tensor flow to run as Neural Network

# Try MinMax Scaler  #Should I scale entire 

# Try with multiply Y columns as output. Can I do this with trees or only neural network?