In [10]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow	import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import InputLayer
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import layers
from tensorflow.keras.optimizers import Adam

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import Normalizer
from sklearn.metrics import r2_score

from datetime import datetime


In [2]:
raw_data = pd.read_csv('train.csv')
raw_data

Unnamed: 0,id,Sex,Age,Height,Weight,Duration,Heart_Rate,Body_Temp,Calories
0,0,male,36,189.0,82.0,26.0,101.0,41.0,150.0
1,1,female,64,163.0,60.0,8.0,85.0,39.7,34.0
2,2,female,51,161.0,64.0,7.0,84.0,39.8,29.0
3,3,male,20,192.0,90.0,25.0,105.0,40.7,140.0
4,4,female,38,166.0,61.0,25.0,102.0,40.6,146.0
...,...,...,...,...,...,...,...,...,...
749995,749995,male,28,193.0,97.0,30.0,114.0,40.9,230.0
749996,749996,female,64,165.0,63.0,18.0,92.0,40.5,96.0
749997,749997,male,60,162.0,67.0,29.0,113.0,40.9,221.0
749998,749998,male,45,182.0,91.0,17.0,102.0,40.3,109.0


In [3]:
trash_variables = ['id']
target_variables = ['Calories']
feature_variables = raw_data.drop(columns=trash_variables + target_variables).columns.to_list()
dummy_variables = ['Sex']
scale_variables = raw_data[feature_variables].drop(columns=dummy_variables).columns.to_list()

print(trash_variables)
print(target_variables)
print(feature_variables)
print(dummy_variables)
print(scale_variables)

['id']
['Calories']
['Sex', 'Age', 'Height', 'Weight', 'Duration', 'Heart_Rate', 'Body_Temp']
['Sex']
['Age', 'Height', 'Weight', 'Duration', 'Heart_Rate', 'Body_Temp']


In [4]:
dummied_features_df = pd.get_dummies(raw_data[dummy_variables], drop_first=True, dtype='int', columns=dummy_variables)
dummied_features_df.head()

Unnamed: 0,Sex_male
0,1
1,0
2,0
3,1
4,0


In [5]:
X_train, X_test, y_train, y_test = train_test_split(raw_data[scale_variables], raw_data[target_variables], test_size=.3)
print(X_train.head())
print(y_train.head())

        Age  Height  Weight  Duration  Heart_Rate  Body_Temp
586435   21   157.0    57.0      15.0       104.0       40.1
23133    49   160.0    63.0       3.0        81.0       39.4
128803   33   149.0    53.0      23.0       102.0       40.7
48950    59   189.0    91.0      15.0        92.0       40.4
12345    49   176.0    82.0       7.0        91.0       39.4
        Calories
586435      85.0
23133       10.0
128803     134.0
48950       86.0
12345       32.0


In [6]:
scaler = StandardScaler().fit(X_train.values)
scaled_features_train = scaler.transform(X_train.values)
scaled_features_train_df = pd.DataFrame(scaled_features_train, columns=scale_variables)
X_train = pd.merge(scaled_features_train_df, dummied_features_df, left_index=True, right_index=True)

scaled_features_test = scaler.transform(X_test.values)
scaled_features_test_df = pd.DataFrame(scaled_features_test, columns = scale_variables)
X_test = pd.merge(scaled_features_test_df, dummied_features_df, left_index=True, right_index=True)


print(X_train)
print(X_test)

             Age    Height    Weight  Duration  Heart_Rate  Body_Temp  \
0      -1.346883 -1.381043 -1.299269 -0.050779    0.901658   0.081182   
1       0.499002 -1.147205 -0.870245 -1.487522   -1.534212  -0.817017   
2      -0.555789 -2.004611 -1.585285  0.907049    0.689843   0.851067   
3       1.158247  1.113227  1.131866 -0.050779   -0.369230   0.466125   
4       0.499002  0.099930  0.488331 -1.008608   -0.475138  -0.817017   
...          ...       ...       ...       ...         ...        ...   
524995 -1.083185 -0.913367 -1.227765 -1.367794   -2.063749  -1.330274   
524996 -0.160243  0.333768  0.559835  0.308406    0.795751   0.722753   
524997  1.158247 -0.757476 -0.655733 -0.050779   -0.051508   0.337810   
524998  0.828624  2.438309  2.561946 -1.487522   -1.004675  -1.586902   
524999  1.356020  1.113227  1.346378 -1.008608   -0.263323  -0.688703   

        Sex_male  
0              1  
1              0  
2              0  
3              1  
4              0  
...      

In [7]:
model = Sequential()
input = InputLayer(shape = (X_train.shape[1], ))
model.add(input)
model.add(Dense(64, activation='relu'))
model.add(Dense(1))
opt = Adam(learning_rate = 0.01)
model.compile(optimizer=opt, loss='mean_squared_logarithmic_error')
model.fit(X_train, y_train, epochs=3, verbose=1, batch_size=128)

Epoch 1/3
[1m4102/4102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 331us/step - loss: 0.5831
Epoch 2/3
[1m4102/4102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 335us/step - loss: 0.0075
Epoch 3/3
[1m4102/4102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 329us/step - loss: 0.0063


<keras.src.callbacks.history.History at 0x15e0906d0>

In [17]:
test_data = pd.read_csv('test.csv')

trash_variables = ['id']
target_variables = []
feature_variables = test_data.drop(columns=trash_variables + target_variables).columns.to_list()
dummy_variables = ['Sex']
scale_variables = test_data[feature_variables].drop(columns=dummy_variables).columns.to_list()

print(trash_variables)
print(target_variables)
print(feature_variables)
print(dummy_variables)
print(scale_variables)

dummied_features_df = pd.get_dummies(test_data[dummy_variables], drop_first=True, dtype='int', columns=dummy_variables)
dummied_features_df.head()

scaled_features = scaler.transform(test_data[scale_variables].values)
scaled_features_df = pd.DataFrame(scaled_features, columns=scale_variables)
X_test = pd.merge(scaled_features_df, dummied_features_df, left_index=True, right_index=True)

predictions = model.predict(X_test)


adjust_preds = []
for v in predictions:
    if v < 0:
        adjust_preds.append(0)
    else:
        adjust_preds.append(v)


ids_df = test_data.id
predictions_df = pd.DataFrame(adjust_preds)
output = pd.merge(ids_df, predictions_df, left_index=True, right_index=True)


now = datetime.now()

current_time = now.strftime("%H%M%S")
current_date = now.strftime("%Y-%m-%d")

("_" + current_date + "_" + current_time)

output.to_csv("predictions" + "_" + current_date + "_" + current_time)

['id']
[]
['Sex', 'Age', 'Height', 'Weight', 'Duration', 'Heart_Rate', 'Body_Temp']
['Sex']
['Age', 'Height', 'Weight', 'Duration', 'Heart_Rate', 'Body_Temp']
[1m7813/7813[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 183us/step
