# Data Analysis

In [40]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from sklearn.datasets import make_multilabel_classification
%load_ext tensorboard

In [60]:
data = np.loadtxt("data.txt")
data_df = pd.read_csv("data.txt", sep=" ",
                      names=["distance_to_ground",
                             "x_diff",
                            "over_lz",
                            "dx",
                            "dy",
                            "safe_speed",
                            "tilt",
                            "safe_tilt",
                            "rotation",
                            "time_since_thrust",
                            "time_since_tilt",
                            "W",
                            "A",
                            "D"])

def speed(row):
    return np.sqrt(row["dx"]**2 + row["dy"]**2)

def any_pressed(row):
    return int(row["W"] == 0 and row["A"] == 0 and row["D"] == 0)

def target_vec(row):
    return [row["W"], row["A"], row["D"], row["Not_pressed"]]

data_df["Not_pressed"] = data_df.apply(any_pressed, axis=1)
data_df["Speed"] = data_df.apply(speed, axis=1)

data_df[(data_df["W"] != 0) & (data_df["A"] != 0)]

Unnamed: 0,distance_to_ground,x_diff,over_lz,dx,dy,safe_speed,tilt,safe_tilt,rotation,time_since_thrust,time_since_tilt,W,A,D,Not_pressed,Speed
1337,126.720093,31.080078,1,0.094044,63.292709,0,2.487534,1,10.0,0.0,0.0,1,1,0,0,63.292779
1338,125.412842,31.081665,1,0.102700,62.862968,0,2.610367,1,8.0,0.0,0.0,1,1,0,0,62.863052
1339,124.386230,31.083679,1,0.114380,62.331120,0,2.716785,1,6.0,0.0,0.0,1,1,0,0,62.331225
1340,123.227661,31.085876,1,0.127457,61.777687,0,2.785999,1,4.0,0.0,0.0,1,1,0,0,61.777818
1341,122.124146,31.088440,1,0.142696,61.171474,0,2.821678,1,2.0,0.0,0.0,1,1,0,0,61.171640
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
36585,18.270508,-4.270691,1,6.137001,17.756762,1,3.216187,1,0.0,0.0,0.0,1,1,0,0,18.787373
36586,17.996338,-4.160828,1,6.163894,17.400999,1,3.180542,1,-2.0,0.0,0.0,1,1,0,0,18.460454
36587,17.703735,-4.050476,1,6.192720,17.009644,1,3.109253,1,-4.0,0.0,0.0,1,1,0,0,18.101872
36588,17.435791,-3.939209,1,6.223519,16.581230,1,3.001953,1,-6.0,0.0,0.0,1,1,0,0,17.710714


In [61]:
data_df.corr().style.background_gradient(cmap=plt.get_cmap("RdYlGn"))

Unnamed: 0,distance_to_ground,x_diff,over_lz,dx,dy,safe_speed,tilt,safe_tilt,rotation,time_since_thrust,time_since_tilt,W,A,D,Not_pressed,Speed
distance_to_ground,1.0,0.076651,-0.487069,-0.078348,0.272085,-0.211768,-0.048435,-0.239975,-0.00448124,0.408703,-0.119235,-0.244156,0.0229585,0.0249671,0.180806,0.214924
x_diff,0.076651,1.0,0.0245871,-0.519595,0.0285593,-0.0217595,-0.501479,0.0240652,0.00224287,0.152216,0.0186014,-0.0422924,-0.0279498,-0.0185491,0.0511871,0.0418112
over_lz,-0.487069,0.0245871,1.0,0.0198944,-0.222081,0.278094,-0.00660292,0.616425,0.0059311,-0.124901,0.314198,0.0885954,-0.0311746,-0.0353941,-0.0415042,-0.300126
dx,-0.078348,-0.519595,0.0198944,1.0,-0.0628516,0.125939,-0.0189143,0.0497827,-0.233156,-0.108937,0.0093218,0.0282859,0.0214713,0.0031819,-0.0304839,-0.126896
dy,0.272085,0.0285593,-0.222081,-0.0628516,1.0,-0.800562,-0.0795758,-0.144235,-0.019583,0.352457,0.0188255,0.0414798,-0.0140942,-0.00947104,-0.0237462,0.920849
safe_speed,-0.211768,-0.0217595,0.278094,0.125939,-0.800562,1.0,0.0165114,0.207458,-0.00168503,-0.241875,0.0677376,-0.0247671,-0.0166428,-0.0023936,0.0271208,-0.84973
tilt,-0.048435,-0.501479,-0.00660292,-0.0189143,-0.0795758,0.0165114,1.0,-0.0363554,0.00589883,-0.0317557,-0.00493688,0.0037618,0.192785,-0.171447,-0.00523176,-0.058002
safe_tilt,-0.239975,0.0240652,0.616425,0.0497827,-0.144235,0.207458,-0.0363554,1.0,0.0108399,-0.0196764,0.331684,0.00916213,-0.0536604,-0.0464895,0.0381184,-0.215639
rotation,-0.00448124,0.00224287,0.0059311,-0.233156,-0.019583,-0.00168503,0.00589883,0.0108399,1.0,-0.0276014,0.000956344,0.0284676,-0.0319936,-0.00165025,-0.0011199,-0.00398637
time_since_thrust,0.408703,0.152216,-0.124901,-0.108937,0.352457,-0.241875,-0.0317557,-0.0196764,-0.0276014,1.0,0.0232219,-0.402756,-0.00682183,0.0181224,0.334506,0.313566


In [62]:
#data_df.plot("x_diff", "distance_to_ground", kind="scatter", c="Speed", cmap='RdYlGn')

In [63]:
#data_df.plot("x_diff", "distance_to_ground", kind="scatter", c="tilt", cmap='RdYlGn')

In [64]:
#data_df.plot("x_diff", "distance_to_ground", kind="scatter", c="time_since_thrust", cmap='RdYlGn')

## NN training

In [65]:
data_df.dtypes

distance_to_ground    float64
x_diff                float64
over_lz                 int64
dx                    float64
dy                    float64
safe_speed              int64
tilt                  float64
safe_tilt               int64
rotation              float64
time_since_thrust     float64
time_since_tilt       float64
W                       int64
A                       int64
D                       int64
Not_pressed             int64
Speed                 float64
dtype: object

In [70]:
features = data_df
features = features.drop(columns=["W", "A", "D", "Not_pressed"])

labels = data_df
labels = labels.drop(columns=["distance_to_ground",
                             "x_diff",
                            "over_lz",
                            "dx",
                            "dy",
                            "safe_speed",
                            "tilt",
                            "safe_tilt",
                            "rotation",
                            "time_since_thrust",
                            "time_since_tilt",
                            "Speed"])


In [84]:
# Imports
from sklearn.datasets import make_multilabel_classification
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.losses import binary_crossentropy
from tensorflow.keras.optimizers import Adam

# Configuration options
n_samples = 10000
n_features = 6
n_classes = 3
n_labels = 2
n_epochs = 50
random_state = 42
batch_size = 250
verbosity = 1
validation_split = 0.2

# Create dataset
X, y = make_multilabel_classification(n_samples=n_samples, n_features=n_features, n_classes=n_classes, n_labels=n_labels, random_state=random_state)

print(np.shape(X))
print(np.shape(y))

# Split into training and testing data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=random_state)

print(np.shape(X_train))
print(np.shape(X_test))
print(np.shape(y_train))
print(np.shape(y_test))

# Create the model
model = Sequential()
model.add(Dense(32, activation='relu', input_dim=n_features))
model.add(Dense(16, activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(n_classes, activation='sigmoid'))

# Compile the model
model.compile(loss=binary_crossentropy,
              optimizer=Adam(),
              metrics=['accuracy'])

# Fit data to model
#model.fit(X_train, y_train,
#          batch_size=batch_size,
#          epochs=n_epochs,
#          verbose=verbosity,
#          validation_split=validation_split)

# Generate generalization metrics
score = model.evaluate(X_test, y_test, verbose=0)
print(f'Test loss: {score[0]} / Test accuracy: {score[1]}')

(10000, 6)
(10000, 3)
(6700, 6)
(3300, 6)
(6700, 3)
(3300, 3)
Test loss: 0.776869530244307 / Test accuracy: 0.5071717500686646


In [93]:
X = features.to_numpy()

y = labels.to_numpy()

# Split into training and testing data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=random_state)

In [234]:
def get_compiled_model():
    #Create the model
    model = Sequential()
    model.add(Dense(32, use_bias=False, activation='relu', input_dim=12))
    model.add(Dense(16, use_bias=False, activation='relu'))
    model.add(Dense(8, use_bias=False, activation='relu'))
    model.add(Dense(4, use_bias=False, activation='sigmoid'))

    model.compile(loss=binary_crossentropy,
        optimizer=Adam(),
        metrics=['accuracy'])

    return model

In [235]:
logdir = "logs/"

tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)

model = get_compiled_model()
model.fit(X_train, y_train, 
          epochs=500, 
          validation_data=(X_test, y_test),
          callbacks=[tensorboard_callback])

Train on 24552 samples, validate on 12093 samples
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/5

KeyboardInterrupt: 

In [232]:
score = model.evaluate(X_test, y_test, verbose=0)
print(f'Test loss: {score[0]} / Test accuracy: {score[1]}')

Test loss: 2.8106882325829896 / Test accuracy: 0.8172496557235718


In [123]:
model.predict(X_test)

array([[4.7162175e-04, 3.6954880e-05, 5.8174133e-05, 9.9928051e-01],
       [8.0466270e-06, 2.0861626e-07, 5.9604645e-08, 9.9999070e-01],
       [9.9851233e-01, 9.6017122e-04, 1.1910409e-02, 7.0801377e-04],
       ...,
       [0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 1.0000000e+00],
       [2.0861626e-07, 4.9315691e-03, 9.7365767e-01, 6.0114205e-02],
       [1.2627478e-04, 1.4151062e-04, 7.3352989e-05, 9.9929273e-01]],
      dtype=float32)

In [124]:
%tensorboard --logdir logs/

Reusing TensorBoard on port 6006 (pid 59072), started 0:23:10 ago. (Use '!kill 59072' to kill it.)

In [133]:
weights = model.get_weights()
for i in range(len(weights)):
    print(type(weights[i]))
    print(np.shape(weights[i]))

<class 'numpy.ndarray'>
(12, 32)
<class 'numpy.ndarray'>
(32,)
<class 'numpy.ndarray'>
(32, 16)
<class 'numpy.ndarray'>
(16,)
<class 'numpy.ndarray'>
(16, 8)
<class 'numpy.ndarray'>
(8,)
<class 'numpy.ndarray'>
(8, 4)
<class 'numpy.ndarray'>
(4,)


In [135]:
model.summary()

Model: "sequential_27"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_103 (Dense)            (None, 32)                416       
_________________________________________________________________
dense_104 (Dense)            (None, 16)                528       
_________________________________________________________________
dense_105 (Dense)            (None, 8)                 136       
_________________________________________________________________
dense_106 (Dense)            (None, 4)                 36        
Total params: 1,116
Trainable params: 1,116
Non-trainable params: 0
_________________________________________________________________


In [256]:
test_X = np.array([[1., 1.], [2., 2.]])
test_y = np.array([[1., 0.], [0., 2.]])

print("features")
print(test_X)
print("labels")
print(test_y)
print("")

test_model = Sequential()
test_model.add(Dense(2, activation='relu', input_dim=2, use_bias=False))
test_model.add(Dense(2, activation='sigmoid', use_bias=False))

test_model.compile(loss=tf.keras.losses.MeanAbsoluteError(),
                    optimizer=Adam(),
                  metrics=['accuracy'])

test_model.fit(test_X, test_y, epochs=700, verbose=0)

test_model.summary()

print("")
print("-- predict --")
print(test_model.predict(test_X, verbose=1))

test_weights = test_model.get_weights()

print("")
print("-- weights --")

for i in range(len(test_weights)):
    print(f"layer : {i}")
    print(test_weights[i])

def sigmoid(x):
    return (1.0 / (1.0 + np.exp(-x)))

def relu(x):
    return max(x, 0)
    
print("")
print("reproduction")
vectorized_relu = np.vectorize(relu)

layer_1 = np.matmul(test_X, test_weights[0])

layer_1 = vectorized_relu(layer_1)

layer_2 = np.matmul(layer_1, test_weights[1])
layer_2 = sigmoid(layer_2)

print(layer_2)

features
[[1. 1.]
 [2. 2.]]
labels
[[1. 0.]
 [0. 2.]]

Model: "sequential_85"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_180 (Dense)            (None, 2)                 4         
_________________________________________________________________
dense_181 (Dense)            (None, 2)                 4         
Total params: 8
Trainable params: 8
Non-trainable params: 0
_________________________________________________________________

-- predict --
[[2.5384155e-01 1.0661717e-02]
 [1.0372952e-01 1.1612192e-04]]

-- weights --
layer : 0
[[0.3724537  0.75396734]
 [0.6224514  1.2540973 ]]
layer : 1
[[-1.1770039  -1.667168  ]
 [ 0.04620339 -1.4300848 ]]

reproduction
[[2.53841544e-01 1.06617172e-02]
 [1.03729519e-01 1.16121932e-04]]
