In [14]:
import pandas as pd
import numpy as np
from keras import Sequential
from keras.src.callbacks import ModelCheckpoint
from keras.src.layers import Dense
import plotly.express as px
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.preprocessing import StandardScaler
from plotly import graph_objects as go


In [15]:
data = pd.read_csv("dataset.txt")
labels = data["Plec"]
data = data.drop('Plec', axis=1)

scaler = StandardScaler()
data = scaler.fit_transform(data)

[data, labels]

[array([[-0.56254337, -0.34986292, -0.2895595 , -0.30058241, -0.4164119 ],
        [ 1.01963612,  1.23021053,  1.20458575,  1.25964843,  1.27073949],
        [-1.57965876, -1.73242718, -1.63673965, -1.64054538, -1.54620078],
        ...,
        [-0.22350491, -0.34986292, -0.41203043, -0.35564938, -0.49173116],
        [-0.78856901, -0.94239046, -1.02438504, -1.08987567, -1.09428523],
        [ 1.13264894,  1.42771971,  1.30256248,  1.20458146,  1.39125031]]),
 0      0
 1      0
 2      1
 3      1
 4      1
       ..
 264    0
 265    0
 266    0
 267    1
 268    0
 Name: Plec, Length: 269, dtype: int64]

In [26]:
model = Sequential()
model.add(Dense(units=2, input_shape=(5,)))
model.add(Dense(units=2, activation="relu"))
model.add(Dense(units=1, activation="sigmoid"))

model.summary()

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_19 (Dense)            (None, 2)                 12        
                                                                 
 dense_20 (Dense)            (None, 2)                 6         
                                                                 
 dense_21 (Dense)            (None, 1)                 3         
                                                                 
Total params: 21 (84.00 Byte)
Trainable params: 21 (84.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [27]:
mc = ModelCheckpoint(filepath="best_model1.keras", monitor="val_accuracy", save_best_only=True, mode="max", verbose=1)

model.compile(optimizer="rmsprop",
              loss="binary_crossentropy",
              metrics=["accuracy"])

history = model.fit(data, labels, epochs=15, batch_size=1, validation_split=0.25, callbacks=[mc])

model.save_weights("model_after_training.keras")

Epoch 1/15
Epoch 1: val_accuracy improved from -inf to 0.73529, saving model to best_model1.keras
Epoch 2/15
Epoch 2: val_accuracy improved from 0.73529 to 0.80882, saving model to best_model1.keras
Epoch 3/15
Epoch 3: val_accuracy did not improve from 0.80882
Epoch 4/15
Epoch 4: val_accuracy did not improve from 0.80882
Epoch 5/15
Epoch 5: val_accuracy improved from 0.80882 to 0.83824, saving model to best_model1.keras
Epoch 6/15
Epoch 6: val_accuracy improved from 0.83824 to 0.86765, saving model to best_model1.keras
Epoch 7/15
Epoch 7: val_accuracy did not improve from 0.86765
Epoch 8/15
Epoch 8: val_accuracy did not improve from 0.86765
Epoch 9/15
Epoch 9: val_accuracy improved from 0.86765 to 0.92647, saving model to best_model1.keras
Epoch 10/15
Epoch 10: val_accuracy did not improve from 0.92647
Epoch 11/15
Epoch 11: val_accuracy improved from 0.92647 to 1.00000, saving model to best_model1.keras
Epoch 12/15
Epoch 12: val_accuracy did not improve from 1.00000
Epoch 13/15
Epoch 1

In [28]:
def draw_history(history):
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=history.epoch, y=history.history["accuracy"], name="accuracy"))
    fig.add_trace(go.Scatter(x=history.epoch, y=history.history["val_accuracy"], name="val_accuracy"))
    fig.show()

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=history.epoch, y=history.history["loss"], name="loss"))
    fig.add_trace(go.Scatter(x=history.epoch, y=history.history["val_loss"], name="val_loss"))
    fig.show()


draw_history(history)

In [30]:
model.load_weights("best_model1.keras")
probs = model.predict(data, verbose=0)
classes = probs.round()

cm = confusion_matrix(labels, classes)

print(f"Accuracy: {round(accuracy_score(labels, classes) * 100, 2)}%")

px.imshow(cm, text_auto=True, x=["p_men", "p_wom"], y=["t_men", "t_wom"])

Accuracy: 99.26%


In [21]:
test_data = np.random.randint(80, 100, size=(500, 5))

probs = model.predict(scaler.fit_transform(test_data))
pd.DataFrame({"probs": probs.ravel(), "labels": probs.ravel().round()})




Unnamed: 0,probs,labels
0,0.000700,0.0
1,0.187538,0.0
2,0.029372,0.0
3,0.000177,0.0
4,0.000052,0.0
...,...,...
495,0.000244,0.0
496,0.512688,1.0
497,0.000005,0.0
498,0.034155,0.0
