# Logistic Regression

Predicts gender based on weight and height.

In [24]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, f1_score

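*Note:* in the raw data, `gender` is encoded as `1` = women and `2` = men. The next cell subtracts 1, so the labels used for modelling are `0` = women and `1` = men.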

In [47]:
# Load the semicolon-delimited cardio dataset, keep only the columns the model
# needs, and shift the gender codes down by one so the labels become 0/1.
# `assign` returns a new frame instead of writing into a column subset, which
# avoids a possible SettingWithCopyWarning.
data = (
    pd.read_csv('./data/cardio.csv', delimiter=';')
    [['weight', 'height', 'gender']]
    .assign(gender=lambda frame: frame['gender'] - 1)
)

# Seed the shuffle so the train/test split, and therefore every metric computed
# from it, is reproducible across runs.
train, test = train_test_split(data, random_state=0)

train.head(5)

Unnamed: 0,weight,height,gender
37237,89.0,164,0
47763,62.0,165,0
47078,71.0,168,0
55424,74.0,165,1
14057,80.0,164,0


Ensure gender values are $0$ or $1$.

In [48]:
# Sanity check: the gender column must contain exactly the labels 0 and 1.
genders = list(data['gender'].unique())
genders.sort()
np.testing.assert_array_equal(genders, [0, 1])

In [32]:
# Split each partition into a feature frame (weight, height) and a 1-D label array.
feature_columns = ['weight', 'height']

train_x = train[feature_columns]
# `Series.ravel()` is deprecated as of pandas 2.2; `to_numpy()` returns the same
# 1-D ndarray of labels.
train_y = train['gender'].to_numpy()

test_x = test[feature_columns]
test_y = test['gender'].to_numpy()

In [33]:
# Build the classifier with a fixed seed, then fit it on the training split.
model = LogisticRegression(random_state=0)
model.fit(train_x, train_y)

In [36]:
# Predict a gender label for every row of the held-out test features.
predicted_y = model.predict(X=test_x)

In [35]:
# Evaluate the predictions on the test split: print the confusion matrix first,
# then the F1 score computed with label 0 as the positive class.
matrix = confusion_matrix(test_y, predicted_y)
score = f1_score(test_y, predicted_y, pos_label=0)
print(matrix, score, sep='\n')

[[9900 1471]
 [2523 3606]]
0.8321425569471295
