In [86]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

In [87]:
# Load the raw frailty measurements from CSV and echo the whole table as plain text.
frailty_data = pd.read_csv(filepath_or_buffer='raw_frailty_data.csv')
print(frailty_data)

   Height  Weight  Age  Grip Strength Frailty
0    65.8     112   30             30       N
1    71.5     136   19             31       N
2    69.4     153   45             29       N
3    68.2     142   22             28       Y
4    67.8     144   29             24       Y
5    68.7     123   50             26       N
6    69.8     141   51             22       Y
7    70.1     136   23             20       Y
8    67.9     112   17             19       N
9    66.8     120   39             31       N


In [88]:
# Hand-entered copy of the ten frailty records, written one row per subject and
# then pivoted into the column -> list-of-values mapping that `data` exposes.
# NOTE(review): this frame is independent of `frailty_data` (and spells the grip
# column 'Grip strength'), so the missing-value count below describes this
# typed-in copy, not the CSV that was loaded -- confirm that is intended.
_fields = ['Height', 'Weight', 'Age', 'Grip strength', 'Frailty']
_records = [
    (65.8, 112, 30, 30, 'N'),
    (71.5, 136, 19, 31, 'N'),
    (69.4, 153, 45, 29, 'N'),
    (68.2, 142, 22, 28, 'Y'),
    (67.8, 144, 29, 24, 'Y'),
    (68.7, 123, 50, 26, 'N'),
    (69.8, 141, 51, 22, 'Y'),
    (70.1, 136, 23, 20, 'Y'),
    (67.9, 112, 17, 19, 'N'),
    (66.8, 120, 39, 31, 'N'),
]
data = {field: list(column) for field, column in zip(_fields, zip(*_records))}

df = pd.DataFrame.from_dict(data)

# Total number of NaN cells across the whole frame.
missing_values = df.isna().sum().sum()
print(f"Number of missing values: {missing_values}")

# Summary statistics of the grip-strength column (pandas' default sample std).
grip_strength_mean, grip_strength_std = (
    df['Grip strength'].mean(),
    df['Grip strength'].std(),
)

Number of missing values: 0


**Data Processing**

In [89]:
# Replace the Frailty labels with integer codes (in the table shown below,
# N appears as 0 and Y as 1), then save the encoded table without its index.
label = LabelEncoder()
label.fit(frailty_data['Frailty'])
frailty_data['Frailty'] = label.transform(frailty_data['Frailty'])

frailty_data.to_csv(path_or_buf='clean_frailty_data.csv', index=False)
frailty_data

Unnamed: 0,Height,Weight,Age,Grip Strength,Frailty
0,65.8,112,30,30,0
1,71.5,136,19,31,0
2,69.4,153,45,29,0
3,68.2,142,22,28,1
4,67.8,144,29,24,1
5,68.7,123,50,26,0
6,69.8,141,51,22,1
7,70.1,136,23,20,1
8,67.9,112,17,19,0
9,66.8,120,39,31,0


In [90]:
# Features are every column except the last; the target is the last column (Frailty).
X, y = frailty_data.iloc[:, :-1], frailty_data.iloc[:, -1]
X


Unnamed: 0,Height,Weight,Age,Grip Strength
0,65.8,112,30,30
1,71.5,136,19,31
2,69.4,153,45,29
3,68.2,142,22,28
4,67.8,144,29,24
5,68.7,123,50,26
6,69.8,141,51,22
7,70.1,136,23,20
8,67.9,112,17,19
9,66.8,120,39,31


In [91]:
# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [92]:
# Standardise the features using statistics learned from the training split only.
ss_train = StandardScaler()
X_train = ss_train.fit_transform(X_train)

# The held-out rows must be transformed with the *train-fitted* scaler. Fitting a
# second scaler on the test split would centre/scale the two splits with
# different means and variances and let test-set statistics shape preprocessing.
ss_test = ss_train  # alias kept so any later reference to `ss_test` still resolves
X_test = ss_train.transform(X_test)

In [93]:
# Fit on the standardised training split only, then predict the held-out rows.
# Fitting on the full, unscaled X would both expose the model to the test rows
# and train it on a different feature scale than the X_test it predicts.
model = LogisticRegression(solver='liblinear', random_state=0)
model.fit(X_train, y_train)
predictions = model.predict(X_test)



In [94]:
# Refit the classifier on the complete, unscaled data set (every row of X).
# The cells that follow score this model on the same X, y, so their results are
# training-set (resubstitution) figures rather than held-out performance.
model = LogisticRegression(solver='liblinear', random_state=0).fit(X, y)


In [95]:
# Mean accuracy of the fitted model on X, y -- the same rows it was trained on.
model.score(X, y)

1.0

In [96]:
# Rows are the true classes, columns the predicted classes, evaluated on X, y.
confusion_matrix(y_true=y, y_pred=model.predict(X))

array([[6, 0],
       [0, 4]])

In [97]:
# Unpack the binary confusion matrix in row-major order: TN, FP, FN, TP.
# labels=[0, 1] pins the matrix to 2x2 even when only one class shows up in the
# true or predicted labels; without it the four-way unpack raises ValueError.
TN, FP, FN, TP = confusion_matrix(y, model.predict(X), labels=[0, 1]).ravel()

# Accuracy = correctly classified rows / all rows.
accuracy = (TP + TN) / (TP + FP + TN + FN)

print('Accuracy of the binary classifier = {:0.3f}'.format(accuracy))

Accuracy of the binary classifier = 1.000
