# Importing libraries

In [28]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (max_error, mean_absolute_error, mean_squared_error, r2_score,
                             confusion_matrix, accuracy_score, recall_score, precision_score, f1_score)
from imblearn.over_sampling import SMOTE

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OrdinalEncoder

# Reading dataset

In [29]:
# Load the diabetes dataset from the CSV file in the working directory.
db = pd.read_csv("diabetes.csv")
# Bare last expression so the notebook renders the loaded frame.
db

Unnamed: 0,gender,age,hypertension,heart_disease,smoking_history,bmi,HbA1c_level,blood_glucose_level,diabetes
0,Female,80.0,0,1,never,25.19,6.6,140,0
1,Female,54.0,0,0,No Info,27.32,6.6,80,0
2,Male,28.0,0,0,never,27.32,5.7,158,0
3,Female,36.0,0,0,current,23.45,5.0,155,0
4,Male,76.0,1,1,current,20.14,4.8,155,0
...,...,...,...,...,...,...,...,...,...
99995,Female,80.0,0,0,No Info,27.32,6.2,90,0
99996,Female,2.0,0,0,No Info,17.37,6.5,100,0
99997,Male,66.0,0,0,former,27.83,5.7,155,0
99998,Female,24.0,0,0,never,35.42,4.0,100,0


# Encoding dataset

In [30]:
# Remove every row that contains at least one missing value (db is modified in place).
db.dropna(axis=0, how="any", inplace=True)
# Show the column labels of the cleaned frame.
db.columns

Index(['gender', 'age', 'hypertension', 'heart_disease', 'smoking_history',
       'bmi', 'HbA1c_level', 'blood_glucose_level', 'diabetes'],
      dtype='object')

In [31]:
# Categorical (string-valued) columns that will be replaced by ordinal codes.
coding_columns = [
    "gender",
    "smoking_history",
]

# Encoder with default settings; it is fitted in the following cells.
OEnc = OrdinalEncoder()

# Display the (still unfitted) encoder.
OEnc

In [32]:
OEnc.fit(db[coding_columns])
OEnc.transform(db[coding_columns])

array([[0., 4.],
       [0., 0.],
       [1., 4.],
       ...,
       [1., 3.],
       [0., 4.],
       [0., 1.]])

# Writing the encoded columns back into the dataset

In [33]:
# Re-fit the encoder and compute the ordinal codes for the categorical columns.
encoded_values = OEnc.fit_transform(db[coding_columns])
# Overwrite the original string columns with their numeric codes.
db[coding_columns] = encoded_values
# Display the frame after encoding.
db

Unnamed: 0,gender,age,hypertension,heart_disease,smoking_history,bmi,HbA1c_level,blood_glucose_level,diabetes
0,0.0,80.0,0,1,4.0,25.19,6.6,140,0
1,0.0,54.0,0,0,0.0,27.32,6.6,80,0
2,1.0,28.0,0,0,4.0,27.32,5.7,158,0
3,0.0,36.0,0,0,1.0,23.45,5.0,155,0
4,1.0,76.0,1,1,1.0,20.14,4.8,155,0
...,...,...,...,...,...,...,...,...,...
99995,0.0,80.0,0,0,0.0,27.32,6.2,90,0
99996,0.0,2.0,0,0,0.0,17.37,6.5,100,0
99997,1.0,66.0,0,0,3.0,27.83,5.7,155,0
99998,0.0,24.0,0,0,4.0,35.42,4.0,100,0


# Regression (neural network)

In [34]:
# Target: the "diabetes" column.
y_reg = db["diabetes"]
# Features: every remaining column.
x_reg = db.drop(columns=["diabetes"])

### In machine learning, StandardScaler is used to standardize feature values so that each feature has a mean of 0 and a standard deviation of 1.

In [35]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible.
x_train, x_test, y_train, y_test = train_test_split(x_reg, y_reg, test_size=0.2, random_state=89)

# Fit the scaler on the training split ONLY, then apply the same learned
# mean/std to the test split. Calling fit_transform on the test data would
# re-estimate the statistics from the test set, so train and test would be
# scaled differently and test-set information would leak into preprocessing.
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)



# Creating model

In [36]:
# Fully connected network: two ReLU hidden layers (100 and 50 units) followed
# by a single output unit with no activation.
n_features = x_train_scaled.shape[1]  # one input per feature column
network_layers = [
    Input(shape=(n_features,)),
    Dense(units=100, activation="relu"),
    Dense(units=50, activation="relu"),
    Dense(units=1),
]
model = Sequential(network_layers)

# Training the model

In [None]:
# Mean squared error loss, optimized with Adam.
model.compile(optimizer=Adam(learning_rate=0.001), loss="mse")

# Train on the STANDARDIZED training features. The network's input shape was
# taken from x_train_scaled and x_test has already been passed through the
# scaler, so fitting on the raw x_train would train and evaluate the model on
# differently scaled inputs.
model.fit(x_train_scaled, y_train, epochs=100, batch_size=32, verbose=0)

# predict() returns one column per output unit; flatten to a 1-D vector so it
# lines up with y_test.
y_pred = model.predict(x_test, verbose=0).flatten()

# Regression metrics on the held-out split.
max_err = max_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)



# Metrics

In [None]:
# Report the held-out regression metrics, one per line, rounded to two decimals.
print("Regression metrics")
metric_report = (
    ("Max error", max_err),
    ("MAE", mae),
    ("MSE", mse),
    ("Coeffient determination R2", r2),
)
for metric_label, metric_value in metric_report:
    print(f"{metric_label} {metric_value:.2f}")

# Classification