In [66]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tqdm import tqdm 


In [67]:
df = pd.read_csv("Surgical-deepnet.csv")
df

Unnamed: 0,bmi,Age,asa_status,baseline_cancer,baseline_charlson,baseline_cvd,baseline_dementia,baseline_diabetes,baseline_digestive,baseline_osteoart,...,complication_rsi,dow,gender,hour,month,moonphase,mort30,mortality_rsi,race,complication
0,19.31,59.2,1,1,0,0,0,0,0,0,...,-0.57,3,0,7.63,6,1,0,-0.43,1,0
1,18.73,59.1,0,0,0,0,0,0,0,0,...,0.21,0,0,12.93,0,1,0,-0.41,1,0
2,21.85,59.0,0,0,0,0,0,0,0,0,...,0.00,2,0,7.68,5,3,0,0.08,1,0
3,18.49,59.0,1,0,1,0,0,1,1,0,...,-0.65,2,1,7.58,4,3,0,-0.32,1,0
4,19.70,59.0,1,0,0,0,0,0,0,0,...,0.00,0,0,7.88,11,0,0,0.00,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14630,18.79,14.1,1,0,1,0,0,0,0,0,...,-0.54,1,0,7.78,0,1,0,-0.16,1,1
14631,19.65,12.6,0,0,0,0,0,0,0,0,...,-1.42,4,0,8.40,6,1,0,-0.77,1,1
14632,14.84,12.6,1,0,0,0,0,0,0,0,...,0.65,0,0,13.25,3,3,0,0.99,1,1
14633,17.75,8.9,0,0,1,0,0,0,1,0,...,-0.50,0,1,8.30,5,0,0,0.17,1,1


In [68]:
df.isnull().sum()

bmi                    0
Age                    0
asa_status             0
baseline_cancer        0
baseline_charlson      0
baseline_cvd           0
baseline_dementia      0
baseline_diabetes      0
baseline_digestive     0
baseline_osteoart      0
baseline_psych         0
baseline_pulmonary     0
ahrq_ccs               0
ccsComplicationRate    0
ccsMort30Rate          0
complication_rsi       0
dow                    0
gender                 0
hour                   0
month                  0
moonphase              0
mort30                 0
mortality_rsi          0
race                   0
complication           0
dtype: int64

In [69]:
X = df.drop("complication" , axis=1).copy().values
Y = df["complication"].copy().values


X_train , X_test , Y_train , Y_test = train_test_split(X , Y , test_size=0.2 , shuffle=True)
X_train.shape , X_test.shape , Y_train.shape , Y_test.shape
Y_train = Y_train.reshape(-1 ,1)
Y_test = Y_test.reshape(-1  ,1)
X_train.shape , X_test.shape , Y_train.shape , Y_test.shape


((11708, 24), (2927, 24), (11708, 1), (2927, 1))

In [70]:
class Perceptron:
    def __init__(self , input_length , lr  ):
        self.weights = np.random.rand(input_length)
        self.bias = np.random.rand(1)
        self.lr = lr


    def activation(self, x, function="sigmoid"):
        if function == "sigmoid" :
            return 1 / (1 + np.exp(-x))
        
        elif function == "relu" :
            return np.maximum(0,x)
        
        elif function == "tanh" :
            return (np.exp(x) - np.exp(-x)) / (np.exp(x) + np.exp(-x))
        
        else:
            raise Exception("activation func not found")
        



    def fit(self, X_train , Y_train , epochs):

        for epoch in tqdm(range(epochs)) :

            for x_train , y_train in zip(X_train , Y_train):


                # FORWARDING 
                y_pred = self.activation( x_train @ self.weights + self.bias)


                # BACKPROPAGATION ----with (y_pred , Y_train , x_train)
                # در مرحله بکپروپگیشین فقط گرادیان ها رو حساب می کنیم
                # y = x @ w + b   ---> dw = x , db = 1
                # d means = Geradian
                error = y_pred - y_train
                # مشتق * ارور = گرادیان
                # مشتق دبلیو  * ارور = گرادیان دبلیو 
                dw = error * x_train      
                db = error
                # inja , x_train = moshtaghe w 


                # UPDATE : استفاده از گرادیان های بدست آمده 
                self.weights = self.weights  - self.lr * dw
                self.bias = self.bias - self.lr * db


    def predict(self , X_test):
        Y_pred = []
        for x_test in X_test :
            
            # amaliate forward 
            y_pred = self.activation(x_test @ self.weights + self.bias)
            Y_pred.append(y_pred)

        return np.array(Y_pred)
        



    def accuracy(self , X_test , Y_test):

        Y_pred = self.predict(X_test)
        Y_pred = Y_pred > 0.5
        acc = np.mean(Y_pred == Y_test)
        return acc


    def calc_loss(self , X_test , Y_test , metric="mse"):
        y_pred = self.predict(X_test)

        if metric == "mse" :
            loss = np.mean(y_pred - Y_test)
        elif metric == "mae":
            loss = np.mean(np.abs(y_pred - Y_test))
        else :
            raise Exception("supported metric not found")

        return loss



    def evaluate(self , X_test , Y_test) :

        LOSS = self.calc_loss(X_test , Y_test)
        ACCURACY = self.accuracy(X_test , Y_test)
        return LOSS , ACCURACY




In [71]:
model = Perceptron(X.shape[1] , lr= 0.0001)
model.fit(X_train , Y_train , epochs=100)


100%|██████████| 100/100 [00:12<00:00,  8.03it/s]


In [72]:
loss , acc = model.evaluate(X_test , Y_test)
loss , acc

(-0.07538613732795098, 0.7782712675093952)

In [73]:
Y_pred = model.predict(X_test)
Y_pred = Y_pred > 0.5

In [74]:
# confusion-matrix

from sklearn.metrics import confusion_matrix
confusion_matrix(Y_test , Y_pred)

array([[2162,   38],
       [ 611,  116]], dtype=int64)

In [75]:
## TT | FT
## TF | FF

# precision & recall

In [76]:
## TT | FT
## TF | FF
# DAR MESAL CORONA , RECALL MOHEM TARE

#PRECISION = TP / (TP + FP) 
#RECALL = TP / ( TP + FN)
#F1_SCORE = 2 * ( PRECISION * RECALL ) / ( PRECISION + RECALL )

#if FP == FN :
    # به جای اف۱ اسکور - همون اکیوریسی رو حساب می کنیم

# dar mesale canser har do meyair mohem and 
from sklearn.metrics import precision_score , recall_score

print("precision score : " , precision_score(Y_test , Y_pred))
print("recall score : " , recall_score(Y_test , Y_pred))


precision score :  0.7532467532467533
recall score :  0.15955983493810177
