# Proyecto 2

**Tema:** Predicción de enfermedades cardíacas


|     Nombres      |      Apellidos       |     Login     |  Código   |
| :--------------: | :------------------: | :-----------: | :-------: |
|     Santiago     | Gonzalez Montealegre | s.gonzalez35  | 202012274 |
| Juliana Carolina |  Cardenas Barragan   | jc.cardenasb1 | 202011683 |


## Medidas de Desempeño - Original

## K-Fold Cross Validation

## Librerías Requeridas

In [1]:
import pandas as pd
from pgmpy.models import BayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination
from sklearn.model_selection import KFold

## Carga de Datos

In [2]:
# Input CSV, given as a relative path (so it resolves against the current working directory).
# NOTE(review): presumably the already-discretized dataset, judging by the file name — confirm.
DISCRETE_CSV = "../../../Data/Transformed/Discrete.csv"
df = pd.read_csv(DISCRETE_CSV)

## K-Fold

In [3]:
# Every column except the target 'heartdis' is used as a predictor.
variables = [name for name in df.columns if name != 'heartdis']

# Plain arrays, so the positional indices produced by KFold can be applied directly.
y = df['heartdis'].values
X = df[variables].values

# Shuffled 4-way split with a fixed seed so the folds are reproducible.
kf = KFold(n_splits=4, shuffle=True, random_state=1)

# Materialize the (train_index, test_index) pairs so they can be iterated later.
splits = [fold for fold in kf.split(X)]

## Test

In [4]:
def inferenceEvidence(evidence):
    """Query the distribution of ``heartdis`` given ``evidence``.

    A ``VariableElimination`` engine is built on every call over the
    module-level ``model``; that name is read as a global, so it must be
    assigned before this function is called.

    Args:
        evidence: Mapping forwarded unchanged as ``query(evidence=...)``.

    Returns:
        The ``values`` of the query result, converted with ``tolist()``.
    """
    return (
        VariableElimination(model)
        .query(variables=['heartdis'], evidence=evidence)
        .values
        .tolist()
    )

In [5]:
def getEvidence(nodes, row):
    """Build the evidence mapping for one observation.

    Args:
        nodes: Iterable of node names.
        row: Object indexable by node name (``row[node]``).

    Returns:
        Dict mapping every node except ``'heartdis'`` to ``row[node]``.
    """
    return {name: row[name] for name in nodes if name != 'heartdis'}

In [6]:
def getClassification(probs):
    """Turn a two-entry probability sequence into a class label.

    Args:
        probs: Sequence where ``probs[0]`` is compared against ``probs[1]``.

    Returns:
        ``0`` when ``probs[0] >= probs[1]`` (ties go to 0), otherwise ``1``.
        Because the test is ``>=``, any pair for which that comparison is
        false -- including NaN entries -- yields ``1``.
    """
    p_zero, p_one = probs[0], probs[1]
    return 0 if p_zero >= p_one else 1

In [7]:
def getMetrics(df_test):
    """Count the confusion-matrix cells for one evaluated test set.

    Args:
        df_test: DataFrame with a ``'heartdis'`` column and a ``'test'``
            column; the two are compared row by row.

    Returns:
        Dict with keys ``'TN'``, ``'FP'``, ``'FN'``, ``'TP'``. Rows are split
        by whether ``heartdis`` equals ``test`` and by whether ``test`` is
        0 or 1.
    """
    pairs = df_test[['heartdis', 'test']]

    agrees = pairs['heartdis'] == pairs['test']
    differs = pairs['heartdis'] != pairs['test']
    said_no = pairs['test'] == 0
    said_yes = pairs['test'] == 1

    def tally(mask):
        # Number of non-null 'test' entries among the rows selected by mask.
        return pairs.loc[mask, 'test'].count()

    return {
        'TN': tally(agrees & said_no),
        'FP': tally(differs & said_yes),
        'FN': tally(differs & said_no),
        'TP': tally(agrees & said_yes),
    }

In [8]:
def trainModel(df_train):
    """Build the fixed-structure Bayesian network and fit it on ``df_train``.

    Args:
        df_train: Training data passed to ``fit(data=...)``.

    Returns:
        The fitted ``BayesianNetwork`` after ``check_model()`` has been called
        on it.
    """
    # Directed edges (parent, child); the order is kept as originally declared.
    edges = [
        ('age', 'ca'),
        ('age', 'thalach'),
        ('age', 'trestbps'),
        ('sex', 'heartdis'),
        ('sex', 'thal'),
        ('slope', 'heartdis'),
        ('slope', 'oldpeak'),
        ('slope', 'thalach'),
        ('ca', 'heartdis'),
        ('exang', 'cp'),
        ('exang', 'oldpeak'),
        ('exang', 'thalach'),
        ('thal', 'exang'),
        ('thal', 'heartdis'),
        ('thal', 'oldpeak'),
        ('heartdis', 'cp'),
        ('heartdis', 'oldpeak'),
    ]
    network = BayesianNetwork(edges)

    # Estimate the parameters with maximum likelihood, then validate the result.
    network.fit(data=df_train, estimator=MaximumLikelihoodEstimator)
    network.check_model()

    return network

In [9]:
# Evaluate the model on every K-Fold split and collect the confusion counts.

kfold_metrics = []
for train_index, test_index in splits:

    # Rebuild labelled train / test frames from the positional fold indices.
    df_train = pd.DataFrame(X[train_index], columns=variables)
    df_train['heartdis'] = y[train_index]

    df_test = pd.DataFrame(X[test_index], columns=variables)
    df_test['heartdis'] = y[test_index]

    # `model` has to stay a module-level name: inferenceEvidence reads it as a global.
    model = trainModel(df_train)
    nodes = model.nodes

    # Classify each held-out row: evidence -> probabilities -> 0/1 label.
    test = [
        getClassification(
            inferenceEvidence(
                getEvidence(nodes=nodes, row=df_test.iloc[position])
            )
        )
        for position in range(len(df_test))
    ]
    df_test['test'] = test

    kfold_metrics.append(getMetrics(df_test))

# One row per fold; derived metrics are appended as extra columns, in this order.
df_performance = pd.DataFrame(kfold_metrics).assign(
    accuracy=lambda d: (d['TN'] + d['TP']) / (d['TN'] + d['FP'] + d['FN'] + d['TP']),
    precision=lambda d: (d['TP']) / (d['FP'] + d['TP']),
    recall=lambda d: (d['TP']) / (d['FN'] + d['TP']),
    f1_score=lambda d: 2 * (d['precision'] * d['recall']) / (d['precision'] + d['recall']),
)

  phi.values = phi.values / phi.values.sum()
  phi.values = phi.values / phi.values.sum()
  phi.values = phi.values / phi.values.sum()
  phi.values = phi.values / phi.values.sum()


In [21]:
# Show the per-fold confusion counts together with the derived metric columns.
df_performance

Unnamed: 0,TN,FP,FN,TP,accuracy,precision,recall,f1_score
0,27,14,5,30,0.75,0.681818,0.857143,0.759494
1,39,6,11,20,0.776316,0.769231,0.645161,0.701754
2,29,8,8,31,0.789474,0.794872,0.794872,0.794872
3,31,10,10,24,0.733333,0.705882,0.705882,0.705882


In [25]:
# Average each metric column across the folds.
accuracy, precision, recall, f1_score = (
    df_performance[metric_name].mean()
    for metric_name in ('accuracy', 'precision', 'recall', 'f1_score')
)

# Report the averages rounded to two decimals.
print(f'accuracy: \t {round(accuracy, 2)}')
print(f'precision: \t {round(precision, 2)}')
print(f'recall: \t {round(recall, 2)}')
print(f'f1_score: \t {round(f1_score, 2)}')

accuracy: 	 0.76
precision: 	 0.74
recall: 	 0.75
f1_score: 	 0.74
