In [None]:
import numpy as np
import pandas as pd
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Load the COVID dataset from its CSV file (path is relative to the working directory)
ruta_datos = "Dataset/covid.csv"
df = pd.read_csv(ruta_datos)

In [None]:
# Drop the columns that are not used as model features.
# The drop mutates `df` in place, so re-running this cell on the same frame
# raises KeyError because the columns are already gone.
eliminar_columnas = [
    'id',
    'patient_type',
    'entry_date',
    'date_symptoms',
    'date_died',
    'other_disease',
    'icu',
]
df.drop(columns=eliminar_columnas, inplace=True)

In [None]:
# Keep only rows whose columns do NOT hold the listed sentinel code:
# covid_res == 3, contact_other_covid == 99, and 98 in every other listed column.
# NOTE(review): the codes presumably mean "pending/unknown" — confirm against the
# data dictionary, and confirm whether other codes (e.g. 97/99) should be dropped too.
codigos_excluidos = {
    'covid_res': 3,
    'contact_other_covid': 99,
    'tobacco': 98,
    'obesity': 98,
    'copd': 98,
    'diabetes': 98,
    'asthma': 98,
    'inmsupr': 98,
    'hypertension': 98,
    'cardiovascular': 98,
    'renal_chronic': 98,
    'intubed': 98,
    'pregnancy': 98,
    'pneumonia': 98,
}

# Start from "keep everything" and AND in one condition per column.
filas_validas = pd.Series(True, index=df.index)
for columna, codigo in codigos_excluidos.items():
    filas_validas = filas_validas & (df[columna] != codigo)

df = df[filas_validas]

In [None]:
# Separate the target column (covid_res) from the feature matrix (all remaining columns)
y = df['covid_res']
X = df.drop(columns=['covid_res'])

In [None]:
# Hold out 30% of the rows as the test set; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Create a Gaussian Naive Bayes classifier and fit it on the training split
naive_bayes = GaussianNB()
naive_bayes.fit(X=X_train, y=y_train)

In [None]:
# Predict the class label for every row of the held-out test set
y_pred = naive_bayes.predict(X=X_test)

In [None]:
# Evaluate the model: fraction of test rows whose predicted label matches the true one.
# NOTE(review): the printed message says "precisión", but the value is
# accuracy_score (accuracy), not the precision metric — confirm the wording.
accuracy = metrics.accuracy_score(y_true=y_test, y_pred=y_pred)
porcentaje_aciertos = accuracy * 100
print("La precisión del modelo es: {:.2f}% ".format(porcentaje_aciertos))

In [None]:
# Build the confusion matrix (rows: true labels, columns: predicted labels)
cm = metrics.confusion_matrix(y_true=y_test, y_pred=y_pred)

In [None]:
# Plot the confusion matrix as an annotated heatmap.
# fmt='g' prints the cell counts as plain numbers instead of scientific notation.
ax = sns.heatmap(cm, annot=True, fmt='g')
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
plt.show()

In [None]:
# Build a single-row frame with the user's data.
# The keys are the feature columns left after the drop/filter steps, in the
# order written here; every field is set to 2 except age (35).
# NOTE(review): 2 presumably encodes "no" for the yes/no fields — confirm
# against the data dictionary.
datos_usuario = {
    'sex': 2,
    'intubed': 2,
    'pneumonia': 2,
    'age': 35,
    'pregnancy': 2,
    'diabetes': 2,
    'copd': 2,
    'asthma': 2,
    'inmsupr': 2,
    'hypertension': 2,
    'cardiovascular': 2,
    'obesity': 2,
    'renal_chronic': 2,
    'tobacco': 2,
    'contact_other_covid': 2,
}
usuario = pd.DataFrame(datos_usuario, index=[0])

In [None]:
# Predict the probability that the user is COVID-19 positive.
#
# predict_proba returns one column per entry of naive_bayes.classes_ (labels in
# sorted order), so a hard-coded column 1 selects the *second* label, not
# necessarily the positive one. With covid_res == 3 filtered out, the remaining
# labels are presumably 1 and 2, and column 1 would be label 2.
# NOTE(review): this assumes covid_res == 1 means "positive" and 2 means
# "negative" — confirm against the dataset's data dictionary.
etiqueta_positiva = 1

# Look the column up by label; raises ValueError if the model never saw that label.
indice_positivo = list(naive_bayes.classes_).index(etiqueta_positiva)
probabilidad = naive_bayes.predict_proba(usuario)[0][indice_positivo]

In [None]:
# Print the predicted probability as a whole-number percentage
porcentaje_covid = probabilidad * 100
print("Probabilidad de COVID 19: {:2.0f}%".format(porcentaje_covid))