In [155]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, f1_score, confusion_matrix
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

#Importing Dataset
dataset=pd.read_csv('D:\Programming\Datasets\Classification\Heart_Failure\heart.csv')

#relabel values in columns to be numeric
label_encoder = LabelEncoder()
dataset['Sex'] = label_encoder.fit_transform(dataset['Sex'])
dataset['ChestPainType'] = label_encoder.fit_transform(dataset['ChestPainType'])
dataset['RestingECG'] = label_encoder.fit_transform(dataset['RestingECG'])
dataset['ExerciseAngina'] = label_encoder.fit_transform(dataset['ExerciseAngina'])
dataset['ST_Slope'] = label_encoder.fit_transform(dataset['ST_Slope'])

X=dataset[['Age', 'Sex', 'ChestPainType', 'RestingBP', 'Cholesterol', 'FastingBS', 'RestingECG', 'MaxHR', 'ExerciseAngina', 'Oldpeak', 'ST_Slope']].values
y=dataset[['HeartDisease']].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

y_train = y_train.ravel()
y_test = y_test.ravel()

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [156]:
X_corrupt_25 = X_train.copy()
X_corrupt_50 = X_train.copy()
X_corrupt_75 = X_train.copy()

# Introduce noise to a percentage of the training data
corrupt_25 = int(len(X_train) * 25 / 100)
corrupt_50 = int(len(X_train) * 50 / 100)
corrupt_75 = int(len(X_train) * 75 / 100)

# Randomly select samples to corrupt
indices_corrupt_25 = np.random.choice(len(X_corrupt_25), corrupt_25, replace=False)
indices_corrupt_50 = np.random.choice(len(X_corrupt_50), corrupt_50, replace=False)
indices_corrupt_75 = np.random.choice(len(X_corrupt_75), corrupt_75, replace=False)

# Introduce noise to selected samples
X_corrupt_25[indices_corrupt_25, :] = np.random.normal(0, 10, size=X_corrupt_25[indices_corrupt_25, :].shape)
X_corrupt_50[indices_corrupt_50, :] = np.random.normal(0, 10, size=X_corrupt_50[indices_corrupt_50, :].shape)
X_corrupt_75[indices_corrupt_75, :] = np.random.normal(0, 10, size=X_corrupt_75[indices_corrupt_75, :].shape)

In [157]:
from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier(n_estimators=100)
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)

rf_25 = RandomForestClassifier(n_estimators=100)
rf_25.fit(X_corrupt_25, y_train)
y_pred_25 = rf_25.predict(X_test)

rf_50 = RandomForestClassifier(n_estimators=100)
rf_50.fit(X_corrupt_50, y_train)
y_pred_50 = rf_50.predict(X_test)

rf_75 = RandomForestClassifier(n_estimators=100)
rf_75.fit(X_corrupt_75, y_train)
y_pred_75 = rf_75.predict(X_test)

In [158]:
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
tp, fp, fn, tn = conf_matrix.ravel()
print("TP:" + str(tp) + " TN:" + str(tn) + " FP:" + str(fp) + " FN:" + str(fn))
print(f"Random Forest Accuracy 0% Noise: {accuracy:.2f}")
print(f"Random Forest F1-Score 0% Noise: {f1:.2f}")
print("")

accuracy_25 = accuracy_score(y_test, y_pred_25)
f1_25 = f1_score(y_test, y_pred_25)
conf_matrix = confusion_matrix(y_test, y_pred_25)
tp, fp, fn, tn = conf_matrix.ravel()
print("TP:" + str(tp) + " TN:" + str(tn) + " FP:" + str(fp) + " FN:" + str(fn))
print(f"Random Forest Accuracy 25% Noise: {accuracy_25:.2f}")
print(f"Random Forest F1-Score 25% Noise: {f1_25:.2f}")
print("")

accuracy_50 = accuracy_score(y_test, y_pred_50)
f1_50 = f1_score(y_test, y_pred_50)
conf_matrix = confusion_matrix(y_test, y_pred_50)
tp, fp, fn, tn = conf_matrix.ravel()
print("TP:" + str(tp) + " TN:" + str(tn) + " FP:" + str(fp) + " FN:" + str(fn))
print(f"Random Forest Accuracy 50% Noise: {accuracy_50:.2f}")
print(f"Random Forest F1-Score 50% Noise: {f1_50:.2f}")
print("")

accuracy_75 = accuracy_score(y_test, y_pred_75)
f1_75 = f1_score(y_test, y_pred_75)
conf_matrix = confusion_matrix(y_test, y_pred_75)
tp, fp, fn, tn = conf_matrix.ravel()
print("TP:" + str(tp) + " TN:" + str(tn) + " FP:" + str(fp) + " FN:" + str(fn))
print(f"Random Forest Accuracy 75% Noise: {accuracy_75:.2f}")
print(f"Random Forest F1-Score 75% Noise: {f1_75:.2f}")
print("")

TP:67 TN:95 FP:10 FN:12
Random Forest Accuracy 0% Noise: 0.88
Random Forest F1-Score 0% Noise: 0.90

TP:67 TN:94 FP:10 FN:13
Random Forest Accuracy 25% Noise: 0.88
Random Forest F1-Score 25% Noise: 0.89

TP:66 TN:98 FP:11 FN:9
Random Forest Accuracy 50% Noise: 0.89
Random Forest F1-Score 50% Noise: 0.91

TP:68 TN:92 FP:9 FN:15
Random Forest Accuracy 75% Noise: 0.87
Random Forest F1-Score 75% Noise: 0.88

