In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

In [2]:
# Load the heart disease dataset
data = pd.read_csv('Prostate_Cancer.csv')

In [5]:
# Split the dataset into features and target variable
X = data.drop('diagnosis_result', axis=1)
y = data['diagnosis_result']

In [7]:
# show the first 5 rows of the dataset of x and y
# print(X.head())
print(y.head())

0    M
1    B
2    M
3    M
4    M
Name: diagnosis_result, dtype: object


In [8]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
# Scale the features using StandardScaler
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [10]:
# Artificial Neural Network (ANN) model
ann_model = MLPClassifier(hidden_layer_sizes=(100,), activation='relu', solver='adam', random_state=42)
ann_model.fit(X_train, y_train)
ann_predictions = ann_model.predict(X_test)
ann_accuracy = accuracy_score(y_test, ann_predictions)
print("ANN Accuracy:", ann_accuracy)

ANN Accuracy: 0.9




In [11]:

# Decision Tree (DT) model
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train, y_train)
dt_predictions = dt_model.predict(X_test)
dt_accuracy = accuracy_score(y_test, dt_predictions)
print("DT Accuracy:", dt_accuracy)

DT Accuracy: 0.8


In [21]:
# Logestic Regression (LR) model
from sklearn.linear_model import LogisticRegression
lr_model = LogisticRegression(random_state=56, max_iter=10000, solver='newton-cg', multi_class='ovr', C=0.5)
lr_model.fit(X_train, y_train)
lr_predictions = lr_model.predict(X_test)
lr_accuracy = accuracy_score(y_test, lr_predictions)
print("LR Accuracy:", lr_accuracy)


LR Accuracy: 0.85


In [12]:
# Combine predictions using a hybrid approach
hybrid_predictions = []
for i in range(len(X_test)):
    ann_prediction = ann_predictions[i]
    dt_prediction = dt_predictions[i]

    if ann_prediction == dt_prediction:
        hybrid_predictions.append(ann_prediction)
    else:
        hybrid_predictions.append(dt_prediction)

hybrid_accuracy = accuracy_score(y_test, hybrid_predictions)
print("Hybrid Accuracy:", hybrid_accuracy)

Hybrid Accuracy: 0.8
