In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [2]:
data = load_breast_cancer()

In [3]:
data.feature_names

array(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
       'mean smoothness', 'mean compactness', 'mean concavity',
       'mean concave points', 'mean symmetry', 'mean fractal dimension',
       'radius error', 'texture error', 'perimeter error', 'area error',
       'smoothness error', 'compactness error', 'concavity error',
       'concave points error', 'symmetry error',
       'fractal dimension error', 'worst radius', 'worst texture',
       'worst perimeter', 'worst area', 'worst smoothness',
       'worst compactness', 'worst concavity', 'worst concave points',
       'worst symmetry', 'worst fractal dimension'], dtype='<U23')

In [4]:
data.data.shape[0]

569

In [5]:
X = data.data
y = data.target

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
#Feature Scaling
model = SVC()
model.fit(X_train, y_train)

In [8]:
y_pred = model.predict(X_test)
acc_sv = int(accuracy_score(y_test, y_pred) * 100)
print("Accuracy: ", acc_sv)

Accuracy:  94


In [9]:
from sklearn.linear_model import LinearRegression

model = LinearRegression()
model.fit(X_train, y_train)

In [10]:
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("Mean Squared Error:", mse)
print("R-squared:", r2)

Mean Squared Error: 0.06410886247029429
R-squared: 0.7271016126223568


In [11]:
mae = mean_absolute_error(y_test, y_pred)
print("Mean Absolute Error:", mae)

Mean Absolute Error: 0.19690374465646368


In [12]:
from sklearn.ensemble import RandomForestClassifier

rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train, y_train)


In [13]:
y_pred = rf_classifier.predict(X_test)

In [14]:
acc_rf = int(accuracy_score(y_test, y_pred) * 100)
print("Accuracy:", acc_rf)


Accuracy: 96


In [15]:
conf_matrix = confusion_matrix(y_test, y_pred)
TP = conf_matrix[1, 1]
FP = conf_matrix[0, 1]
TN = conf_matrix[0, 0]
FN = conf_matrix[1, 0]
print("Confusion Matrix:")
print(conf_matrix)
print("True Positive (TP):", TP)
print("False Positive (FP):", FP)
print("True Negative (TN):", TN)
print("False Negative (FN):", FN)

Confusion Matrix:
[[40  3]
 [ 1 70]]
True Positive (TP): 70
False Positive (FP): 3
True Negative (TN): 40
False Negative (FN): 1


In [19]:
# Function to predict the condition
def predict_condition(user_input):
    # Convert user input to a numpy array
    user_input_np = np.array(user_input).reshape(1, -1)

    # Make a prediction using the trained model
    prediction = rf_classifier.predict(user_input_np)

    # Display the diagnosis result
    if prediction[0] == 0:
        return "Condition: Benign (Non-Cancerous)"
    else:
        return "Condition: Malignant (Cancerous)"

if __name__ == "__main__":
    # Get user input for the 30 features
    user_input = []
    for i in range(30):
        feature_value = float(input(f"Enter value for feature '{data.feature_names[i]}': "))
        user_input.append(feature_value)

    # Make the prediction
    result = predict_condition(user_input)
    print(result)

Condition: Malignant (Cancerous)
