In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, mean_squared_error

# One seed shared by the split and by every stochastic estimator below, so a
# Restart & Run All reproduces the same models and the same predictions.
RANDOM_STATE = 42

# Load the dataset directly from its URL.
# `names=` is supplied, so pandas treats every row of the file as data and
# labels the columns with the list below (ID, label, then 30 numeric features).
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data"
column_names = [
    "ID", "Diagnosis", "mean_radius", "mean_texture", "mean_perimeter", "mean_area", "mean_smoothness",
    "mean_compactness", "mean_concavity", "mean_concave_points", "mean_symmetry", "mean_fractal_dimension",
    "radius_error", "texture_error", "perimeter_error", "area_error", "smoothness_error",
    "compactness_error", "concavity_error", "concave_points_error", "symmetry_error",
    "fractal_dimension_error", "worst_radius", "worst_texture", "worst_perimeter", "worst_area",
    "worst_smoothness", "worst_compactness", "worst_concavity", "worst_concave_points",
    "worst_symmetry", "worst_fractal_dimension"
]
data = pd.read_csv(url, names=column_names)

# Convert 'M' and 'B' to binary labels (M -> 1, B -> 0).
data['Diagnosis'] = data['Diagnosis'].map({'M': 1, 'B': 0})
# Series.map yields NaN for any value missing from the mapping; fail loudly
# here instead of letting NaN labels reach the estimators.
unmapped_labels = int(data['Diagnosis'].isna().sum())
if unmapped_labels:
    raise ValueError(f"{unmapped_labels} rows have a Diagnosis value other than 'M' or 'B'")

X = data.drop(columns=['ID', 'Diagnosis'])
y = data['Diagnosis']

# Splitting data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=RANDOM_STATE
)

# Standardizing the data: statistics are learned from the training rows only
# and then re-applied unchanged to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

#logistic regression model
logistic_model = LogisticRegression()
logistic_model.fit(X_train, y_train)

#linear regression model (just for demonstration purposes)
# It regresses on the 0/1 label, so its output is an unbounded score, not a class.
linear_model = LinearRegression()
linear_model.fit(X_train, y_train)

#K-Nearest Neighbors (KNN) model
knn_model = KNeighborsClassifier()
knn_model.fit(X_train, y_train)

#decision tree model (seeded: tree construction is randomized)
tree_model = DecisionTreeClassifier(random_state=RANDOM_STATE)
tree_model.fit(X_train, y_train)

#random forest model (seeded: bootstrap samples and feature subsets are random)
forest_model = RandomForestClassifier(random_state=RANDOM_STATE)
forest_model.fit(X_train, y_train)

def _read_feature_value(feature):
    """Prompt on stdin until a finite number is entered for `feature`; return it as float."""
    while True:
        raw = input(f"{feature}: ")
        try:
            value = float(raw)
        except ValueError:
            print(f"'{raw}' is not a number, please try again.")
            continue
        # float() also parses 'nan' and 'inf'; reject them before they reach the models.
        if np.isfinite(value):
            return value
        print("The value must be a finite number, please try again.")


def predict_breast_cancer():
    """Interactively classify one sample with every fitted model and print the results.

    Prompts for one value per column of the module-level ``X``, standardizes the
    values with the module-level ``scaler``, and prints the prediction of
    ``logistic_model``, ``linear_model``, ``knn_model``, ``tree_model`` and
    ``forest_model``. Classifier output is reported as a label (1 is printed as
    malignant, anything else as benign) followed by the ``predict_proba`` array;
    the linear regression output is printed as a raw number.

    Returns:
        None. All results are written to stdout.
    """
    print("Please enter the following features:")
    values = [_read_feature_value(feature) for feature in X.columns]

    # The scaler was fitted on a DataFrame, so give it a DataFrame carrying the
    # same column labels; a bare ndarray makes scikit-learn warn that the input
    # lacks the feature names seen during fit.
    user_frame = pd.DataFrame([values], columns=X.columns)
    user_input = scaler.transform(user_frame)

    def describe(model):
        """Return (predicted label, class-probability array) for the single sample."""
        return model.predict(user_input)[0], model.predict_proba(user_input)

    def report(title, outcome):
        """Print one classifier's verdict in the notebook's fixed three-line layout."""
        label, probabilities = outcome
        print(f"{title} Prediction:")
        print("Malignant (Cancerous)" if label == 1 else "Benign (Non-Cancerous)")
        print(f"Prediction probabilities: {probabilities}\n")

    # Run every model before printing anything, so a failing model does not
    # leave a half-written report behind.
    logistic_outcome = describe(logistic_model)
    linear_prediction = linear_model.predict(user_input)
    remaining_outcomes = [
        ("K-Nearest Neighbors", describe(knn_model)),
        ("Decision Tree", describe(tree_model)),
        ("Random Forest", describe(forest_model)),
    ]

    print()  # blank line separating the prompts from the report
    report("Logistic Regression", logistic_outcome)

    print("Linear Regression Prediction (for demonstration purposes):")
    print(linear_prediction[0], "\n")

    for title, outcome in remaining_outcomes:
        report(title, outcome)

# Run the interactive prediction: it prompts on stdin for one value per column
# of X, then prints each fitted model's prediction for that single sample.
predict_breast_cancer()
700



Please enter the following features:
mean_radius: 5
mean_texture: 5
mean_perimeter: 5
mean_area: 5
mean_smoothness: 5
mean_compactness: 5
mean_concavity: 5
mean_concave_points: 5
mean_symmetry: 5
mean_fractal_dimension: 5
radius_error: 5
texture_error: 5
perimeter_error: 5
area_error: 5
smoothness_error: 5
compactness_error: 5
concavity_error: 5
concave_points_error: 5
symmetry_error: 5
fractal_dimension_error: 5
worst_radius: 5
worst_texture: 5
worst_perimeter: 5
worst_area: 5
worst_smoothness: 5
worst_compactness: 5
worst_concavity: 5
worst_concave_points: 5
worst_symmetry: 5
worst_fractal_dimension: 5

Logistic Regression Prediction:
Benign (Non-Cancerous)
Prediction probabilities: [[1. 0.]]

Linear Regression Prediction (for demonstration purposes):
179.05789139520076 

K-Nearest Neighbors Prediction:
Benign (Non-Cancerous)
Prediction probabilities: [[0.6 0.4]]

Decision Tree Prediction:
Benign (Non-Cancerous)
Prediction probabilities: [[1. 0.]]

Random Forest Prediction:
Benign (N



700

In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, mean_squared_error


# One seed shared by the split and by every stochastic estimator below, so a
# Restart & Run All reproduces the same models and the same predictions.
RANDOM_STATE = 42

# Source file is read with explicit column names; because `names=` is given,
# pandas treats every row as data (ID, label, then 30 numeric features).
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data"
column_names = [
    "ID", "Diagnosis", "mean_radius", "mean_texture", "mean_perimeter", "mean_area", "mean_smoothness",
    "mean_compactness", "mean_concavity", "mean_concave_points", "mean_symmetry", "mean_fractal_dimension",
    "radius_error", "texture_error", "perimeter_error", "area_error", "smoothness_error",
    "compactness_error", "concavity_error", "concave_points_error", "symmetry_error",
    "fractal_dimension_error", "worst_radius", "worst_texture", "worst_perimeter", "worst_area",
    "worst_smoothness", "worst_compactness", "worst_concavity", "worst_concave_points",
    "worst_symmetry", "worst_fractal_dimension"
]
data = pd.read_csv(url, names=column_names)

# Encode the label as 0/1 (M -> 1, B -> 0).
data['Diagnosis'] = data['Diagnosis'].map({'M': 1, 'B': 0})
# Series.map yields NaN for any value missing from the mapping; stop here
# rather than fitting the estimators on NaN labels.
unmapped_labels = int(data['Diagnosis'].isna().sum())
if unmapped_labels:
    raise ValueError(f"{unmapped_labels} rows have a Diagnosis value other than 'M' or 'B'")

# Features are every column except the identifier and the label.
X = data.drop(columns=['ID', 'Diagnosis'])
y = data['Diagnosis']

# Hold out 25% of the rows for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=RANDOM_STATE
)

# Standardize using statistics learned from the training rows only.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Logistic regression classifier.
logistic_model = LogisticRegression()
logistic_model.fit(X_train, y_train)

# Linear regression on the 0/1 label: yields an unbounded score, not a class.
linear_model = LinearRegression()
linear_model.fit(X_train, y_train)

# K-nearest-neighbours classifier.
knn_model = KNeighborsClassifier()
knn_model.fit(X_train, y_train)

# Decision tree classifier (seeded: tree construction is randomized).
tree_model = DecisionTreeClassifier(random_state=RANDOM_STATE)
tree_model.fit(X_train, y_train)

# Random forest classifier (seeded: bootstrap samples and feature subsets are random).
forest_model = RandomForestClassifier(random_state=RANDOM_STATE)
forest_model.fit(X_train, y_train)

# Non-interactive variant: scores a fixed sample (or one supplied by the caller).
def predict_breast_cancer(features=None):
    """Classify one sample with every fitted model and print the results.

    Args:
        features: Optional sequence of numeric feature values, one per column
            of the module-level ``X`` and in the same order. When omitted, the
            built-in 30-value sample below is used, which is the original
            behavior of this function.

    Returns:
        None. All results are written to stdout.

    Raises:
        ValueError: If ``features`` does not hold exactly one value per column of ``X``.
    """
    # Fixed sample, not random input.
    # NOTE(review): the values look like a real record from the dataset loaded
    # in this cell - confirm which row, and its true label, before citing the output.
    sample_features = (
        17.99, 10.38, 122.8, 1001.0, 0.11840, 0.27760, 0.3001, 0.14710, 0.2419, 0.07871,
        1.095, 0.9053, 8.589, 153.4, 0.006399, 0.04904, 0.05373, 0.01587, 0.03003, 0.006193,
        25.38, 17.33, 184.6, 2019.0, 0.16220, 0.66560, 0.7119, 0.26540, 0.4601, 0.11890,
    )
    values = list(sample_features if features is None else features)
    if len(values) != len(X.columns):
        raise ValueError(f"expected {len(X.columns)} feature values, got {len(values)}")

    # Nothing is read from stdin here; the header text is kept unchanged so the
    # printed report stays identical to the output recorded for this cell.
    print("Please enter the following features:")

    # The scaler was fitted on a DataFrame, so give it a DataFrame carrying the
    # same column labels; a bare ndarray makes scikit-learn warn that the input
    # lacks the feature names seen during fit.
    sample_frame = pd.DataFrame([values], columns=X.columns)
    user_input = scaler.transform(sample_frame)

    def describe(model):
        """Return (predicted label, class-probability array) for the single sample."""
        return model.predict(user_input)[0], model.predict_proba(user_input)

    def report(title, outcome):
        """Print one classifier's verdict in the notebook's fixed three-line layout."""
        label, probabilities = outcome
        print(f"{title} Prediction:")
        print("Malignant (Cancerous)" if label == 1 else "Benign (Non-Cancerous)")
        print(f"Prediction probabilities: {probabilities}\n")

    # Run every model before printing the report, so a failing model does not
    # leave a half-written report behind.
    logistic_outcome = describe(logistic_model)
    linear_prediction = linear_model.predict(user_input)
    remaining_outcomes = [
        ("K-Nearest Neighbors", describe(knn_model)),
        ("Decision Tree", describe(tree_model)),
        ("Random Forest", describe(forest_model)),
    ]

    print()  # blank line separating the header from the report
    report("Logistic Regression", logistic_outcome)

    print("Linear Regression Prediction (for demonstration purposes):")
    print(linear_prediction[0], "\n")

    for title, outcome in remaining_outcomes:
        report(title, outcome)


# Print every fitted model's prediction for the sample hard-coded inside the function.
predict_breast_cancer()


Please enter the following features:

Logistic Regression Prediction:
Malignant (Cancerous)
Prediction probabilities: [[1.07247325e-08 9.99999989e-01]]

Linear Regression Prediction (for demonstration purposes):
0.9874160752379697 

K-Nearest Neighbors Prediction:
Malignant (Cancerous)
Prediction probabilities: [[0. 1.]]

Decision Tree Prediction:
Malignant (Cancerous)
Prediction probabilities: [[0. 1.]]

Random Forest Prediction:
Malignant (Cancerous)
Prediction probabilities: [[0.13 0.87]]



