: 

Importing the Dependencies

In [None]:
import numpy as np
import pandas as pd
import sklearn.datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

Data Collection and Processing

In [None]:
# Load scikit-learn's bundled breast cancer dataset. The returned object
# exposes the .data, .feature_names and .target attributes used below.
breast_cancer_dataset = sklearn.datasets.load_breast_cancer()

In [None]:
# Show which fields the dataset object provides instead of printing the whole
# object (raw arrays plus the long description string), which floods the output.
breast_cancer_dataset.keys()

In [None]:
# Build a DataFrame from the feature matrix, one named column per feature
data_frame = pd.DataFrame(
    data=breast_cancer_dataset.data,
    columns=breast_cancer_dataset.feature_names,
)

In [None]:
# Preview the first five rows of the feature table
data_frame.head(n=5)

In [None]:
# Append the dataset's target values as a new 'label' column.
# Note: this modifies data_frame in place; re-running the cell simply
# overwrites the column with the same values.
data_frame['label'] = breast_cancer_dataset.target

In [None]:
# First five rows, now including the 'label' column
data_frame.head(n=5)

In [None]:
# Number of rows and columns in the dataset, as a (rows, columns) tuple
data_frame.shape

In [None]:
# Getting some info about the data (columns, non-null counts, dtypes)
data_frame.info()

In [None]:
# Count the missing values in each column
data_frame.isna().sum()

In [None]:
# Statistical measures about the data.
# The 25% row is the first quartile: a quarter of the values in that column
# fall below it. The original note cites 11.7 as an example, presumably for
# the first column -- confirm against the rendered output.
data_frame.describe()

In [None]:
# Checking the distribution of the target variable (number of samples per class)
data_frame['label'].value_counts()

1 => Benign
0 => Malignant

In [None]:
# Mean of every feature, computed separately for each label value
data_frame.groupby(by='label').mean()

Separating the Features and Target

In [None]:
# Features: every column except 'label' (columns= already implies the column axis)
X = data_frame.drop(columns='label')
# Target: the 'label' column on its own
Y = data_frame['label']

In [None]:
# Feature matrix: every column of data_frame except 'label'
print(X)

In [None]:
# Target vector: the 'label' column of data_frame
print(Y)

Splitting the data into Training data and Testing data

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=2,
)

In [None]:
# Shapes of the full, training and test feature sets
print(X.shape, X_train.shape, X_test.shape)

Model Training

Logistic Regression

In [None]:
# The features are used unscaled, so the solver typically reaches the default
# cap of 100 iterations on this data and emits a ConvergenceWarning.
# Raise the cap so the optimiser can run until it converges.
model = LogisticRegression(max_iter=10000)

In [None]:
# Train the logistic regression model on the training split
model.fit(X=X_train, y=Y_train)

Model Evaluation

Accuracy Score

In [None]:
# Predict on the data the model was trained on and score against the true labels
X_train_prediction = model.predict(X_train)
training_data_accuracy = accuracy_score(
    y_true=Y_train,
    y_pred=X_train_prediction,
)

In [None]:
# Report the fraction of training samples classified correctly
print('Accuracy on training data : ', training_data_accuracy)

In [None]:
# Predict on the held-out test split and score against the true labels
X_test_prediction = model.predict(X_test)
test_data_accuracy = accuracy_score(
    y_true=Y_test,
    y_pred=X_test_prediction,
)

In [None]:
# Report the fraction of held-out test samples classified correctly
print('Accuracy on test data : ', test_data_accuracy)

Building a Predictive System

In [None]:
# Feature values for a single sample. They are assumed to be listed in the
# same order as the training feature columns -- confirm before reusing.
input_data = (
    20.57, 17.77, 132.9, 1326, 0.08474, 0.07864, 0.0869, 0.07017, 0.1812, 0.05667,
    0.5435, 0.7339, 3.398, 74.08, 0.005225, 0.01308, 0.0186, 0.0134, 0.01389, 0.003532,
    24.99, 23.41, 158.8, 1956, 0.1238, 0.1866, 0.2416, 0.186, 0.275, 0.08902,
)

# Convert tuple into array so that we can reshape it
input_data_as_numpy_array = np.asarray(input_data)

# Reshape to a single row (one instance) and attach the training column names.
# The model was fitted on a DataFrame, so predicting on a bare NumPy array
# makes scikit-learn warn that X does not have valid feature names. Building
# the DataFrame also fails loudly if the number of values does not match the
# number of training features.
input_data_reshaped = pd.DataFrame(
    input_data_as_numpy_array.reshape(1, -1),
    columns=X.columns,
)

prediction = model.predict(input_data_reshaped)

# Label encoding: 0 => Malignant, 1 => Benign
if prediction[0] == 0:
    print('The Breast Cancer is Malignant')
else:
    print('The Breast Cancer is Benign')