In [None]:
!pip install numpy pandas scikit-learn

In [None]:
import numpy as np
import pandas as pd
import sklearn.datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score 

##### Data Collection and Processing

In [None]:
# Load the breast cancer dataset bundled with scikit-learn (a dict-like Bunch).
breast_cancer_dataset = sklearn.datasets.load_breast_cancer()

# Show a short structural summary instead of printing the whole object:
# the full repr dumps every raw array plus the long description text.
print("Keys:", list(breast_cancer_dataset.keys()))
print("Feature matrix shape:", breast_cancer_dataset.data.shape)
print("Target classes:", list(breast_cancer_dataset.target_names))

In [None]:
# Wrap the raw feature matrix in a DataFrame, one named column per feature
df = pd.DataFrame(
    data=breast_cancer_dataset.data,
    columns=breast_cancer_dataset.feature_names,
)

In [None]:
# Preview the first rows of the feature table
df.head()

In [None]:
# Append the dataset's target values as a 'label' column next to the features
df['label'] = breast_cancer_dataset.target

In [None]:
# Preview again to confirm the 'label' column is now part of the table
df.head()

In [None]:
# (rows, columns) of the table, 'label' column included
df.shape

In [None]:
# Per-column overview of the table (dtypes and non-null counts)
df.info()

In [None]:
# Number of missing values in each column
df.isnull().sum()

In [None]:
# Descriptive statistics for each column of the table
df.describe()

## Types of Tumors

### Benign --> Non-cancerous
### Malignant --> Cancerous

In [None]:
# Count the samples in each class to check how balanced the target is.
# Label encoding: 1 = benign cases, 0 = malignant cases.
df['label'].value_counts()

In [None]:
# Mean of every feature within each class, to compare the two groups
df.groupby('label').mean()

#### Separating the features and target

In [None]:
# Features: every column except the target
X = df.drop(columns="label")
# Target: the class label column on its own
Y = df["label"]

In [None]:
# Display the feature table as the cell's last expression so the notebook
# renders it as a rich (truncated) table instead of plain printed text.
X

In [None]:
# Display the target values as the cell's last expression rather than print()
Y

#### Splitting the data into training & testing data

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=2,
)

In [None]:
# Shapes of the full, training and test feature sets, to verify the split sizes
X.shape, X_train.shape, X_test.shape

In [None]:
# Shapes of the full, training and test targets; they should line up with the features
Y.shape, Y_train.shape, Y_test.shape

#### Model Training

##### Logistic Regression

In [None]:
# Logistic regression classifier. The default cap of 100 solver iterations is
# typically too low for these unscaled features and ends in a
# ConvergenceWarning, so give the solver more room to converge.
model = LogisticRegression(max_iter=10000)

In [None]:
# Fit the logistic regression model on the training split only
model.fit(X_train, Y_train)

##### Model Evaluation

##### Accuracy Score

In [None]:
# Score the model on the same rows it was fitted on (training accuracy)
X_train_prediction = model.predict(X_train)
training_data_accuracy = accuracy_score(
    y_true=Y_train,
    y_pred=X_train_prediction,
)

In [None]:
# Report the accuracy measured on the training split
print("Accuracy on training data =", training_data_accuracy)

In [None]:
# Score the model on the held-out rows (test accuracy)
X_test_prediction = model.predict(X_test)
test_data_accuracy = accuracy_score(
    y_true=Y_test,
    y_pred=X_test_prediction,
)

In [None]:
# Report the accuracy measured on the held-out test split
print("Accuracy on test data =", test_data_accuracy)

#### Making a Prediction for a New Sample

In [None]:
# One sample to classify: 30 feature values, in the same order as the training columns
input_data = (13.08,15.71,85.63,520,0.1075,0.127,0.04568,0.0311,0.1967,0.06811,0.1852,0.7477,1.383,14.67,0.004097,0.01898,0.01698,0.00649,0.01678,0.002425,14.5,20.49,96.09,630.5,0.1312,0.2776,0.189,0.07283,0.3184,0.081838510824)

# Changing the input data to a numpy array
input_data_as_array = np.asarray(input_data)

# Reshape to a single row (1, n_features) because we are predicting for one data point
input_data_reshaped = input_data_as_array.reshape(1, -1)

# The model was fitted on a DataFrame with named columns, so pass the sample
# with the same column names instead of a bare array; otherwise scikit-learn
# warns that X has no valid feature names. Building the frame also fails
# loudly if the number of values does not match the training columns.
input_df = pd.DataFrame(input_data_reshaped, columns=X_train.columns)

prediction = model.predict(input_df)

# predict() returns one label per input row; read the single label explicitly
# instead of relying on the truthiness of a one-element array.
if prediction[0] == 0:
    print("The Breast Cancer is Malignant")
else:
    print("The Breast Cancer is Benign")