# Importing needed python modules

In [274]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
#Importing algorithm libraries
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
#Metrics of the models
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score

In [275]:
#Loading data into dataframe(df)
df=pd.read_csv('Prostate_Cancer.csv')

In [276]:
print(df.head(10))#Print all data of top 10 rows

   id diagnosis_result  radius  texture  perimeter  area  smoothness  \
0   1                M      23       12        151   954       0.143   
1   2                B       9       13        133  1326       0.143   
2   3                M      21       27        130  1203       0.125   
3   4                M      14       16         78   386       0.070   
4   5                M       9       19        135  1297       0.141   
5   6                B      25       25         83   477       0.128   
6   7                M      16       26        120  1040       0.095   
7   8                M      15       18         90   578       0.119   
8   9                M      19       24         88   520       0.127   
9  10                M      25       11         84   476       0.119   

   compactness  symmetry  fractal_dimension  
0        0.278     0.242              0.079  
1        0.079     0.181              0.057  
2        0.160     0.207              0.060  
3        0.284     0.26

In [277]:
print(df.shape)#Print the row and clumn count of the data

(100, 10)


In [278]:
print(df.isnull().sum())#Print all columns with empty data along with sum of empty data

id                   0
diagnosis_result     0
radius               0
texture              0
perimeter            0
area                 0
smoothness           0
compactness          0
symmetry             0
fractal_dimension    0
dtype: int64


In [279]:
df=df.dropna(axis=1)#Drop the column with empty data

In [280]:
df=df.drop(['id'],axis=1)

## Encoding first column

In [281]:
labelencoder_X=LabelEncoder()#Calling LabelEncoder

In [282]:
df.iloc[:,0]=labelencoder_X.fit_transform(df.iloc[:,0].values)#Encoding the values of diagnosis column to values

  df.iloc[:,0]=labelencoder_X.fit_transform(df.iloc[:,0].values)#Encoding the values of diagnosis column to values


## Splitting data for dependence

In [283]:
X=df.iloc[:,1:].values#Features of cancerous and non cancerous patients
Y=df.iloc[:,0].values#Whether patient has cancer or not

## Train-Test split

In [284]:
X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=0.25,random_state=1)

In [285]:
model_accuracies = {}

## Standard scaling

In [286]:
sc=StandardScaler()
X_train=sc.fit_transform(X_train)#Scaling X_train
X_test=sc.fit_transform(X_test)#Scaling X_test

# Logistic Regression 

In [287]:
log=LogisticRegression(random_state=0)
log.fit(X_train,Y_train)

## Model predictions and evaluations

In [288]:
print("Logistic regression:",log.score(X_train,Y_train))

Logistic regression: 0.88


In [289]:
print("\nModel: Logistic Regression")
print("Classification Report")
print(classification_report(Y_test,log.predict(X_test)))
print("Accuracy Score:",accuracy_score(Y_test,log.predict(X_test)))
model_accuracies['Logistic Regression'] = accuracy_score(Y_test,log.predict(X_test))


Model: Logistic Regression
Classification Report
              precision    recall  f1-score   support

           0       0.86      0.60      0.71        10
           1       0.78      0.93      0.85        15

    accuracy                           0.80        25
   macro avg       0.82      0.77      0.78        25
weighted avg       0.81      0.80      0.79        25

Accuracy Score: 0.8


# Decision Tree Classifier

In [290]:
tree=DecisionTreeClassifier(criterion='entropy',random_state=0)
tree.fit(X_train,Y_train)

## Model predictions and evaluations

In [291]:
print("Decision Tree:",tree.score(X_train,Y_train))

Decision Tree: 1.0


In [292]:
print("\nModel: Decision tree")
print("Classification Report")
print(classification_report(Y_test,tree.predict(X_test)))
print("Accuracy Score:",accuracy_score(Y_test,tree.predict(X_test)))
model_accuracies['Decision Tree Classifier'] = accuracy_score(Y_test,tree.predict(X_test))


Model: Decision tree
Classification Report
              precision    recall  f1-score   support

           0       0.62      0.50      0.56        10
           1       0.71      0.80      0.75        15

    accuracy                           0.68        25
   macro avg       0.67      0.65      0.65        25
weighted avg       0.67      0.68      0.67        25

Accuracy Score: 0.68


# Random Forests Classifier

In [293]:
forest=RandomForestClassifier(n_estimators=10,criterion='entropy',random_state=0)
forest.fit(X_train,Y_train)

## Random Forests Classifier Predictions and their Evaluation

In [294]:
print("Random Forest:",forest.score(X_train,Y_train))

Random Forest: 0.9733333333333334


In [295]:
print("\nModel: Random forest")
print("Classification Report")
print(classification_report(Y_test,forest.predict(X_test)))
print("Accuracy Score:",accuracy_score(Y_test,forest.predict(X_test)))
model_accuracies['Random Forests Classifier'] = accuracy_score(Y_test,forest.predict(X_test))


Model: Random forest
Classification Report
              precision    recall  f1-score   support

           0       0.82      0.90      0.86        10
           1       0.93      0.87      0.90        15

    accuracy                           0.88        25
   macro avg       0.87      0.88      0.88        25
weighted avg       0.88      0.88      0.88        25

Accuracy Score: 0.88


# Gaussian Naive Bayes Classifier

In [296]:
gnb = GaussianNB()
gnb.fit(X_train,Y_train)

## Gaussian Naive Bayes Predictions and Evaluations

In [297]:
print("GaussianNB:",gnb.score(X_train,Y_train))

GaussianNB: 0.8666666666666667


In [298]:
print("\nModel: GaussianNB")
print("Classification Report")
print(classification_report(Y_test,gnb.predict(X_test)))
print("Accuracy Score:",accuracy_score(Y_test,gnb.predict(X_test)))
model_accuracies['GNB'] = accuracy_score(Y_test,gnb.predict(X_test))


Model: GaussianNB
Classification Report
              precision    recall  f1-score   support

           0       0.62      1.00      0.77        10
           1       1.00      0.60      0.75        15

    accuracy                           0.76        25
   macro avg       0.81      0.80      0.76        25
weighted avg       0.85      0.76      0.76        25

Accuracy Score: 0.76


In [299]:
print("Performance of ML Algorithms:")
print('Logistic Regression:', str(np.round(model_accuracies['Logistic Regression']*100,2)) + ' %')
print('Decision Tree Classifier:', str(np.round(model_accuracies['Decision Tree Classifier']*100,2)) + ' %')
print('Random Forests Classifier:', str(np.round(model_accuracies['Random Forests Classifier']*100,2)) + ' %')
print('Gaussian Naive Bayes:', str(np.round(model_accuracies['GNB']*100,2)) + ' %')

Performance of ML Algorithms:
Logistic Regression: 80.0 %
Decision Tree Classifier: 68.0 %
Random Forests Classifier: 88.0 %
Gaussian Naive Bayes: 76.0 %
