In [1]:
# Importing necessary libraries
import numpy as np
import pandas as pd

## IRIS DATASET

In [2]:
from sklearn.datasets import load_iris

In [3]:
# Loading Iris dataset
iris = load_iris()
X = iris.data
y = iris.target

In [6]:
from sklearn.model_selection import train_test_split

In [7]:
# Splitting the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=40)

# Decision Tree Classifier

In [8]:
from sklearn.tree import DecisionTreeClassifier

In [10]:
# Implementation
dt_classifier = DecisionTreeClassifier()

In [11]:
dt_classifier.fit(X_train, y_train)

In [12]:
dt_classifier.fit(X_train, y_train)

In [13]:
from sklearn.metrics import accuracy_score, classification_report

In [14]:
# Evaluation metrics
print("Decision Tree Classifier (without hyperparameters):")
print("Accuracy:", accuracy_score(y_test, dt_predictions))

Decision Tree Classifier (without hyperparameters):
Accuracy: 1.0


In [15]:
print("Classification Report:")
print(classification_report(y_test, dt_predictions))

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         8
           1       1.00      1.00      1.00        12
           2       1.00      1.00      1.00        10

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



### Hyperparameter tuning 

In [16]:
from sklearn.model_selection import GridSearchCV

In [18]:
# Hyperparameter tuning 
dt_params = {'criterion': ['gini', 'entropy'], 'max_depth': [None, 5, 10, 15]}
dt_grid = GridSearchCV(DecisionTreeClassifier(), dt_params, cv=5)

In [19]:
dt_grid.fit(X_train, y_train)

In [20]:
best_dt_model = dt_grid.best_estimator_

In [21]:
# Implementation 
best_dt_model.fit(X_train, y_train)

In [22]:
best_dt_predictions = best_dt_model.predict(X_test)

In [23]:
# Evaluation metrics
print("\nDecision Tree Classifier (with hyperparameters):")
print("Accuracy:", accuracy_score(y_test, best_dt_predictions))


Decision Tree Classifier (with hyperparameters):
Accuracy: 1.0


In [24]:
print("Classification Report:")
print(classification_report(y_test, best_dt_predictions))

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         8
           1       1.00      1.00      1.00        12
           2       1.00      1.00      1.00        10

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



# Naive Bayes Classifier

In [25]:
from sklearn.naive_bayes import GaussianNB

In [26]:
# Implementation 
nb_classifier = GaussianNB()

In [27]:
nb_classifier.fit(X_train, y_train)

In [28]:
nb_predictions = nb_classifier.predict(X_test)

In [29]:
# Evaluation metrics
print("\nNaive Bayes Classifier (without hyperparameters):")
print("Accuracy:", accuracy_score(y_test, nb_predictions))


Naive Bayes Classifier (without hyperparameters):
Accuracy: 1.0


In [30]:
print("Classification Report:")
print(classification_report(y_test, nb_predictions))

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         8
           1       1.00      1.00      1.00        12
           2       1.00      1.00      1.00        10

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



### Hyperparameter tuning

In [31]:
# Hyperparameter tuning
nb_params = {'var_smoothing': np.logspace(0,-9, num=100)}
nb_grid = GridSearchCV(GaussianNB(), nb_params, cv=5)

In [32]:
nb_grid.fit(X_train, y_train)

In [33]:
best_nb_model = nb_grid.best_estimator_

In [34]:
# Implementation
best_nb_model.fit(X_train, y_train)
best_nb_predictions = best_nb_model.predict(X_test)

In [35]:
# Evaluation metrics
print("\nNaive Bayes Classifier (with hyperparameters):")
print("Accuracy:", accuracy_score(y_test, best_nb_predictions))


Naive Bayes Classifier (with hyperparameters):
Accuracy: 0.9666666666666667


In [36]:
print("Classification Report:")
print(classification_report(y_test, best_nb_predictions))

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         8
           1       0.92      1.00      0.96        12
           2       1.00      0.90      0.95        10

    accuracy                           0.97        30
   macro avg       0.97      0.97      0.97        30
weighted avg       0.97      0.97      0.97        30



# Support Vector Machine

In [37]:
from sklearn.svm import SVC

In [38]:
# Implementation
svm_classifier = SVC()

In [39]:
svm_classifier.fit(X_train, y_train)

In [40]:
svm_predictions = svm_classifier.predict(X_test)

In [42]:
# Evaluation metrics
print("\nSupport Vector Machine (without hyperparameters):")
print("Accuracy:", accuracy_score(y_test, svm_predictions))


Support Vector Machine (without hyperparameters):
Accuracy: 1.0


In [43]:
print("Classification Report:")
print(classification_report(y_test, svm_predictions))

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         8
           1       1.00      1.00      1.00        12
           2       1.00      1.00      1.00        10

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



### Hyperparameter tuning

In [44]:
# Hyperparameter tuning
svm_params = {'C': [0.1, 1, 10, 100], 'kernel': ['linear', 'poly', 'rbf', 'sigmoid']}
svm_grid = GridSearchCV(SVC(), svm_params, cv=5)

In [45]:
svm_grid.fit(X_train, y_train)

In [46]:
best_svm_model = svm_grid.best_estimator_

In [47]:
# Implementation 
best_svm_model.fit(X_train, y_train)
best_svm_predictions = best_svm_model.predict(X_test)

In [48]:
# Evaluation metrics
print("\nSupport Vector Machine (with hyperparameters):")
print("Accuracy:", accuracy_score(y_test, best_svm_predictions))


Support Vector Machine (with hyperparameters):
Accuracy: 1.0


In [49]:
print("Classification Report:")
print(classification_report(y_test, best_svm_predictions))

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         8
           1       1.00      1.00      1.00        12
           2       1.00      1.00      1.00        10

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



## BREAST CANCER DATASET

In [51]:
from sklearn.datasets import load_breast_cancer

In [52]:
# Loading Breast Cancer dataset
breast_cancer = load_breast_cancer()
X = breast_cancer.data
y = breast_cancer.target

In [53]:
# Convert to DataFrame for easier inspection
df = pd.DataFrame(data=np.c_[breast_cancer['data'], breast_cancer['target']],
                  columns=np.append(breast_cancer['feature_names'], ['target']))

In [55]:
df.head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,target
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,0.0
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,0.0
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,0.0
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,0.0
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,0.0


In [56]:
df.tail()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,target
564,21.56,22.39,142.0,1479.0,0.111,0.1159,0.2439,0.1389,0.1726,0.05623,...,26.4,166.1,2027.0,0.141,0.2113,0.4107,0.2216,0.206,0.07115,0.0
565,20.13,28.25,131.2,1261.0,0.0978,0.1034,0.144,0.09791,0.1752,0.05533,...,38.25,155.0,1731.0,0.1166,0.1922,0.3215,0.1628,0.2572,0.06637,0.0
566,16.6,28.08,108.3,858.1,0.08455,0.1023,0.09251,0.05302,0.159,0.05648,...,34.12,126.7,1124.0,0.1139,0.3094,0.3403,0.1418,0.2218,0.0782,0.0
567,20.6,29.33,140.1,1265.0,0.1178,0.277,0.3514,0.152,0.2397,0.07016,...,39.42,184.6,1821.0,0.165,0.8681,0.9387,0.265,0.4087,0.124,0.0
568,7.76,24.54,47.92,181.0,0.05263,0.04362,0.0,0.0,0.1587,0.05884,...,30.37,59.16,268.6,0.08996,0.06444,0.0,0.0,0.2871,0.07039,1.0


In [57]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 569 entries, 0 to 568
Data columns (total 31 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   mean radius              569 non-null    float64
 1   mean texture             569 non-null    float64
 2   mean perimeter           569 non-null    float64
 3   mean area                569 non-null    float64
 4   mean smoothness          569 non-null    float64
 5   mean compactness         569 non-null    float64
 6   mean concavity           569 non-null    float64
 7   mean concave points      569 non-null    float64
 8   mean symmetry            569 non-null    float64
 9   mean fractal dimension   569 non-null    float64
 10  radius error             569 non-null    float64
 11  texture error            569 non-null    float64
 12  perimeter error          569 non-null    float64
 13  area error               569 non-null    float64
 14  smoothness error         5

### Decision Tree Classifier

In [58]:
dt_classifier = DecisionTreeClassifier()

In [59]:
dt_classifier.fit(X_train, y_train)

In [60]:
dt_predictions = dt_classifier.predict(X_test)

In [61]:
print("Accuracy:", accuracy_score(y_test, dt_predictions))
print("Classification Report:")
print(classification_report(y_test, dt_predictions))

Accuracy: 1.0
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         8
           1       1.00      1.00      1.00        12
           2       1.00      1.00      1.00        10

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



In [75]:
dt_params = {'criterion': ['gini', 'entropy'], 'max_depth': [None, 5, 10, 15]}
dt_grid = GridSearchCV(DecisionTreeClassifier(), dt_params, cv=5)

In [76]:
dt_grid.fit(X_train, y_train)
best_dt_model = dt_grid.best_estimator_

In [77]:
# Implementation of Decision Tree Classifier with hyperparameters
best_dt_model.fit(X_train, y_train)
best_dt_predictions = best_dt_model.predict(X_test)

In [78]:
print("Accuracy:", accuracy_score(y_test, best_dt_predictions))
print("Classification Report:")
print(classification_report(y_test, best_dt_predictions))

Accuracy: 1.0
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         8
           1       1.00      1.00      1.00        12
           2       1.00      1.00      1.00        10

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



### Naive Bayes Classifier

In [62]:
nb_classifier = GaussianNB()

In [63]:
nb_classifier.fit(X_train, y_train)

In [64]:
nb_predictions = nb_classifier.predict(X_test)

In [65]:
print("Accuracy:", accuracy_score(y_test, nb_predictions))
print("Classification Report:")
print(classification_report(y_test, nb_predictions))

Accuracy: 1.0
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         8
           1       1.00      1.00      1.00        12
           2       1.00      1.00      1.00        10

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



In [None]:
# Hyperparameter tuning for Naive Bayes Classifier
nb_params = {'var_smoothing': np.logspace(0,-9, num=100)}
nb_grid = GridSearchCV(GaussianNB(), nb_params, cv=5)


In [None]:
nb_grid.fit(X_train, y_train)
best_nb_model = nb_grid.best_estimator_

In [79]:
best_nb_model.fit(X_train, y_train)
best_nb_predictions = best_nb_model.predict(X_test)

In [None]:
print("Accuracy:", accuracy_score(y_test, best_nb_predictions))
print("Classification Report:")
print(classification_report(y_test, best_nb_predictions))

## Support Vector Machine

In [66]:
svm_classifier = SVC()

In [67]:
svm_classifier.fit(X_train, y_train)

In [68]:
svm_predictions = svm_classifier.predict(X_test)

In [69]:
print("Accuracy:", accuracy_score(y_test, svm_predictions))
print("Classification Report:")
print(classification_report(y_test, svm_predictions))

Accuracy: 1.0
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         8
           1       1.00      1.00      1.00        12
           2       1.00      1.00      1.00        10

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



## Hyperparameter tuning

In [71]:
svm_params = {'C': [0.1, 1, 10, 100], 'kernel': ['linear', 'poly', 'rbf', 'sigmoid']}
svm_grid = GridSearchCV(SVC(), svm_params, cv=5)

In [72]:
svm_grid.fit(X_train, y_train)
best_svm_model = svm_grid.best_estimator_

In [73]:
best_svm_model.fit(X_train, y_train)
best_svm_predictions = best_svm_model.predict(X_test)

In [74]:
print("Accuracy:", accuracy_score(y_test, best_svm_predictions))
print("Classification Report:")
print(classification_report(y_test, best_svm_predictions))

Accuracy: 1.0
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         8
           1       1.00      1.00      1.00        12
           2       1.00      1.00      1.00        10

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

