In [3]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

# Fetch the breast cancer dataset bundled with scikit-learn
data = load_breast_cancer()
X, y = data.data, data.target

# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training split only, then apply
# the same transformation to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit a linear-kernel Support Vector Machine (SVM) classifier on the scaled training data
svm_classifier = SVC(kernel='linear').fit(X_train_scaled, y_train)

# Predict labels for the held-out samples
y_pred = svm_classifier.predict(X_test_scaled)

# Report the overall accuracy, then the per-class metrics
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print("Classification Report:", classification_report(y_test, y_pred), sep="\n")


Accuracy: 0.956140350877193
Classification Report:
              precision    recall  f1-score   support

           0       0.93      0.95      0.94        43
           1       0.97      0.96      0.96        71

    accuracy                           0.96       114
   macro avg       0.95      0.96      0.95       114
weighted avg       0.96      0.96      0.96       114



In [1]:
pip install pandas numpy scikit-learn


Note: you may need to restart the kernel to use updated packages.




In [2]:
pip install scikit-learn

Note: you may need to restart the kernel to use updated packages.




In [3]:
# Import necessary libraries
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Fetch the breast cancer dataset bundled with scikit-learn
cancer = datasets.load_breast_cancer()

# Hold out 30% of the samples for testing, with a fixed seed for a reproducible split
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data,
    cancer.target,
    test_size=0.3,
    random_state=42,
)

# Create the Gaussian Naive Bayes classifier and fit it on the training split
clf = GaussianNB().fit(X_train, y_train)

# Label the held-out samples
y_pred = clf.predict(X_test)

# Report the fraction of test labels that were predicted correctly
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))

Accuracy: 0.9415204678362573


In [1]:
import numpy as np
from scipy.stats import multivariate_normal

class GaussianNaiveBayes:
    """Bayes classifier that fits one multivariate Gaussian per class.

    ``fit`` estimates, for every distinct label, the class prior (relative
    frequency), the feature mean vector and the feature covariance matrix.
    ``predict`` assigns each sample to the class with the largest posterior.

    Note: the covariance is estimated with ``np.cov`` as a full matrix, so the
    features are *not* treated as independent; despite the class name, this is
    not the diagonal-covariance model usually called Gaussian Naive Bayes.

    Attributes (all ``None`` until ``fit`` is called):
        classes: sorted array of the distinct labels seen in ``y``.
        class_priors: array with the relative frequency of each class.
        class_means: list with one mean vector per class.
        class_covariances: list with one covariance estimate per class.
    """

    def __init__(self):
        self.classes = None
        self.class_priors = None
        self.class_means = None
        self.class_covariances = None

    def fit(self, X, y):
        """Estimate the prior, mean and covariance of every class.

        Args:
            X: array-like of shape (n_samples, n_features).
            y: array-like of shape (n_samples,) holding the class labels.

        Returns:
            The fitted estimator itself, so calls can be chained.

        Raises:
            ValueError: if ``X`` and ``y`` contain a different number of samples.
        """
        # Accept plain lists as well as arrays; boolean masking needs ndarrays.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same number of samples, got {len(X)} and {len(y)}"
            )

        self.classes = np.unique(y)
        self.class_priors = np.zeros(len(self.classes))
        self.class_means = []
        self.class_covariances = []

        for i, c in enumerate(self.classes):
            X_c = X[y == c]

            # Prior = fraction of the training samples that belong to this class
            self.class_priors[i] = len(X_c) / len(X)

            # Per-class Gaussian parameters (rowvar=False: columns are features)
            self.class_means.append(np.mean(X_c, axis=0))
            self.class_covariances.append(np.cov(X_c, rowvar=False))

        return self

    def _joint_log_likelihood(self, X):
        """Return log p(x | class) + log p(class) as an (n_samples, n_classes) array."""
        X = np.asarray(X, dtype=float)
        scores = np.empty((len(X), len(self.classes)))

        for i in range(len(self.classes)):
            # Work in log space: raw densities underflow to 0 for samples far
            # from a class mean, which would turn the posterior into 0/0 = NaN.
            # allow_singular avoids an exception on rank-deficient covariances.
            log_likelihood = multivariate_normal.logpdf(
                X,
                mean=self.class_means[i],
                cov=self.class_covariances[i],
                allow_singular=True,
            )
            scores[:, i] = log_likelihood + np.log(self.class_priors[i])

        return scores

    def predict(self, X):
        """Predict the class label of every sample in ``X``.

        Args:
            X: array-like of shape (n_samples, n_features).

        Returns:
            Array of shape (n_samples,) containing labels taken from
            ``self.classes`` (the original label values, not column indices).
        """
        scores = self._joint_log_likelihood(X)

        # Normalising by the evidence does not change the arg-max, so the
        # unnormalised joint log-likelihood is enough to pick the class.
        best = np.argmax(scores, axis=1)

        # Map column indices back to the labels seen during fit
        return self.classes[best]
