In [None]:
# Import required libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, precision_score, recall_score

# Read the cancer dataset from disk
data = pd.read_csv("cancer_dataset.csv")

# Hold out 20% of the rows for testing. Every column except the last is used
# as a feature and the last column is used as the label.
X_train, X_test, y_train, y_test = train_test_split(
    data.iloc[:, :-1],
    data.iloc[:, -1],
    test_size=0.2,
    random_state=42,
)

# Build the Gaussian Naive Bayes classifier and train it on the training rows
# (fit() returns the estimator itself, so construction and training chain)
model = GaussianNB().fit(X_train, y_train)

# Predict labels for the held-out rows
y_pred = model.predict(X_test)

# Score the predictions; precision and recall treat 'M' as the positive class
accuracy = accuracy_score(y_test, y_pred)
precision, recall = (
    scorer(y_test, y_pred, pos_label='M')
    for scorer in (precision_score, recall_score)
)

# Report each metric on its own line
for label, value in (
    ("Accuracy:", accuracy),
    ("Precision:", precision),
    ("Recall:", recall),
):
    print(label, value)



In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score

# Load the dataset
data = pd.read_csv('cancer_dataset.csv')

# Split the data into features and labels: the last column is used as the
# label and every other column as a feature
# (presumably all feature columns are numeric — TODO confirm against the CSV)
X = data.iloc[:, :-1]
y = data.iloc[:, -1]

# Hold out the test set BEFORE fitting anything. Fitting the scaler or PCA on
# the full dataset would let test rows influence the preprocessing and make the
# reported scores optimistic. The split does not depend on K, so it is done
# once here instead of once per loop iteration.
# (train_test_split is imported in the first cell of the notebook.)
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize the features using statistics learned from the training rows only
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_raw)
X_test_scaled = scaler.transform(X_test_raw)

# Define the range of values for K, the number of principal components.
# PCA cannot extract more components than min(n_samples, n_features) of the
# data it is fit on, so the upper bound is taken from the training matrix.
K_range = range(1, min(X_train_scaled.shape) + 1)

# Initialize empty lists to store performance metrics for each value of K
accuracy_scores = []
precision_scores = []
recall_scores = []

# Loop through each value of K and perform independent training
for K in K_range:
    # Learn the K-component projection from the training rows, then apply the
    # same projection to the test rows
    pca = PCA(n_components=K)
    X_train = pca.fit_transform(X_train_scaled)
    X_test = pca.transform(X_test_scaled)

    # Create a logistic regression object
    clf = LogisticRegression(random_state=42)

    # Train the model on the training set
    clf.fit(X_train, y_train)

    # Test the model on the testing set
    y_pred = clf.predict(X_test)

    # Calculate the performance metrics and append them to the corresponding lists
    # (precision and recall treat 'M' as the positive class)
    accuracy_scores.append(accuracy_score(y_test, y_pred))
    precision_scores.append(precision_score(y_test, y_pred, pos_label='M'))
    recall_scores.append(recall_score(y_test, y_pred, pos_label='M'))

# Plot the performance metrics over different numbers of K
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
ax.plot(K_range, accuracy_scores, label='Accuracy')
ax.plot(K_range, precision_scores, label='Precision')
ax.plot(K_range, recall_scores, label='Recall')
ax.set_xlabel('Number of Principal Components')
ax.set_ylabel('Score')
ax.set_title('Performance of Logistic Regression Model with PCA Feature Extraction')
ax.legend()
plt.show()


In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, precision_score, recall_score

# Load the dataset
data = pd.read_csv('cancer_dataset.csv')

# Split the data into features and labels: the last column is used as the
# label and every other column as a feature
# (presumably all feature columns are numeric — TODO confirm against the CSV)
X = data.iloc[:, :-1]
y = data.iloc[:, -1]

# Hold out the test set BEFORE fitting anything. Fitting the scaler or PCA on
# the full dataset would let test rows influence the preprocessing and make the
# reported scores optimistic. The split does not depend on K, so it is done
# once here instead of once per loop iteration.
# (train_test_split is imported in the first cell of the notebook.)
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize the features using statistics learned from the training rows only
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_raw)
X_test_scaled = scaler.transform(X_test_raw)

# Define the range of values for K, the number of principal components.
# PCA cannot extract more components than min(n_samples, n_features) of the
# data it is fit on, so the upper bound is taken from the training matrix.
K_range = range(1, min(X_train_scaled.shape) + 1)

# Initialize empty lists to store performance metrics for each value of K
accuracy_scores = []
precision_scores = []
recall_scores = []

# Loop through each value of K and perform independent training
for K in K_range:
    # Learn the K-component projection from the training rows, then apply the
    # same projection to the test rows
    pca = PCA(n_components=K)
    X_train = pca.fit_transform(X_train_scaled)
    X_test = pca.transform(X_test_scaled)

    # Create a Naive Bayes object
    clf = GaussianNB()

    # Train the model on the training set
    clf.fit(X_train, y_train)

    # Test the model on the testing set
    y_pred = clf.predict(X_test)

    # Calculate the performance metrics and append them to the corresponding lists
    # (precision and recall treat 'M' as the positive class)
    accuracy_scores.append(accuracy_score(y_test, y_pred))
    precision_scores.append(precision_score(y_test, y_pred, pos_label='M'))
    recall_scores.append(recall_score(y_test, y_pred, pos_label='M'))

# Plot the performance metrics over different numbers of K
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
ax.plot(K_range, accuracy_scores, label='Accuracy')
ax.plot(K_range, precision_scores, label='Precision')
ax.plot(K_range, recall_scores, label='Recall')
ax.set_xlabel('Number of Principal Components')
ax.set_ylabel('Score')
ax.set_title('Performance of Naive Bayes Classifier with PCA Feature Extraction')
ax.legend()
plt.show()
