# In [None]:
#21. Write a Python program to train an SVM Classifier on the Iris dataset and evaluate accuracy:

# Import necessary libraries
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn import metrics
from sklearn.model_selection import GridSearchCV

# Load the Iris dataset
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the SVM classifier
svm_classifier = svm.SVC()

# Define hyperparameters for tuning
param_grid = {'C': [0.1, 1, 10], 'kernel': ['linear', 'rbf', 'poly']}

# Perform grid search for hyperparameter tuning; the 5-fold cross-validation
# runs on the training split only, so the test split stays unseen
grid_search = GridSearchCV(svm_classifier, param_grid, cv=5)
grid_search.fit(X_train, y_train)

# Get the best parameters and the best cross-validation score
best_params = grid_search.best_params_
best_score = grid_search.best_score_

print("Best Parameters:", best_params)
print("Best Score:", best_score)

# Take the SVM classifier with the best parameters.
# GridSearchCV is used with its default refit=True, so best_estimator_ has
# already been refit on the whole training split; fitting it again here would
# only repeat that work.
best_svm_classifier = grid_search.best_estimator_

# Make predictions on the test set
y_pred = best_svm_classifier.predict(X_test)

# Evaluate the accuracy of the classifier
accuracy = metrics.accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Print the classification report
print("Classification Report:")
print(metrics.classification_report(y_test, y_pred))

# Print the confusion matrix
print("Confusion Matrix:")
print(metrics.confusion_matrix(y_test, y_pred))


#22 Write a Python program to train two SVM classifiers with Linear and RBF kernels on the Wine dataset, then compare their accuracies:

# Import necessary libraries
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn import metrics
from sklearn.preprocessing import StandardScaler

# Load the Wine dataset and separate features from labels
wine = datasets.load_wine()
X, y = wine.data, wine.target

# Hold out 20% of the samples as the test set
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize features: statistics are learned on the training split only and
# then applied to both splits
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Build and train one classifier per kernel (fit returns the estimator itself)
svm_linear = svm.SVC(kernel='linear', C=1).fit(X_train, y_train)
svm_rbf = svm.SVC(kernel='rbf', C=1).fit(X_train, y_train)

# Predict on the test set, then score each model
y_pred_linear = svm_linear.predict(X_test)
y_pred_rbf = svm_rbf.predict(X_test)

accuracy_linear = metrics.accuracy_score(y_test, y_pred_linear)
accuracy_rbf = metrics.accuracy_score(y_test, y_pred_rbf)

print("Accuracy (Linear Kernel):", accuracy_linear)
print("Accuracy (RBF Kernel):", accuracy_rbf)

# Classification reports, linear kernel first
for heading, predictions in (
    ("Classification Report (Linear Kernel):", y_pred_linear),
    ("Classification Report (RBF Kernel):", y_pred_rbf),
):
    print(heading)
    print(metrics.classification_report(y_test, predictions))

# Confusion matrices, linear kernel first
for heading, predictions in (
    ("Confusion Matrix (Linear Kernel):", y_pred_linear),
    ("Confusion Matrix (RBF Kernel):", y_pred_rbf),
):
    print(heading)
    print(metrics.confusion_matrix(y_test, predictions))

# Announce the winner (or a tie)
if accuracy_linear == accuracy_rbf:
    print("Both kernels perform equally well with an accuracy of", accuracy_linear)
elif accuracy_linear > accuracy_rbf:
    print("Linear kernel performs better with an accuracy of", accuracy_linear)
else:
    print("RBF kernel performs better with an accuracy of", accuracy_rbf)

#23 Write a Python program to train an SVM Regressor (SVR) on a housing dataset and evaluate it using Mean Squared Error (MSE):

# Import necessary libraries
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn import metrics
from sklearn.preprocessing import StandardScaler

# Load a housing dataset.
# load_boston was removed from scikit-learn in version 1.2, so importing it
# fails on current releases. The California Housing dataset is used instead;
# it serves the same purpose here (numeric features, continuous target).
from sklearn.datasets import fetch_california_housing
housing = fetch_california_housing()
X = housing.data
y = housing.target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features by removing the mean and scaling to unit variance;
# the scaler is fitted on the training split only and reused for the test split
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Define the SVM Regressor
svr = svm.SVR(kernel='rbf', C=100, epsilon=0.1)

# Train the SVM Regressor
svr.fit(X_train, y_train)

# Make predictions on the test set
y_pred = svr.predict(X_test)

# Evaluate the Mean Squared Error (MSE)
mse = metrics.mean_squared_error(y_test, y_pred)
print("Mean Squared Error (MSE):", mse)

# Evaluate the Root Mean Squared Error (RMSE), i.e. the square root of the MSE
rmse = mse ** 0.5
print("Root Mean Squared Error (RMSE):", rmse)

# Evaluate the Mean Absolute Error (MAE)
mae = metrics.mean_absolute_error(y_test, y_pred)
print("Mean Absolute Error (MAE):", mae)

# Evaluate the R-Squared value
r2 = metrics.r2_score(y_test, y_pred)
print("R-Squared value:", r2)


#24 Write a Python program to train an SVM Classifier with a Polynomial Kernel and visualize the decision boundary:

# Import necessary libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm

# Generate a sample dataset: two 50-point Gaussian clusters, seeded so the
# draw is reproducible (cluster 1 is sampled before cluster 2)
np.random.seed(0)
mean1, cov1 = [0, 0], [[1, 0.5], [0.5, 1]]
data1 = np.random.multivariate_normal(mean1, cov1, 50)

mean2, cov2 = [5, 5], [[1, 0.5], [0.5, 1]]
data2 = np.random.multivariate_normal(mean2, cov2, 50)

# Stack the clusters row-wise; label the first cluster 0 and the second 1
X = np.concatenate((data1, data2), axis=0)
y = np.concatenate((np.zeros(50), np.ones(50)))

# Train the SVM Classifier with a degree-3 Polynomial Kernel
svm_classifier = svm.SVC(kernel='poly', degree=3, C=1)
svm_classifier.fit(X, y)

# Evaluate the classifier on a grid covering the data with a margin of 1
feature_1, feature_2 = X[:, 0], X[:, 1]
x_min, x_max = feature_1.min() - 1, feature_1.max() + 1
y_min, y_max = feature_2.min() - 1, feature_2.max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1), np.arange(y_min, y_max, 0.1))
grid_points = np.column_stack((xx.ravel(), yy.ravel()))
Z = svm_classifier.predict(grid_points).reshape(xx.shape)

# Draw the predicted regions with the training points on top
plt.figure(figsize=(8, 6))
plt.contourf(xx, yy, Z, alpha=0.8)
plt.scatter(feature_1, feature_2, c=y)
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.title('SVM Classifier with Polynomial Kernel')
plt.show()


#25 Write a Python program to train a Gaussian Naïve Bayes classifier on the Breast Cancer dataset and evaluate accuracy:

# Import necessary libraries
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn import metrics
from sklearn.preprocessing import StandardScaler

# Load the Breast Cancer dataset and separate features from labels
breast_cancer = datasets.load_breast_cancer()
X, y = breast_cancer.data, breast_cancer.target

# Hold out 20% of the samples as the test set
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize features using statistics learned on the training split only
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Create and train the Gaussian Naïve Bayes classifier
gnb = GaussianNB()
gnb.fit(X_train, y_train)

# Predict the held-out samples
y_pred = gnb.predict(X_test)

# Report accuracy, per-class metrics and the confusion matrix
accuracy = metrics.accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

report = metrics.classification_report(y_test, y_pred)
print("Classification Report:")
print(report)

confusion = metrics.confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(confusion)


#26  Write a Python program to train a Multinomial Naïve Bayes classifier for text classification using the 20 Newsgroups dataset.

# Import necessary libraries
from sklearn.datasets import fetch_20newsgroups
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics

# Load every post of the 20 Newsgroups dataset with headers, footers and
# quoted text removed
newsgroups = fetch_20newsgroups(
    subset='all',
    remove=('headers', 'footers', 'quotes'),
)

# Hold out 20% of the documents as the test set
X_train, X_test, y_train, y_test = train_test_split(
    newsgroups.data,
    newsgroups.target,
    test_size=0.2,
    random_state=42,
)

# Bag-of-words counts; the vocabulary is learned from the training documents
# only and then reused for the test documents
vectorizer = CountVectorizer(stop_words='english')
vectorizer.fit(X_train)
X_train_count = vectorizer.transform(X_train)
X_test_count = vectorizer.transform(X_test)

# Create and train the Multinomial Naïve Bayes classifier
mnb = MultinomialNB()
mnb.fit(X_train_count, y_train)

# Predict the held-out documents
y_pred = mnb.predict(X_test_count)

# Report accuracy, per-class metrics and the confusion matrix
accuracy = metrics.accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

report = metrics.classification_report(y_test, y_pred)
print("Classification Report:")
print(report)

confusion = metrics.confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(confusion)


#27 Write a Python program to train an SVM Classifier with different C values and compare the decision boundaries visually

# Import necessary libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm

# Generate a sample dataset: two Gaussian clusters of 50 points each
np.random.seed(0)
mean1 = [0, 0]
cov1 = [[1, 0.5], [0.5, 1]]
data1 = np.random.multivariate_normal(mean1, cov1, 50)

mean2 = [5, 5]
cov2 = [[1, 0.5], [0.5, 1]]
data2 = np.random.multivariate_normal(mean2, cov2, 50)

X = np.vstack((data1, data2))
y = np.hstack((np.zeros(50), np.ones(50)))

# Build the evaluation grid once: it depends only on X, not on C, so it does
# not need to be recomputed for every classifier
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1), np.arange(y_min, y_max, 0.1))
grid_points = np.c_[xx.ravel(), yy.ravel()]

# Train SVM Classifiers with different C values, one subplot per value
C_values = [0.1, 1, 10]
fig, axs = plt.subplots(1, len(C_values), figsize=(15, 5))

for ax, C in zip(axs, C_values):
    svm_classifier = svm.SVC(kernel='rbf', C=C)
    svm_classifier.fit(X, y)

    # Plot the decision boundary: predicted class for every grid point,
    # reshaped back to the grid layout
    Z = svm_classifier.predict(grid_points)
    Z = Z.reshape(xx.shape)

    ax.contourf(xx, yy, Z, alpha=0.8)
    ax.scatter(X[:, 0], X[:, 1], c=y)
    ax.set_title(f"C = {C}")

plt.show()


#28 Write a Python program to train a Bernoulli Naïve Bayes classifier for binary classification on a dataset with binary features

# Import necessary libraries
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import BernoulliNB
from sklearn import metrics
import numpy as np

# Generate a sample dataset with binary features: 100 samples x 10 features of
# random 0/1 values, followed by 100 random 0/1 labels (seeded for
# reproducibility; features are drawn before labels)
np.random.seed(0)
X = np.random.randint(2, size=(100, 10))
y = np.random.randint(2, size=100)

# Hold out 20% of the samples as the test set
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Create and train the Bernoulli Naïve Bayes classifier
bnb = BernoulliNB()
bnb.fit(X_train, y_train)

# Predict the held-out samples
y_pred = bnb.predict(X_test)

# Report accuracy, per-class metrics and the confusion matrix
accuracy = metrics.accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

report = metrics.classification_report(y_test, y_pred)
print("Classification Report:")
print(report)

confusion = metrics.confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(confusion)


#29 Write a Python program to apply feature scaling before training an SVM model and compare results with unscaled data

# Import necessary libraries
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn import metrics
from sklearn.preprocessing import StandardScaler

# Load the Iris dataset and separate features from labels
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples as the test set
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Baseline: RBF SVM trained on the raw (unscaled) features
svm_unscaled = svm.SVC(kernel='rbf', C=1)
svm_unscaled.fit(X_train, y_train)
y_pred_unscaled = svm_unscaled.predict(X_test)
accuracy_unscaled = metrics.accuracy_score(y_test, y_pred_unscaled)
print("Accuracy without feature scaling:", accuracy_unscaled)

# Standardize the features; the scaler is fitted on the training split only
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Same model configuration, trained on the standardized features
svm_scaled = svm.SVC(kernel='rbf', C=1)
svm_scaled.fit(X_train_scaled, y_train)
y_pred_scaled = svm_scaled.predict(X_test_scaled)
accuracy_scaled = metrics.accuracy_score(y_test, y_pred_scaled)
print("Accuracy with feature scaling:", accuracy_scaled)

# A positive difference means scaling helped
print("Difference in accuracy:", accuracy_scaled - accuracy_unscaled)

# Per-class metrics for both models, unscaled first
for heading, predictions in (
    ("Classification Report without feature scaling:", y_pred_unscaled),
    ("Classification Report with feature scaling:", y_pred_scaled),
):
    print(heading)
    print(metrics.classification_report(y_test, predictions))


#30 Write a Python program to train a Gaussian Naïve Bayes model and compare the predictions before and after Laplace Smoothing

# Import necessary libraries
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn import metrics
import numpy as np

# Load the Iris dataset and separate features from labels
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples as the test set (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Custom Gaussian Naïve Bayes class used as the "with Laplace Smoothing" model
class GaussianNBWithLaplace(GaussianNB):
    """GaussianNB subclass whose constructor exposes only ``var_smoothing``.

    The value is forwarded unchanged to ``GaussianNB.__init__``; nothing else
    is added or overridden.

    NOTE(review): ``var_smoothing`` looks like a variance-stabilising term
    rather than classic Laplace (additive count) smoothing - confirm against
    the scikit-learn documentation before relying on the class name.
    """

    def __init__(self, var_smoothing=1e-9):
        super(GaussianNBWithLaplace, self).__init__(var_smoothing=var_smoothing)

# Model 1: Gaussian Naïve Bayes with smoothing switched off (var_smoothing=0)
gnb_without_laplace = GaussianNB(var_smoothing=0)
gnb_without_laplace.fit(X_train, y_train)

# Model 2: the custom subclass with var_smoothing=1e-9
gnb_with_laplace = GaussianNBWithLaplace(var_smoothing=1e-9)
gnb_with_laplace.fit(X_train, y_train)

# Predict the held-out samples with both models
y_pred_without_laplace = gnb_without_laplace.predict(X_test)
y_pred_with_laplace = gnb_with_laplace.predict(X_test)

# Compare the predictions through their accuracy
accuracy_without_laplace = metrics.accuracy_score(y_test, y_pred_without_laplace)
accuracy_with_laplace = metrics.accuracy_score(y_test, y_pred_with_laplace)
print("Accuracy without Laplace Smoothing:", accuracy_without_laplace)
print("Accuracy with Laplace Smoothing:", accuracy_with_laplace)

# Per-class metrics for both models, unsmoothed first
for heading, predictions in (
    ("Classification Report without Laplace Smoothing:", y_pred_without_laplace),
    ("Classification Report with Laplace Smoothing:", y_pred_with_laplace),
):
    print(heading)
    print(metrics.classification_report(y_test, predictions))


#31 Write a Python program to train an SVM Classifier and use GridSearchCV to tune the hyperparameters (C,gamma, kernel)

# Import necessary libraries
from sklearn import datasets
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn import svm
from sklearn import metrics

# Load the dataset
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the SVM Classifier
svm_classifier = svm.SVC()

# Define the hyperparameter grid (every combination of C, gamma and kernel
# is evaluated)
param_grid = {
    'C': [0.1, 1, 10],
    'gamma': ['scale', 'auto', 0.1, 1, 10],
    'kernel': ['linear', 'rbf', 'poly']
}

# Perform grid search; the 5-fold cross-validation runs on the training split
# only, so the test split stays unseen
grid_search = GridSearchCV(svm_classifier, param_grid, cv=5)
grid_search.fit(X_train, y_train)

# Get the best parameters and the best cross-validation score
best_params = grid_search.best_params_
best_score = grid_search.best_score_

print("Best Parameters:", best_params)
print("Best Score:", best_score)

# Take the SVM Classifier with the best parameters.
# GridSearchCV is used with its default refit=True, so best_estimator_ has
# already been refit on the whole training split; fitting it again here would
# only repeat that work.
best_svm_classifier = grid_search.best_estimator_

# Make predictions on the test set
y_pred = best_svm_classifier.predict(X_test)

# Evaluate the accuracy of the classifier
accuracy = metrics.accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Print the classification report
print("Classification Report:")
print(metrics.classification_report(y_test, y_pred))

# Print the confusion matrix
print("Confusion Matrix:")
print(metrics.confusion_matrix(y_test, y_pred))


#32 Write a Python program to train an SVM Classifier on an imbalanced dataset, apply class weighting, and check whether it improves accuracy

# Import necessary libraries
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn import metrics
from sklearn.datasets import make_classification
import numpy as np

# Generate an imbalanced two-class dataset (class weights 0.9 / 0.1)
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=10,
    n_redundant=0,
    n_repeated=0,
    n_classes=2,
    weights=[0.9, 0.1],
    random_state=42,
)

# Hold out 20% of the samples as the test set
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Baseline: RBF SVM with no class weighting
svm_unweighted = svm.SVC(kernel='rbf', C=1)
svm_unweighted.fit(X_train, y_train)
y_pred_unweighted = svm_unweighted.predict(X_test)

accuracy_unweighted = metrics.accuracy_score(y_test, y_pred_unweighted)
print("Accuracy without class weighting:", accuracy_unweighted)

print("Classification Report without class weighting:")
print(metrics.classification_report(y_test, y_pred_unweighted))

# Same model configuration with class_weight='balanced'
svm_weighted = svm.SVC(kernel='rbf', C=1, class_weight='balanced')
svm_weighted.fit(X_train, y_train)
y_pred_weighted = svm_weighted.predict(X_test)

accuracy_weighted = metrics.accuracy_score(y_test, y_pred_weighted)
print("Accuracy with class weighting:", accuracy_weighted)

print("Classification Report with class weighting:")
print(metrics.classification_report(y_test, y_pred_weighted))

# Macro-averaged F1 for both models, to complement plain accuracy
f1_unweighted = metrics.f1_score(y_test, y_pred_unweighted, average='macro')
f1_weighted = metrics.f1_score(y_test, y_pred_weighted, average='macro')
print("F1 score without class weighting:", f1_unweighted)
print("F1 score with class weighting:", f1_weighted)


#33 Write a Python program to implement a Naïve Bayes classifier for spam detection using email data

# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics

# Build the email dataset
# For demonstration purposes a small hand-written sample is used: ten messages
# that alternate spam / ham, starting with spam
emails = [
    'You won a prize, claim now!',
    'Meeting at 2 PM today',
    'Get free cash now!',
    'Project update: everything is fine',
    'Win a free trip to Hawaii!',
    'New policy announcement',
    'You are a winner!',
    'Client feedback meeting',
    'Make money fast!',
    'Team lunch at 12 PM',
]
labels = ['spam', 'ham'] * 5

data = {'email': emails, 'label': labels}
df = pd.DataFrame(data)

# Hold out 20% of the messages as the test set
X_train, X_test, y_train, y_test = train_test_split(
    df['email'], df['label'], test_size=0.2, random_state=42
)

# Bag-of-words counts; the vocabulary is learned from the training messages
# only and then reused for the test messages
vectorizer = CountVectorizer(stop_words='english')
vectorizer.fit(X_train)
X_train_count = vectorizer.transform(X_train)
X_test_count = vectorizer.transform(X_test)

# Create and train the Naïve Bayes classifier
nb = MultinomialNB()
nb.fit(X_train_count, y_train)

# Predict the held-out messages
y_pred = nb.predict(X_test_count)

# Report accuracy and per-class metrics
accuracy = metrics.accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

print("Classification Report:")
print(metrics.classification_report(y_test, y_pred))

# Classify a previously unseen message using the already-fitted vocabulary
new_email = ['You won a prize!']
new_email_count = vectorizer.transform(new_email)
prediction = nb.predict(new_email_count)
print("Prediction:", prediction)


#34 Write a Python program to train an SVM Classifier and a Naïve Bayes Classifier on the same dataset and compare their accuracy

# Import necessary libraries
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.naive_bayes import GaussianNB
from sklearn import metrics

# Load the Iris dataset and separate features from labels
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples as the test set
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# --- SVM Classifier -------------------------------------------------------
svm_classifier = svm.SVC(kernel='rbf', C=1)
svm_classifier.fit(X_train, y_train)
y_pred_svm = svm_classifier.predict(X_test)

accuracy_svm = metrics.accuracy_score(y_test, y_pred_svm)
print("Accuracy of SVM Classifier:", accuracy_svm)

print("Classification Report for SVM Classifier:")
print(metrics.classification_report(y_test, y_pred_svm))

# --- Naïve Bayes Classifier -------------------------------------------------
nb_classifier = GaussianNB()
nb_classifier.fit(X_train, y_train)
y_pred_nb = nb_classifier.predict(X_test)

accuracy_nb = metrics.accuracy_score(y_test, y_pred_nb)
print("Accuracy of Naïve Bayes Classifier:", accuracy_nb)

print("Classification Report for Naïve Bayes Classifier:")
print(metrics.classification_report(y_test, y_pred_nb))

# A positive difference means the SVM scored higher
print("Difference in accuracy:", accuracy_svm - accuracy_nb)

# Announce the better classifier (or a tie)
if accuracy_svm == accuracy_nb:
    print("Both classifiers have the same accuracy")
elif accuracy_svm > accuracy_nb:
    print("SVM Classifier is better")
else:
    print("Naïve Bayes Classifier is better")


#35 Write a Python program to perform feature selection before training a Naïve Bayes classifier and compare results

# Import necessary libraries
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.naive_bayes import GaussianNB
from sklearn import metrics

# Load the Iris dataset and separate features from labels
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples as the test set
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Baseline: Naïve Bayes trained on all features
nb_without_fs = GaussianNB()
nb_without_fs.fit(X_train, y_train)
y_pred_without_fs = nb_without_fs.predict(X_test)
accuracy_without_fs = metrics.accuracy_score(y_test, y_pred_without_fs)
print("Accuracy without feature selection:", accuracy_without_fs)

# Keep the 2 features with the highest chi-squared score; the selector is
# fitted on the training split only and then applied to both splits
selector = SelectKBest(score_func=chi2, k=2)
selector.fit(X_train, y_train)
X_train_selected = selector.transform(X_train)
X_test_selected = selector.transform(X_test)

# Naïve Bayes trained on the selected features only
nb_with_fs = GaussianNB()
nb_with_fs.fit(X_train_selected, y_train)
y_pred_with_fs = nb_with_fs.predict(X_test_selected)
accuracy_with_fs = metrics.accuracy_score(y_test, y_pred_with_fs)
print("Accuracy with feature selection:", accuracy_with_fs)

# A positive difference means feature selection helped
print("Difference in accuracy:", accuracy_with_fs - accuracy_without_fs)

# Per-class metrics for both models, full feature set first
for heading, predictions in (
    ("Classification Report without feature selection:", y_pred_without_fs),
    ("Classification Report with feature selection:", y_pred_with_fs),
):
    print(heading)
    print(metrics.classification_report(y_test, predictions))


#36 Write a Python program to train an SVM Classifier using One-vs-Rest (OvR) and One-vs-One (OvO) strategies on the Wine dataset and compare their accuracy

# Import necessary libraries
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.multiclass import OneVsRestClassifier, OneVsOneClassifier
from sklearn import metrics

# Load the Wine dataset and separate features from labels
wine = datasets.load_wine()
X, y = wine.data, wine.target

# Hold out 20% of the samples as the test set
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# One-vs-Rest (OvR) wrapper around an RBF SVM
ovr_svm = OneVsRestClassifier(svm.SVC(kernel='rbf', C=1))
ovr_svm.fit(X_train, y_train)
y_pred_ovr = ovr_svm.predict(X_test)
accuracy_ovr = metrics.accuracy_score(y_test, y_pred_ovr)
print("Accuracy of OvR SVM Classifier:", accuracy_ovr)

# One-vs-One (OvO) wrapper around the same base model configuration
ovo_svm = OneVsOneClassifier(svm.SVC(kernel='rbf', C=1))
ovo_svm.fit(X_train, y_train)
y_pred_ovo = ovo_svm.predict(X_test)
accuracy_ovo = metrics.accuracy_score(y_test, y_pred_ovo)
print("Accuracy of OvO SVM Classifier:", accuracy_ovo)

# A positive difference means OvR scored higher
print("Difference in accuracy:", accuracy_ovr - accuracy_ovo)

# Announce the better strategy (or a tie)
if accuracy_ovr == accuracy_ovo:
    print("Both classifiers have the same accuracy")
elif accuracy_ovr > accuracy_ovo:
    print("OvR SVM Classifier is better")
else:
    print("OvO SVM Classifier is better")

# Per-class metrics for both strategies, OvR first
for heading, predictions in (
    ("Classification Report for OvR SVM Classifier:", y_pred_ovr),
    ("Classification Report for OvO SVM Classifier:", y_pred_ovo),
):
    print(heading)
    print(metrics.classification_report(y_test, predictions))


#37 Write a Python program to train an SVM Classifier using Linear, Polynomial, and RBF kernels on the Breast Cancer dataset and compare their accuracy

# Import necessary libraries
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn import metrics

# Load the Breast Cancer dataset and separate features from labels
breast_cancer = datasets.load_breast_cancer()
X, y = breast_cancer.data, breast_cancer.target

# Hold out 20% of the samples as the test set
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Linear kernel
linear_svm = svm.SVC(kernel='linear', C=1)
linear_svm.fit(X_train, y_train)
y_pred_linear = linear_svm.predict(X_test)
accuracy_linear = metrics.accuracy_score(y_test, y_pred_linear)
print("Accuracy of Linear SVM Classifier:", accuracy_linear)

# Polynomial kernel (degree 3)
poly_svm = svm.SVC(kernel='poly', degree=3, C=1)
poly_svm.fit(X_train, y_train)
y_pred_poly = poly_svm.predict(X_test)
accuracy_poly = metrics.accuracy_score(y_test, y_pred_poly)
print("Accuracy of Polynomial SVM Classifier:", accuracy_poly)

# RBF kernel
rbf_svm = svm.SVC(kernel='rbf', C=1)
rbf_svm.fit(X_train, y_train)
y_pred_rbf = rbf_svm.predict(X_test)
accuracy_rbf = metrics.accuracy_score(y_test, y_pred_rbf)
print("Accuracy of RBF SVM Classifier:", accuracy_rbf)

# Side-by-side accuracy summary
print("Accuracy Comparison:")
for heading, kernel_accuracy in (
    ("Linear:", accuracy_linear),
    ("Polynomial:", accuracy_poly),
    ("RBF:", accuracy_rbf),
):
    print(heading, kernel_accuracy)

# Announce the first kernel (in the order linear, polynomial, RBF) whose
# accuracy equals the maximum; on a tie the earlier kernel is reported
max_accuracy = max(accuracy_linear, accuracy_poly, accuracy_rbf)
for kernel_accuracy, verdict in (
    (accuracy_linear, "Linear kernel is better"),
    (accuracy_poly, "Polynomial kernel is better"),
    (accuracy_rbf, "RBF kernel is better"),
):
    if kernel_accuracy == max_accuracy:
        print(verdict)
        break

# Per-class metrics for every kernel
for heading, predictions in (
    ("Classification Report for Linear SVM Classifier:", y_pred_linear),
    ("Classification Report for Polynomial SVM Classifier:", y_pred_poly),
    ("Classification Report for RBF SVM Classifier:", y_pred_rbf),
):
    print(heading)
    print(metrics.classification_report(y_test, predictions))


#38 Write a Python program to train an SVM Classifier using Stratified K-Fold Cross-Validation and compute the average accuracy
# Import necessary libraries
from sklearn import datasets
from sklearn.model_selection import StratifiedKFold
from sklearn import svm
from sklearn import metrics
import numpy as np

# Load the Iris dataset and separate features from labels
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Cross-validation setup: 5 shuffled, stratified folds with a fixed seed
n_folds = 5
skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=42)

# The same SVM Classifier object is refitted from scratch on every fold
svm_classifier = svm.SVC(kernel='rbf', C=1)

# One accuracy value per fold, in fold order
accuracy_scores = []

for train_index, test_index in skf.split(X, y):
    # Slice out this fold's training and validation samples
    X_train, y_train = X[train_index], y[train_index]
    X_test, y_test = X[test_index], y[test_index]

    # Fit on the training part, predict the held-out part and score it
    y_pred = svm_classifier.fit(X_train, y_train).predict(X_test)
    accuracy = metrics.accuracy_score(y_test, y_pred)
    accuracy_scores.append(accuracy)

# Compute the average accuracy over all folds
average_accuracy = np.asarray(accuracy_scores).mean()
print("Average Accuracy:", average_accuracy)

# Print the accuracy scores for each fold (folds are numbered from 1)
print("Accuracy Scores for each fold:")
for i, accuracy in enumerate(accuracy_scores):
    print(f"Fold {i+1}: {accuracy}")

# Spread of the per-fold accuracies
std_dev = np.asarray(accuracy_scores).std()
print("Standard Deviation of Accuracy Scores:", std_dev)


#39 Write a Python program to train a Naïve Bayes classifier using different prior probabilities and compare performance

# Import necessary libraries
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics
import numpy as np

# Load the dataset
from sklearn.datasets import make_classification
X, y = make_classification(n_samples=1000, n_features=20, n_informative=10, n_redundant=0, n_repeated=0, n_classes=2, weights=[0.7, 0.3], random_state=42)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# The features are passed through np.abs before reaching MultinomialNB.
# Computing the transformed matrices once avoids repeating the work for every
# prior.
# NOTE(review): presumably np.abs is there because MultinomialNB cannot take
# negative inputs; it discards the sign of each feature - consider a model
# suited to continuous data.
X_train_abs = np.abs(X_train)
X_test_abs = np.abs(X_test)

# Define the Naïve Bayes classifier with different prior probabilities
prior_probabilities = [[0.5, 0.5], [0.7, 0.3], [0.3, 0.7]]

# Train each model exactly once, reporting it and recording its accuracy in
# the same pass so the comparison below does not need to retrain anything
accuracies = []
for prior in prior_probabilities:
    nb = MultinomialNB(class_prior=prior)
    nb.fit(X_train_abs, y_train)
    y_pred = nb.predict(X_test_abs)
    accuracy = metrics.accuracy_score(y_test, y_pred)
    accuracies.append(accuracy)
    print(f"Prior Probabilities: {prior}, Accuracy: {accuracy}")

    # Print the classification report
    print(f"Classification Report for Prior Probabilities {prior}:")
    print(metrics.classification_report(y_test, y_pred))

# Compare the performance
# The prior with the highest accuracy wins; on a tie np.argmax returns the
# first one in the list
best_prior_index = np.argmax(accuracies)
best_prior = prior_probabilities[best_prior_index]
print(f"Best Prior Probabilities: {best_prior}")

#40 Write a Python program to perform Recursive Feature Elimination (RFE) before training an SVM Classifier and compare accuracy

# Import necessary libraries
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import RFE
from sklearn import svm
from sklearn import metrics

# Load the dataset
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the SVM Classifier used by RFE to rank the features.
# RFE needs an estimator that exposes coef_ or feature_importances_ after
# fitting. SVC only provides coef_ with a linear kernel, so an RBF-kernel SVC
# here makes rfe.fit() raise. The ranking model is therefore linear; the
# classifiers whose accuracy is compared below still use the RBF kernel.
svm_classifier = svm.SVC(kernel='linear', C=1)

# Perform Recursive Feature Elimination (RFE), keeping the 2 best features
rfe = RFE(estimator=svm_classifier, n_features_to_select=2)
rfe.fit(X_train, y_train)

# Reduce both splits to the selected feature columns
X_train_selected = rfe.transform(X_train)
X_test_selected = rfe.transform(X_test)

# Train an SVM Classifier with selected features
svm_selected = svm.SVC(kernel='rbf', C=1)
svm_selected.fit(X_train_selected, y_train)
y_pred_selected = svm_selected.predict(X_test_selected)
accuracy_selected = metrics.accuracy_score(y_test, y_pred_selected)
print("Accuracy with selected features:", accuracy_selected)

# Train an SVM Classifier without feature selection (baseline on all features)
svm_without_selection = svm.SVC(kernel='rbf', C=1)
svm_without_selection.fit(X_train, y_train)
y_pred_without_selection = svm_without_selection.predict(X_test)
accuracy_without_selection = metrics.accuracy_score(y_test, y_pred_without_selection)
print("Accuracy without feature selection:", accuracy_without_selection)

# Compare the accuracy (positive means feature selection helped)
print("Difference in accuracy:", accuracy_selected - accuracy_without_selection)

# Print the classification reports
print("Classification Report with selected features:")
print(metrics.classification_report(y_test, y_pred_selected))
print("Classification Report without feature selection:")
print(metrics.classification_report(y_test, y_pred_without_selection))


#41 Write a Python program to train an SVM Classifier and evaluate its performance using Precision, Recall, and F1-Score instead of accuracy

# Import necessary libraries
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn import metrics

# Load the Iris data: feature matrix and class labels
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for testing (fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit an RBF-kernel SVM and predict the held-out labels
svm_classifier = svm.SVC(kernel='rbf', C=1)
svm_classifier.fit(X_train, y_train)
y_pred = svm_classifier.predict(X_test)

# Score the predictions with weighted-average precision, recall and F1
precision = metrics.precision_score(y_test, y_pred, average='weighted')
recall = metrics.recall_score(y_test, y_pred, average='weighted')
f1_score = metrics.f1_score(y_test, y_pred, average='weighted')

# Report the three scores
print("Precision:", precision)
print("Recall:", recall)
print("F1-Score:", f1_score)

# Per-class breakdown as formatted text
print("Classification Report:")
print(metrics.classification_report(y_test, y_pred))

# Find the class with the highest F1. The dict form of the report also holds
# aggregate rows; those are scored 0 so that only per-class rows can win.
class_performance = metrics.classification_report(y_test, y_pred, output_dict=True)
aggregate_rows = ('accuracy', 'macro avg', 'weighted avg')
best_class = max(
    class_performance,
    key=lambda label: 0 if label in aggregate_rows else class_performance[label]['f1-score'],
)
print(f"Best performing class: {best_class}")

#42 Write a Python program to train a Naïve Bayes Classifier and evaluate its performance using Log Loss (Cross-Entropy Loss)

# Import necessary libraries
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn import metrics
import numpy as np

# Load the Iris data: feature matrix and class labels
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for testing (fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit a Gaussian Naïve Bayes model; fit() returns the estimator itself
nb_classifier = GaussianNB().fit(X_train, y_train)

# Per-class probabilities feed the log loss; hard labels feed the accuracy
# and the classification report
y_pred_proba = nb_classifier.predict_proba(X_test)
y_pred = nb_classifier.predict(X_test)

# Log loss (cross-entropy) of the predicted probabilities
log_loss = metrics.log_loss(y_test, y_pred_proba)
print("Log Loss:", log_loss)

# Accuracy of the hard predictions, for comparison
accuracy = metrics.accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Per-class precision / recall / F1
print("Classification Report:")
print(metrics.classification_report(y_test, y_pred))


#43 Write a Python program to train an SVM Classifier and visualize the Confusion Matrix using seaborn

# Import necessary libraries
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn import metrics
import seaborn as sns
import matplotlib.pyplot as plt

# Load the Iris measurements and their class labels
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Keep 20% of the samples aside for evaluation (seeded for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit an RBF-kernel SVM; fit() returns the estimator itself
svm_classifier = svm.SVC(kernel='rbf', C=1).fit(X_train, y_train)
y_pred = svm_classifier.predict(X_test)

# Tabulate actual vs. predicted labels
confusion_matrix = metrics.confusion_matrix(y_test, y_pred)

# Draw the matrix as an annotated heatmap with integer cell labels; the axes
# object returned by seaborn is used to set the labels and title
plt.figure(figsize=(8, 6))
heatmap_axes = sns.heatmap(confusion_matrix, annot=True, cmap='Blues', fmt='d')
heatmap_axes.set_xlabel('Predicted Labels')
heatmap_axes.set_ylabel('Actual Labels')
heatmap_axes.set_title('Confusion Matrix')
plt.show()

# Overall accuracy on the held-out split
accuracy = metrics.accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Per-class precision / recall / F1
print("Classification Report:")
print(metrics.classification_report(y_test, y_pred))



#44 Write a Python program to train an SVM Regressor (SVR) and evaluate its performance using Mean Absolute Error (MAE) instead of MSE

# Import necessary libraries
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn import metrics
import numpy as np

# Load the diabetes regression data: feature matrix and continuous target
diabetes = datasets.load_diabetes()
X, y = diabetes.data, diabetes.target

# Hold out 20% of the samples for testing (fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit an RBF-kernel support vector regressor; fit() returns the estimator itself
svr = svm.SVR(kernel='rbf', C=1).fit(X_train, y_train)
y_pred = svr.predict(X_test)

# Compute the error metrics up front: MAE is the headline figure, MSE and
# R-squared are reported alongside it for comparison
mae = metrics.mean_absolute_error(y_test, y_pred)
mse = metrics.mean_squared_error(y_test, y_pred)
r2 = metrics.r2_score(y_test, y_pred)

# Report the metrics
print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("R-Squared:", r2)

# Scatter the predictions against the true targets
import matplotlib.pyplot as plt
plt.scatter(y_test, y_pred)
plt.title('Actual vs Predicted Values')
plt.xlabel('Actual Values')
plt.ylabel('Predicted Values')
plt.show()


#45 Write a Python program to train a Naïve Bayes classifier and evaluate its performance using the ROC-AUC score

# Import necessary libraries
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn import metrics
import numpy as np
import matplotlib.pyplot as plt

# Build a synthetic binary classification problem (seeded)
from sklearn.datasets import make_classification
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=10,
    n_redundant=0,
    n_repeated=0,
    n_classes=2,
    random_state=42,
)

# Hold out 20% of the samples for testing (fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit Gaussian Naïve Bayes and keep the second probability column, which is
# the predicted probability of class 1
nb = GaussianNB()
nb.fit(X_train, y_train)
y_pred_proba = nb.predict_proba(X_test)[:, 1]

# Trace the ROC curve over all score thresholds and integrate it to get the AUC
fpr, tpr, thresholds = metrics.roc_curve(y_test, y_pred_proba)
roc_auc = metrics.auc(fpr, tpr)
print("ROC-AUC Score:", roc_auc)

# Plot the ROC curve together with the dashed diagonal reference line
roc_figure, roc_axes = plt.subplots(figsize=(8, 6))
roc_axes.plot(fpr, tpr, color='darkorange', lw=2, label='ROC Curve (area = %0.2f)' % roc_auc)
roc_axes.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
roc_axes.set_xlim([0.0, 1.0])
roc_axes.set_ylim([0.0, 1.05])
roc_axes.set_xlabel('False Positive Rate')
roc_axes.set_ylabel('True Positive Rate')
roc_axes.set_title('Receiver Operating Characteristic')
roc_axes.legend(loc="lower right")
plt.show()

#46 Write a Python program to train an SVM Classifier and visualize the Precision-Recall Curve.

# Import necessary libraries
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn import metrics
import numpy as np
import matplotlib.pyplot as plt

# Load the dataset: a synthetic binary classification problem (seeded)
from sklearn.datasets import make_classification
X, y = make_classification(n_samples=1000, n_features=20, n_informative=10, n_redundant=0, n_repeated=0, n_classes=2, random_state=42)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train an SVM Classifier.
# probability=True enables predict_proba. random_state seeds the shuffling
# scikit-learn performs when fitting those probability estimates, so the
# curve and its area are reproducible across runs, like the seeded dataset
# and split above.
svm_classifier = svm.SVC(kernel='rbf', C=1, probability=True, random_state=42)
svm_classifier.fit(X_train, y_train)
# Second column = predicted probability of class 1
y_pred_proba = svm_classifier.predict_proba(X_test)[:, 1]

# Calculate precision and recall at every probability threshold
precision, recall, thresholds = metrics.precision_recall_curve(y_test, y_pred_proba)

# Calculate the area under the precision-recall curve (recall on the x-axis)
auc = metrics.auc(recall, precision)
print("Area under the Precision-Recall Curve:", auc)

# Plot the precision-recall curve
plt.figure(figsize=(8, 6))
plt.plot(recall, precision, color='darkorange', lw=2, label='Precision-Recall Curve (area = %0.2f)' % auc)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall Curve')
plt.legend(loc="lower left")
plt.show()

# Evaluate the performance using accuracy of the hard class predictions
y_pred = svm_classifier.predict(X_test)
accuracy = metrics.accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Print the classification report
print("Classification Report:")
print(metrics.classification_report(y_test, y_pred))
