<a href="https://colab.research.google.com/github/MohitPolisetty/DS-Experiments-TY-CSE-AI-DS/blob/main/Experiment12DS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Import required libraries for various machine learning models
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris, load_wine, load_breast_cancer, fetch_20newsgroups
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from sklearn.feature_extraction.text import CountVectorizer

# 1. Naive Bayes Classifier: Working with Text Data
# Load a text-based dataset for classification tasks
news_data = fetch_20newsgroups(subset='train')
text_samples, labels = news_data.data, news_data.target

# Convert text data to numerical format using Count Vectorization
text_vectorizer = CountVectorizer()
text_features = text_vectorizer.fit_transform(text_samples)

# Split data into training and testing sets
text_train, text_test, label_train, label_test = train_test_split(
    text_features, labels, test_size=0.3
)

# Train a Naive Bayes model
naive_bayes = MultinomialNB()
naive_bayes.fit(text_train, label_train)

# Evaluate the model's performance
nb_accuracy = naive_bayes.score(text_test, label_test)
print(f"Accuracy of Naive Bayes (Text Classification): {nb_accuracy:.4f}")

# 2. Random Forest Classifier: Wine Dataset
# Load wine dataset for multi-class classification
wine_data = load_wine()
wine_features, wine_labels = wine_data.data, wine_data.target

# Split data into training and testing sets
wine_train, wine_test, wine_label_train, wine_label_test = train_test_split(
    wine_features, wine_labels, test_size=0.3
)

# Train a Random Forest model
random_forest = RandomForestClassifier(n_estimators=100, random_state=42)
random_forest.fit(wine_train, wine_label_train)

# Evaluate the model's performance
rf_accuracy = random_forest.score(wine_test, wine_label_test)
print(f"Accuracy of Random Forest (Wine Classification): {rf_accuracy:.4f}")

# 3. Decision Tree Classifier: Iris Dataset
# Load Iris dataset for classification
iris_data = load_iris()
iris_features, iris_labels = iris_data.data, iris_data.target

# Split data into training and testing sets
iris_train, iris_test, iris_label_train, iris_label_test = train_test_split(
    iris_features, iris_labels, test_size=0.3
)

# Train a Decision Tree model
decision_tree = DecisionTreeClassifier()
decision_tree.fit(iris_train, iris_label_train)

# Evaluate the model's performance
dt_accuracy = decision_tree.score(iris_test, iris_label_test)
print(f"Accuracy of Decision Tree (Iris Classification): {dt_accuracy:.4f}")

# 4. Support Vector Machine: Breast Cancer Dataset
# Load breast cancer dataset for binary classification
cancer_data = load_breast_cancer()
cancer_features, cancer_labels = cancer_data.data, cancer_data.target

# Split data into training and testing sets
cancer_train, cancer_test, cancer_label_train, cancer_label_test = train_test_split(
    cancer_features, cancer_labels, test_size=0.3
)

# Train an SVM model with a linear kernel
svm_model = SVC(kernel='linear')
svm_model.fit(cancer_train, cancer_label_train)

# Evaluate the model's performance
svm_accuracy = svm_model.score(cancer_test, cancer_label_test)
print(f"Accuracy of SVM (Cancer Classification): {svm_accuracy:.4f}")

# 5. K-Nearest Neighbors: Iris Dataset
# Reuse the Iris dataset for classification
knn_model = KNeighborsClassifier(n_neighbors=5)
knn_model.fit(iris_train, iris_label_train)

# Evaluate the KNN model
knn_accuracy = knn_model.score(iris_test, iris_label_test)
print(f"Accuracy of KNN (Iris Classification): {knn_accuracy:.4f}")

# 6. XGBoost Classifier: Wine Dataset
# Reuse the Wine dataset for classification
xgb_model = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss')
xgb_model.fit(wine_train, wine_label_train)

# Evaluate the XGBoost model
xgb_accuracy = xgb_model.score(wine_test, wine_label_test)
print(f"Accuracy of XGBoost (Wine Classification): {xgb_accuracy:.4f}")

# 7. AdaBoost Classifier: Iris Dataset
# Train an AdaBoost classifier using a weak learner (Decision Tree)
ada_boost = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), n_estimators=50)
ada_boost.fit(iris_train, iris_label_train)

# Evaluate the AdaBoost model
ada_accuracy = ada_boost.score(iris_test, iris_label_test)
print(f"Accuracy of AdaBoost (Iris Classification): {ada_accuracy:.4f}")


Accuracy of Naive Bayes (Text Classification): 0.8206
Accuracy of Random Forest (Wine Classification): 0.9815
Accuracy of Decision Tree (Iris Classification): 0.9333
Accuracy of SVM (Cancer Classification): 0.9415
Accuracy of KNN (Iris Classification): 0.9556


Parameters: { "use_label_encoder" } are not used.



Accuracy of XGBoost (Wine Classification): 0.9444
Accuracy of AdaBoost (Iris Classification): 0.9333
