In [2]:
try:
    from sklearn.datasets import load_iris
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
    from sklearn.naive_bayes import GaussianNB
    from sklearn.naive_bayes import BernoulliNB
    from sklearn.preprocessing import Binarizer
    from sklearn.datasets import fetch_20newsgroups
    from sklearn.feature_extraction.text import CountVectorizer
    from sklearn.naive_bayes import MultinomialNB
    from sklearn.pipeline import make_pipeline

except:
    !pip install scikit-learn

    from sklearn.datasets import load_iris
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
    from sklearn.naive_bayes import GaussianNB
    from sklearn.naive_bayes import BernoulliNB
    from sklearn.preprocessing import Binarizer
    from sklearn.datasets import fetch_20newsgroups
    from sklearn.feature_extraction.text import CountVectorizer
    from sklearn.naive_bayes import MultinomialNB
    from sklearn.pipeline import make_pipeline

###Loading Iris Dataset

In [3]:
iris = load_iris()
X, y = iris.data, iris.target

###Spliting Data

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

###Logistic Regression

In [5]:
log_reg = LogisticRegression(max_iter=200)
log_reg.fit(X_train, y_train)

##Predicting values of X_test

In [6]:
y_pred_log_reg = log_reg.predict(X_test)

###Evaluation Metrics

In [7]:
log_reg_metrics = {
    'Accuracy': accuracy_score(y_test, y_pred_log_reg),
    'Precision': precision_score(y_test, y_pred_log_reg, average='weighted'),
    'Recall': recall_score(y_test, y_pred_log_reg, average='weighted'),
    'F1-Score': f1_score(y_test, y_pred_log_reg, average='weighted')
}

log_reg_metrics

{'Accuracy': 1.0, 'Precision': 1.0, 'Recall': 1.0, 'F1-Score': 1.0}

##Gaussian NB

###Initializing Gaussian NB

In [8]:
gnb = GaussianNB()
gnb.fit(X_train, y_train)

###Predicting values of X_test

In [9]:
y_pred_gnb = gnb.predict(X_test)

###Evaluation Metrics

In [10]:
gnb_metrics = {
    'Accuracy': accuracy_score(y_test, y_pred_gnb),
    'Precision': precision_score(y_test, y_pred_gnb, average='weighted'),
    'Recall': recall_score(y_test, y_pred_gnb, average='weighted'),
    'F1-Score': f1_score(y_test, y_pred_gnb, average='weighted')
}
gnb_metrics

{'Accuracy': 0.9777777777777777,
 'Precision': 0.9793650793650793,
 'Recall': 0.9777777777777777,
 'F1-Score': 0.9777448559670783}

###Bernoulli NB

###Transform features into binary values

In [11]:
binarizer = Binarizer()
X_binarized = binarizer.fit_transform(X)

###Spliting the binary data

In [12]:
X_train_bin, X_test_bin, y_train_bin, y_test_bin = train_test_split(X_binarized, y, test_size=0.3, random_state=42)

###Initalizing Bernoulli NB

In [13]:
bnb = BernoulliNB()
bnb.fit(X_train_bin, y_train_bin)

###Predicting values of X_test


In [14]:
y_pred_bnb = bnb.predict(X_test_bin)

###Evaluation Metrics

In [16]:
bnb_metrics = {
    'Accuracy': accuracy_score(y_test_bin, y_pred_bnb),
    'Precision': precision_score(y_test_bin, y_pred_bnb, average='weighted'),
    'Recall': recall_score(y_test_bin, y_pred_bnb, average='weighted'),
    'F1-Score': f1_score(y_test_bin, y_pred_bnb, average='weighted')
}
bnb_metrics

  _warn_prf(average, modifier, msg_start, len(result))


{'Accuracy': 0.28888888888888886,
 'Precision': 0.08345679012345678,
 'Recall': 0.28888888888888886,
 'F1-Score': 0.12950191570881225}

####Loading Newsgroup dataset

In [17]:
# Load the 20 Newsgroups dataset
newsgroups = fetch_20newsgroups(subset='train')
newsgroups_test = fetch_20newsgroups(subset='test')

###Multinominal NB

###Initializing MultinominalNB

In [18]:
# Create a pipeline that combines the vectorizer and the MultinomialNB model
model = make_pipeline(CountVectorizer(), MultinomialNB())

# Train the model
model.fit(newsgroups.data, newsgroups.target)

###Predicting values of X_test

In [19]:
# Predict on the test set
y_pred_mnb = model.predict(newsgroups_test.data)

###Evaluation Metrics

In [20]:
# Evaluate the performance
mnb_metrics = {
    'Accuracy': accuracy_score(newsgroups_test.target, y_pred_mnb),
    'Precision': precision_score(newsgroups_test.target, y_pred_mnb, average='weighted'),
    'Recall': recall_score(newsgroups_test.target, y_pred_mnb, average='weighted'),
    'F1-Score': f1_score(newsgroups_test.target, y_pred_mnb, average='weighted')
}
mnb_metrics

{'Accuracy': 0.7728359001593202,
 'Precision': 0.7616683207318354,
 'Recall': 0.7728359001593202,
 'F1-Score': 0.7511127577441177}

###Comparison

In [21]:
import pandas as pd

# Combine all metrics into a DataFrame for comparison
metrics_comparison = pd.DataFrame({
    'Logistic Regression': log_reg_metrics,
    'GaussianNB': gnb_metrics,
    'BernoulliNB': bnb_metrics,
    'MultinomialNB': mnb_metrics
})
metrics_comparison

Unnamed: 0,Logistic Regression,GaussianNB,BernoulliNB,MultinomialNB
Accuracy,1.0,0.977778,0.288889,0.772836
Precision,1.0,0.979365,0.083457,0.761668
Recall,1.0,0.977778,0.288889,0.772836
F1-Score,1.0,0.977745,0.129502,0.751113


In [None]:
from matplotlib import pyplot as plt
metrics_comparison['BernoulliNB'].plot(kind='hist', bins=20, title='BernoulliNB')
plt.gca().spines[['top', 'right',]].set_visible(False)

In [23]:
from google.colab import sheets
sheet = sheets.InteractiveSheet(df=metrics_comparison)

https://docs.google.com/spreadsheets/d/1H-iTZVNRTmtUasWWdbyy-z3U4ZFGCxAwzu6Yh8OmCJU#gid=0


  return frame.applymap(_clean_val).replace({np.nan: None})
