In [4]:
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.pipeline import make_pipeline
import pandas as pd

def bag_of_words(texts):
    """Return the bag-of-words term counts of *texts* as a DataFrame.

    A fresh ``CountVectorizer`` (default settings) is fitted on ``texts``;
    the resulting sparse count matrix is converted to a dense array.

    Args:
        texts: Iterable of raw text documents.

    Returns:
        A ``pandas.DataFrame`` with one row per document and one column per
        vocabulary term learned from ``texts``.
    """
    counter = CountVectorizer()
    # Fit first: the vocabulary (and thus the column names) only exists
    # after fit_transform has run.
    dense_counts = counter.fit_transform(texts).toarray()
    return pd.DataFrame(dense_counts, columns=counter.get_feature_names_out())

def tfidf_representation(texts):
    """Return the TF-IDF weights of *texts* as a DataFrame.

    A fresh ``TfidfVectorizer`` (default settings) is fitted on ``texts``;
    the resulting sparse weight matrix is converted to a dense array.

    Args:
        texts: Iterable of raw text documents.

    Returns:
        A ``pandas.DataFrame`` with one row per document and one column per
        vocabulary term learned from ``texts``.
    """
    weigher = TfidfVectorizer()
    # Fit first: the vocabulary (and thus the column names) only exists
    # after fit_transform has run.
    dense_weights = weigher.fit_transform(texts).toarray()
    return pd.DataFrame(dense_weights, columns=weigher.get_feature_names_out())

def train_naive_bayes_classifier(texts, labels):
    """Fit and return a bag-of-words + multinomial Naive Bayes pipeline.

    Args:
        texts: Iterable of raw training documents.
        labels: Target labels, one per entry in ``texts``.

    Returns:
        The fitted pipeline (``CountVectorizer`` followed by
        ``MultinomialNB``); call ``.predict`` on it with raw text.
    """
    nb_pipeline = make_pipeline(CountVectorizer(), MultinomialNB())
    # Pipeline.fit returns the pipeline itself, so this hands back the
    # same fitted object.
    return nb_pipeline.fit(texts, labels)

def train_decision_tree_classifier(texts, labels, random_state=None):
    """Fit and return a TF-IDF + decision-tree pipeline.

    Args:
        texts: Iterable of raw training documents.
        labels: Target labels, one per entry in ``texts``.
        random_state: Seed forwarded to ``DecisionTreeClassifier``. The
            default ``None`` keeps the previous unseeded behaviour; pass an
            int to get the same tree on every run. Per the scikit-learn
            documentation, the tree permutes candidate features at each
            split, so ties between equally good splits can otherwise be
            resolved differently from one fit to the next.

    Returns:
        The fitted pipeline (``TfidfVectorizer`` followed by
        ``DecisionTreeClassifier``); call ``.predict`` on it with raw text.
    """
    model = make_pipeline(
        TfidfVectorizer(),
        DecisionTreeClassifier(random_state=random_state),
    )
    model.fit(texts, labels)
    return model

def predict_category(model, new_text):
    """Return the label *model* predicts for a single document.

    Args:
        model: Any object exposing ``predict(list_of_texts)`` that returns
            an indexable sequence of predictions.
        new_text: The single raw document to classify.

    Returns:
        The first (and only) prediction produced for ``new_text``.
    """
    # predict() is called on a batch, so wrap the lone document in a list
    # and unwrap the single result afterwards.
    single_batch = [new_text]
    predictions = model.predict(single_batch)
    return predictions[0]

if __name__ == "__main__":
    # --- Naive Bayes demo: three documents, each with its own topic label.
    texts = ["I love programming", "Python is great", "Machine learning is amazing"]
    labels = ["programming", "python", "machine learning"]
    new_text = "I enjoy coding in Python"

    nb_model = train_naive_bayes_classifier(texts, labels)
    nb_prediction = predict_category(nb_model, new_text)

    # --- Decision tree demo: a tiny spam / normal corpus.
    dt_texts = ["Spam messages are annoying", "I won a lottery", "This is a normal message"]
    dt_labels = ["spam", "spam", "normal"]
    new_dt_text = "You have won a free prize!"

    dt_model = train_decision_tree_classifier(dt_texts, dt_labels)
    dt_prediction = predict_category(dt_model, new_dt_text)

    # --- Report: feature tables for the first corpus, then both predictions.
    bow_table = bag_of_words(texts)
    print("Bag of Words Representation:")
    print(bow_table)

    tfidf_table = tfidf_representation(texts)
    print("\nTF-IDF Representation:")
    print(tfidf_table)

    print("\nPredicted Category (Naïve Bayes):", nb_prediction)
    print("\nPredicted Category (Decision Tree):", dt_prediction)


Bag of Words Representation:
   amazing  great  is  learning  love  machine  programming  python
0        0      0   0         0     1        0            1       0
1        0      1   1         0     0        0            0       1
2        1      0   1         1     0        1            0       0

TF-IDF Representation:
    amazing     great       is  learning      love   machine  programming  \
0  0.000000  0.000000  0.00000  0.000000  0.707107  0.000000     0.707107   
1  0.000000  0.622766  0.47363  0.000000  0.000000  0.000000     0.000000   
2  0.528635  0.000000  0.40204  0.528635  0.000000  0.528635     0.000000   

     python  
0  0.000000  
1  0.622766  
2  0.000000  

Predicted Category (Naïve Bayes): python

Predicted Category (Decision Tree): spam
