# Bag of Words

In [1]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer

def bag_of_word(sentences, verbose=1):
    """
    Build a Bag of Words count matrix from a list of sentences.

    Parameters:
    sentences (list of str): The documents to vectorize, one string per document.
    verbose (int): When equal to 1, prints the number of documents, the number
        of words and the count table. Any other value (e.g. 0) prints nothing.

    Returns:
    count_vectorizer (CountVectorizer): The scikit-learn vectorizer after being fitted on `sentences`.
    vectorized_sentences (ndarray): Dense 2D count array with one row per document.
    feature_names (ndarray): The feature names (words) used as the column labels of the count table.
    """
    count_vectorizer = CountVectorizer()
    # fit_transform learns the vocabulary and produces the counts in one call.
    term_counts = count_vectorizer.fit_transform(sentences)
    feature_names = count_vectorizer.get_feature_names_out()
    vectorized_sentences = term_counts.toarray()

    # Silent path: nothing to report, hand back the results right away.
    if verbose != 1:
        return count_vectorizer, vectorized_sentences, feature_names

    # Label the columns with the words so the printed table is readable.
    count_table = pd.DataFrame(vectorized_sentences, columns=feature_names)
    num_documents, num_words = term_counts.shape
    print(f"Num of Documents: {num_documents}")
    print(f"Num of Words: {num_words}")
    print()
    print(count_table)
    return count_vectorizer, vectorized_sentences, feature_names

# Five short example sentences used as the demo corpus.
sentences = [
    'I love my dog.',
    'I love my cat.',
    'I love my dog and love my cat',
    'You love my dog!',
    'Do you think my dog is amazing?',
]

# verbose=1 also prints the document/word counts and the count table.
count_vectorizer, vectorized_sentences, feature_names = bag_of_word(
    sentences, verbose=1
)

Num of Documents: 5
Num of Words: 10

   amazing  and  cat  do  dog  is  love  my  think  you
0        0    0    0   0    1   0     1   1      0    0
1        0    0    1   0    0   0     1   1      0    0
2        0    1    1   0    1   0     2   2      0    0
3        0    0    0   0    1   0     1   1      0    1
4        1    0    0   1    1   1     0   1      1    1
