# 1.Fit and Transform Separately

In [10]:
from sklearn.feature_extraction.text import CountVectorizer

text_data = ["GFG is providing a new Deep Learning Course which is really good",
             "We will be sturdying Deep Learning from today",
             "I want a Deep Sleep today"]

vec = CountVectorizer()
vec = vec.fit(text_data)

data_transformed = vec.transform(text_data)
print(vec.vocabulary_)
data_transformed.toarray()# we will see how to remove stopwords below

{'gfg': 4, 'is': 6, 'providing': 9, 'new': 8, 'deep': 2, 'learning': 7, 'course': 1, 'which': 16, 'really': 10, 'good': 5, 'we': 15, 'will': 17, 'be': 0, 'sturdying': 12, 'from': 3, 'today': 13, 'want': 14, 'sleep': 11}


array([[0, 1, 1, 0, 1, 1, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0],
       [1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1],
       [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0]])

In [8]:
from pandas import DataFrame
df = DataFrame(data_transformed.toarray(), columns = vec.vocabulary_)
df

Unnamed: 0,gfg,is,providing,new,deep,learning,course,which,really,good,we,will,be,sturdying,from,today,want,sleep
0,0,1,1,0,1,1,2,1,1,1,1,0,0,0,0,0,1,0
1,1,0,1,1,0,0,0,1,0,0,0,0,1,1,0,1,0,1
2,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0


# 2.Fit and Transform Combined

In [11]:
from sklearn.feature_extraction.text import CountVectorizer
from pandas import DataFrame

text_data = ["GFG is providing a new Deep Learning Course which is really good",
             "We will be sturdying Deep Learning from today",
             "I want a Deep Sleep today"]

vec = CountVectorizer()

data_transformed = vec.fit_transform(text_data)
print(vec.vocabulary_)
data_transformed.toarray()

df = DataFrame(data_transformed.toarray(), columns = vec.vocabulary_)
df

{'gfg': 4, 'is': 6, 'providing': 9, 'new': 8, 'deep': 2, 'learning': 7, 'course': 1, 'which': 16, 'really': 10, 'good': 5, 'we': 15, 'will': 17, 'be': 0, 'sturdying': 12, 'from': 3, 'today': 13, 'want': 14, 'sleep': 11}


Unnamed: 0,gfg,is,providing,new,deep,learning,course,which,really,good,we,will,be,sturdying,from,today,want,sleep
0,0,1,1,0,1,1,2,1,1,1,1,0,0,0,0,0,1,0
1,1,0,1,1,0,0,0,1,0,0,0,0,1,1,0,1,0,1
2,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0


# 3.Dealing With StopWords

In [15]:
from sklearn.feature_extraction.text import CountVectorizer
from pandas import DataFrame

text_data = ["GFG is providing a new Deep Learning Course which is really good",
             "We will be sturdying Deep Learning from today",
             "I want a Deep Sleep today"]

vec = CountVectorizer(stop_words = 'english')
vec = vec.fit(text_data)

data_transformed = vec.transform(text_data)

df = DataFrame(data_transformed.toarray(), columns = vec.vocabulary_)
df

Unnamed: 0,gfg,providing,new,deep,learning,course,really,good,sturdying,today,want,sleep
0,1,1,1,1,1,1,1,1,0,0,0,0
1,0,1,0,0,1,0,0,0,0,1,1,0
2,0,1,0,0,0,0,0,0,1,0,1,1


# 4.Dealing with N-Gram

In [20]:
from sklearn.feature_extraction.text import CountVectorizer
from pandas import DataFrame

text_data = ["GFG is providing a new Deep Learning Course which is really good",
             "We will be sturdying Deep Learning from today",
             "I want a Deep Sleep today"]

vec = CountVectorizer(ngram_range = (1,3))
vec = vec.fit(text_data)

data_transformed = vec.transform(text_data)

df = DataFrame(data_transformed.toarray(), columns = vec.vocabulary_)
df

Unnamed: 0,gfg,is,providing,new,deep,learning,course,which,really,good,...,sturdying deep learning,deep learning from,learning from today,want,sleep,want deep,deep sleep,sleep today,want deep sleep,deep sleep today
0,0,0,0,1,1,1,1,1,1,0,...,0,0,0,0,1,1,1,0,0,0
1,1,1,1,0,0,0,1,1,0,1,...,0,1,1,1,0,0,0,1,1,1
2,0,0,0,0,0,0,1,0,0,0,...,1,0,0,0,0,0,0,0,0,0


# 5.Count Vectorizer with Data Preprocessing

In [22]:
from sklearn.feature_extraction.text import CountVectorizer
from pandas import DataFrame

text_data = ["GFG is providing a new Deep Learning Course which is really good",
             "We will be sturdying Deep Learning from today",
             "I want a Deep Sleep today"]

vec = CountVectorizer(ngram_range = (1,1),stop_words = 'english' , lowercase = True)
vec = vec.fit(text_data)

data_transformed = vec.transform(text_data)

df = DataFrame(data_transformed.toarray(), columns = vec.vocabulary_)
df

Unnamed: 0,gfg,providing,new,deep,learning,course,really,good,sturdying,today,want,sleep
0,1,1,1,1,1,1,1,1,0,0,0,0
1,0,1,0,0,1,0,0,0,0,1,1,0
2,0,1,0,0,0,0,0,0,1,0,1,1
