## 1. Fit and Transform Separately

In [17]:
from sklearn.feature_extraction.text import CountVectorizer
from pandas import DataFrame

text_data = ['GFG is providing new deep learning Course which is really good',
             'We will be studying Deep Learning from today',
             'I want a deep study today']

vec = CountVectorizer()
vec.fit(text_data)

data_transformed = vec.transform(text_data)
DataFrame(data_transformed.toarray(), columns = vec.vocabulary_)

Unnamed: 0,gfg,is,providing,new,deep,learning,course,which,really,good,we,will,be,studying,from,today,want,study
0,0,1,1,0,1,1,2,1,1,1,1,0,0,0,0,0,1,0
1,1,0,1,1,0,0,0,1,0,0,0,0,1,1,0,1,0,1
2,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0


## 2. Fit and Transform Combined

In [18]:
from sklearn.feature_extraction.text import CountVectorizer
from pandas import DataFrame

text_data = ['GFG is providing new deep learning Course which is really good',
             'We will be studying Deep Learning from today',
             'I want a deep study today']

vec = CountVectorizer()
data_transformed = vec.fit_transform(text_data)

DataFrame(data_transformed.toarray(), columns = vec.vocabulary_)

Unnamed: 0,gfg,is,providing,new,deep,learning,course,which,really,good,we,will,be,studying,from,today,want,study
0,0,1,1,0,1,1,2,1,1,1,1,0,0,0,0,0,1,0
1,1,0,1,1,0,0,0,1,0,0,0,0,1,1,0,1,0,1
2,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0


## 3. Dealing with StopWords

In [21]:
from sklearn.feature_extraction.text import CountVectorizer
from pandas import DataFrame

text_data = ['GFG is providing new deep learning Course which is really good',
             'We will be studying Deep Learning from today',
             'I want a deep study today']

vec = CountVectorizer(stop_words='english')
data_transformed = vec.fit_transform(text_data)

DataFrame(data_transformed.toarray(), columns = vec.vocabulary_)

Unnamed: 0,gfg,providing,new,deep,learning,course,really,good,studying,today,want,study
0,1,1,1,1,1,1,1,1,0,0,0,0
1,0,1,0,0,1,0,0,0,0,1,1,0
2,0,1,0,0,0,0,0,0,1,0,1,1


## 4. Dealing with N-Gram

In [23]:
from sklearn.feature_extraction.text import CountVectorizer
from pandas import DataFrame

text_data = ['GFG is providing new deep learning Course which is really good',
             'We will be studying Deep Learning from today',
             'I want a deep study today']

vec = CountVectorizer(ngram_range=(2,2))
data_transformed = vec.fit_transform(text_data)

DataFrame(data_transformed.toarray(), columns=vec.vocabulary_)

Unnamed: 0,gfg is,is providing,providing new,new deep,deep learning,learning course,course which,which is,is really,really good,we will,will be,be studying,studying deep,learning from,from today,want deep,deep study,study today
0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,0,0,0,1,0
1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,1
2,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0


## 5. Count Vectoriser with Data Preprocessing

In [25]:
from sklearn.feature_extraction.text import CountVectorizer
from pandas import DataFrame

text_data = ['GFG is providing new deep learning Course which is really good',
             'We will be studying Deep Learning from today',
             'I want a deep study today']

vec = CountVectorizer(ngram_range=(2,2), stop_words='english', lowercase=True)
data_transformed = vec.fit_transform(text_data)

DataFrame(data_transformed.toarray(), columns=vec.vocabulary_)

Unnamed: 0,gfg providing,providing new,new deep,deep learning,learning course,course really,really good,studying deep,learning today,want deep,deep study,study today
0,1,1,0,1,1,0,1,1,1,0,0,0
1,0,1,0,0,0,1,0,0,0,0,1,0
2,0,0,1,0,0,0,0,0,0,1,0,1
