In [3]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB, MultinomialNB

In [4]:
wine = load_wine()
wine

{'data': array([[1.423e+01, 1.710e+00, 2.430e+00, ..., 1.040e+00, 3.920e+00,
         1.065e+03],
        [1.320e+01, 1.780e+00, 2.140e+00, ..., 1.050e+00, 3.400e+00,
         1.050e+03],
        [1.316e+01, 2.360e+00, 2.670e+00, ..., 1.030e+00, 3.170e+00,
         1.185e+03],
        ...,
        [1.327e+01, 4.280e+00, 2.260e+00, ..., 5.900e-01, 1.560e+00,
         8.350e+02],
        [1.317e+01, 2.590e+00, 2.370e+00, ..., 6.000e-01, 1.620e+00,
         8.400e+02],
        [1.413e+01, 4.100e+00, 2.740e+00, ..., 6.100e-01, 1.600e+00,
         5.600e+02]]),
 'target': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1

In [5]:
wine.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names'])

In [8]:
wine.target_names

array(['class_0', 'class_1', 'class_2'], dtype='<U7')

In [11]:
print(wine.DESCR)

.. _wine_dataset:

Wine recognition dataset
------------------------

**Data Set Characteristics:**

:Number of Instances: 178
:Number of Attributes: 13 numeric, predictive attributes and the class
:Attribute Information:
    - Alcohol
    - Malic acid
    - Ash
    - Alcalinity of ash
    - Magnesium
    - Total phenols
    - Flavanoids
    - Nonflavanoid phenols
    - Proanthocyanins
    - Color intensity
    - Hue
    - OD280/OD315 of diluted wines
    - Proline
    - class:
        - class_0
        - class_1
        - class_2

:Summary Statistics:

                                Min   Max   Mean     SD
Alcohol:                      11.0  14.8    13.0   0.8
Malic Acid:                   0.74  5.80    2.34  1.12
Ash:                          1.36  3.23    2.36  0.27
Alcalinity of Ash:            10.6  30.0    19.5   3.3
Magnesium:                    70.0 162.0    99.7  14.3
Total Phenols:                0.98  3.88    2.29  0.63
Flavanoids:                   0.34  5.08    2.03  1.00

In [12]:
wine.feature_names

['alcohol',
 'malic_acid',
 'ash',
 'alcalinity_of_ash',
 'magnesium',
 'total_phenols',
 'flavanoids',
 'nonflavanoid_phenols',
 'proanthocyanins',
 'color_intensity',
 'hue',
 'od280/od315_of_diluted_wines',
 'proline']

In [13]:
data = pd.DataFrame(wine.data,columns=wine.feature_names)
data

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline
0,14.23,1.71,2.43,15.6,127.0,2.80,3.06,0.28,2.29,5.64,1.04,3.92,1065.0
1,13.20,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.40,1050.0
2,13.16,2.36,2.67,18.6,101.0,2.80,3.24,0.30,2.81,5.68,1.03,3.17,1185.0
3,14.37,1.95,2.50,16.8,113.0,3.85,3.49,0.24,2.18,7.80,0.86,3.45,1480.0
4,13.24,2.59,2.87,21.0,118.0,2.80,2.69,0.39,1.82,4.32,1.04,2.93,735.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
173,13.71,5.65,2.45,20.5,95.0,1.68,0.61,0.52,1.06,7.70,0.64,1.74,740.0
174,13.40,3.91,2.48,23.0,102.0,1.80,0.75,0.43,1.41,7.30,0.70,1.56,750.0
175,13.27,4.28,2.26,20.0,120.0,1.59,0.69,0.43,1.35,10.20,0.59,1.56,835.0
176,13.17,2.59,2.37,20.0,120.0,1.65,0.68,0.53,1.46,9.30,0.60,1.62,840.0


In [16]:
data["target"]=wine.target
data

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline,target
0,14.23,1.71,2.43,15.6,127.0,2.80,3.06,0.28,2.29,5.64,1.04,3.92,1065.0,0
1,13.20,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.40,1050.0,0
2,13.16,2.36,2.67,18.6,101.0,2.80,3.24,0.30,2.81,5.68,1.03,3.17,1185.0,0
3,14.37,1.95,2.50,16.8,113.0,3.85,3.49,0.24,2.18,7.80,0.86,3.45,1480.0,0
4,13.24,2.59,2.87,21.0,118.0,2.80,2.69,0.39,1.82,4.32,1.04,2.93,735.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
173,13.71,5.65,2.45,20.5,95.0,1.68,0.61,0.52,1.06,7.70,0.64,1.74,740.0,2
174,13.40,3.91,2.48,23.0,102.0,1.80,0.75,0.43,1.41,7.30,0.70,1.56,750.0,2
175,13.27,4.28,2.26,20.0,120.0,1.59,0.69,0.43,1.35,10.20,0.59,1.56,835.0,2
176,13.17,2.59,2.37,20.0,120.0,1.65,0.68,0.53,1.46,9.30,0.60,1.62,840.0,2


In [17]:
data.head()

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline,target
0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065.0,0
1,13.2,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050.0,0
2,13.16,2.36,2.67,18.6,101.0,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185.0,0
3,14.37,1.95,2.5,16.8,113.0,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480.0,0
4,13.24,2.59,2.87,21.0,118.0,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735.0,0


In [18]:
data.size

2492

In [19]:
data.shape

(178, 14)

In [20]:
x = data.drop("target",axis=1)
y = data["target"]

In [21]:
x

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline
0,14.23,1.71,2.43,15.6,127.0,2.80,3.06,0.28,2.29,5.64,1.04,3.92,1065.0
1,13.20,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.40,1050.0
2,13.16,2.36,2.67,18.6,101.0,2.80,3.24,0.30,2.81,5.68,1.03,3.17,1185.0
3,14.37,1.95,2.50,16.8,113.0,3.85,3.49,0.24,2.18,7.80,0.86,3.45,1480.0
4,13.24,2.59,2.87,21.0,118.0,2.80,2.69,0.39,1.82,4.32,1.04,2.93,735.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
173,13.71,5.65,2.45,20.5,95.0,1.68,0.61,0.52,1.06,7.70,0.64,1.74,740.0
174,13.40,3.91,2.48,23.0,102.0,1.80,0.75,0.43,1.41,7.30,0.70,1.56,750.0
175,13.27,4.28,2.26,20.0,120.0,1.59,0.69,0.43,1.35,10.20,0.59,1.56,835.0
176,13.17,2.59,2.37,20.0,120.0,1.65,0.68,0.53,1.46,9.30,0.60,1.62,840.0


In [22]:
y

0      0
1      0
2      0
3      0
4      0
      ..
173    2
174    2
175    2
176    2
177    2
Name: target, Length: 178, dtype: int64

In [23]:
x_train,x_test,y_train,y_test = train_test_split(x,y, test_size=0.20, random_state=42)

In [24]:
model1=GaussianNB()

In [25]:
model1.fit(x_train,y_train)
model1.score(x_test,y_test)

1.0

In [26]:
model2 = MultinomialNB()
model2.fit(x_train,y_train)

In [27]:
model2.score(x_test,y_test)

0.8888888888888888

In [28]:
y_pred = model1.predict(x_test)
y_pred

array([0, 0, 2, 0, 1, 0, 1, 2, 1, 2, 0, 2, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1,
       1, 2, 2, 2, 1, 1, 1, 0, 0, 1, 2, 0, 0, 0])

In [30]:
pd.DataFrame({"actual value":y_test,"predicted value":y_pred})

Unnamed: 0,actual value,predicted value
19,0,0
45,0,0
140,2,2
30,0,0
67,1,1
16,0,0
119,1,1
174,2,2
109,1,1
141,2,2


In [36]:
model1.predict([[15.23,2.71,	3.43,	16.6,	128.0,	3.80,	4.06,	1.28,	3.29,	4.64,	2.04,	4.92,	1066.0]])



array([1])

In [89]:
tit = pd.read_csv("titanic (3).csv")
tit

Unnamed: 0,PassengerId,Name,Pclass,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Survived
0,1,"Braund, Mr. Owen Harris",3,male,22.0,1,0,A/5 21171,7.2500,,S,0
1,2,"Cumings, Mrs. John Bradley (Florence Briggs Th...",1,female,38.0,1,0,PC 17599,71.2833,C85,C,1
2,3,"Heikkinen, Miss. Laina",3,female,26.0,0,0,STON/O2. 3101282,7.9250,,S,1
3,4,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1,female,35.0,1,0,113803,53.1000,C123,S,1
4,5,"Allen, Mr. William Henry",3,male,35.0,0,0,373450,8.0500,,S,0
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,"Montvila, Rev. Juozas",2,male,27.0,0,0,211536,13.0000,,S,0
887,888,"Graham, Miss. Margaret Edith",1,female,19.0,0,0,112053,30.0000,B42,S,1
888,889,"Johnston, Miss. Catherine Helen ""Carrie""",3,female,,1,2,W./C. 6607,23.4500,,S,0
889,890,"Behr, Mr. Karl Howell",1,male,26.0,0,0,111369,30.0000,C148,C,1


In [90]:
tit.isna().sum()

PassengerId      0
Name             0
Pclass           0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
Survived         0
dtype: int64

In [91]:
tit.dtypes

PassengerId      int64
Name            object
Pclass           int64
Sex             object
Age            float64
SibSp            int64
Parch            int64
Ticket          object
Fare           float64
Cabin           object
Embarked        object
Survived         int64
dtype: object

In [92]:
from sklearn.preprocessing import LabelEncoder
le=LabelEncoder()
tit["Name"]=le.fit_transform(tit["Name"])
tit

Unnamed: 0,PassengerId,Name,Pclass,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Survived
0,1,108,3,male,22.0,1,0,A/5 21171,7.2500,,S,0
1,2,190,1,female,38.0,1,0,PC 17599,71.2833,C85,C,1
2,3,353,3,female,26.0,0,0,STON/O2. 3101282,7.9250,,S,1
3,4,272,1,female,35.0,1,0,113803,53.1000,C123,S,1
4,5,15,3,male,35.0,0,0,373450,8.0500,,S,0
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,548,2,male,27.0,0,0,211536,13.0000,,S,0
887,888,303,1,female,19.0,0,0,112053,30.0000,B42,S,1
888,889,413,3,female,,1,2,W./C. 6607,23.4500,,S,0
889,890,81,1,male,26.0,0,0,111369,30.0000,C148,C,1


In [93]:
tit

Unnamed: 0,PassengerId,Name,Pclass,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Survived
0,1,108,3,male,22.0,1,0,A/5 21171,7.2500,,S,0
1,2,190,1,female,38.0,1,0,PC 17599,71.2833,C85,C,1
2,3,353,3,female,26.0,0,0,STON/O2. 3101282,7.9250,,S,1
3,4,272,1,female,35.0,1,0,113803,53.1000,C123,S,1
4,5,15,3,male,35.0,0,0,373450,8.0500,,S,0
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,548,2,male,27.0,0,0,211536,13.0000,,S,0
887,888,303,1,female,19.0,0,0,112053,30.0000,B42,S,1
888,889,413,3,female,,1,2,W./C. 6607,23.4500,,S,0
889,890,81,1,male,26.0,0,0,111369,30.0000,C148,C,1


In [94]:
tit["Sex"]=le.fit_transform(tit["Sex"])
tit

Unnamed: 0,PassengerId,Name,Pclass,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Survived
0,1,108,3,1,22.0,1,0,A/5 21171,7.2500,,S,0
1,2,190,1,0,38.0,1,0,PC 17599,71.2833,C85,C,1
2,3,353,3,0,26.0,0,0,STON/O2. 3101282,7.9250,,S,1
3,4,272,1,0,35.0,1,0,113803,53.1000,C123,S,1
4,5,15,3,1,35.0,0,0,373450,8.0500,,S,0
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,548,2,1,27.0,0,0,211536,13.0000,,S,0
887,888,303,1,0,19.0,0,0,112053,30.0000,B42,S,1
888,889,413,3,0,,1,2,W./C. 6607,23.4500,,S,0
889,890,81,1,1,26.0,0,0,111369,30.0000,C148,C,1


In [95]:
tit["Ticket"]=le.fit_transform(tit["Ticket"])
tit

Unnamed: 0,PassengerId,Name,Pclass,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Survived
0,1,108,3,1,22.0,1,0,523,7.2500,,S,0
1,2,190,1,0,38.0,1,0,596,71.2833,C85,C,1
2,3,353,3,0,26.0,0,0,669,7.9250,,S,1
3,4,272,1,0,35.0,1,0,49,53.1000,C123,S,1
4,5,15,3,1,35.0,0,0,472,8.0500,,S,0
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,548,2,1,27.0,0,0,101,13.0000,,S,0
887,888,303,1,0,19.0,0,0,14,30.0000,B42,S,1
888,889,413,3,0,,1,2,675,23.4500,,S,0
889,890,81,1,1,26.0,0,0,8,30.0000,C148,C,1


In [96]:
tit.dtypes

PassengerId      int64
Name             int64
Pclass           int64
Sex              int64
Age            float64
SibSp            int64
Parch            int64
Ticket           int64
Fare           float64
Cabin           object
Embarked        object
Survived         int64
dtype: object

In [97]:
tit["Embarked"]=le.fit_transform(tit["Embarked"])
tit

Unnamed: 0,PassengerId,Name,Pclass,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Survived
0,1,108,3,1,22.0,1,0,523,7.2500,,2,0
1,2,190,1,0,38.0,1,0,596,71.2833,C85,0,1
2,3,353,3,0,26.0,0,0,669,7.9250,,2,1
3,4,272,1,0,35.0,1,0,49,53.1000,C123,2,1
4,5,15,3,1,35.0,0,0,472,8.0500,,2,0
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,548,2,1,27.0,0,0,101,13.0000,,2,0
887,888,303,1,0,19.0,0,0,14,30.0000,B42,2,1
888,889,413,3,0,,1,2,675,23.4500,,2,0
889,890,81,1,1,26.0,0,0,8,30.0000,C148,0,1


In [98]:
del tit["PassengerId"]

In [99]:
del tit["Embarked"]

In [100]:
del tit["Cabin"]

In [101]:
del tit["Fare"]

In [102]:
tit

Unnamed: 0,Name,Pclass,Sex,Age,SibSp,Parch,Ticket,Survived
0,108,3,1,22.0,1,0,523,0
1,190,1,0,38.0,1,0,596,1
2,353,3,0,26.0,0,0,669,1
3,272,1,0,35.0,1,0,49,1
4,15,3,1,35.0,0,0,472,0
...,...,...,...,...,...,...,...,...
886,548,2,1,27.0,0,0,101,0
887,303,1,0,19.0,0,0,14,1
888,413,3,0,,1,2,675,0
889,81,1,1,26.0,0,0,8,1


In [103]:
del tit["SibSp"]

In [104]:
del tit["Parch"]

In [134]:
tit["Age"]=tit["Age"].fillna(me)
tit

Unnamed: 0,Name,Pclass,Sex,Age,Ticket,Survived
0,108,3,1,22.0,523,0
1,190,1,0,38.0,596,1
2,353,3,0,26.0,669,1
3,272,1,0,35.0,49,1
4,15,3,1,35.0,472,0
...,...,...,...,...,...,...
886,548,2,1,27.0,101,0
887,303,1,0,19.0,14,1
888,413,3,0,0.0,675,0
889,81,1,1,26.0,8,1


In [135]:
tit

Unnamed: 0,Name,Pclass,Sex,Age,Ticket,Survived
0,108,3,1,22.0,523,0
1,190,1,0,38.0,596,1
2,353,3,0,26.0,669,1
3,272,1,0,35.0,49,1
4,15,3,1,35.0,472,0
...,...,...,...,...,...,...
886,548,2,1,27.0,101,0
887,303,1,0,19.0,14,1
888,413,3,0,0.0,675,0
889,81,1,1,26.0,8,1


In [136]:
x = tit.drop("Survived",axis=1)
y = tit["Survived"]

In [137]:
x

Unnamed: 0,Name,Pclass,Sex,Age,Ticket
0,108,3,1,22.0,523
1,190,1,0,38.0,596
2,353,3,0,26.0,669
3,272,1,0,35.0,49
4,15,3,1,35.0,472
...,...,...,...,...,...
886,548,2,1,27.0,101
887,303,1,0,19.0,14
888,413,3,0,0.0,675
889,81,1,1,26.0,8


In [138]:
y

0      0
1      1
2      1
3      1
4      0
      ..
886    0
887    1
888    0
889    1
890    0
Name: Survived, Length: 891, dtype: int64

In [139]:
tit.dtypes

Name          int64
Pclass        int64
Sex           int64
Age         float64
Ticket        int64
Survived      int64
dtype: object

In [140]:
tit.isna().sum()

Name        0
Pclass      0
Sex         0
Age         0
Ticket      0
Survived    0
dtype: int64

In [141]:
me = tit["Age"].mean()
me

np.float64(23.79929292929293)

In [142]:
tit["Age"].value_counts()

Age
0.00     177
24.00     30
22.00     27
18.00     26
28.00     25
        ... 
24.50      1
0.67       1
0.42       1
34.50      1
74.00      1
Name: count, Length: 89, dtype: int64

In [143]:
tit.isnull().sum()

Name        0
Pclass      0
Sex         0
Age         0
Ticket      0
Survived    0
dtype: int64

In [144]:
tit.dtypes

Name          int64
Pclass        int64
Sex           int64
Age         float64
Ticket        int64
Survived      int64
dtype: object

In [145]:
x_train,x_test,y_train,y_test = train_test_split(x,y, test_size=0.20, random_state=42)

In [146]:
model = GaussianNB()
model.fit(x_train,y_train)

In [147]:
model.score(x_test,y_test)

0.7988826815642458

### SPAM DETECTION

In [148]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [149]:
spam = pd.read_csv("spam.csv")
spam

Unnamed: 0,Category,Message
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."
...,...,...
5567,spam,This is the 2nd time we have tried 2 contact u...
5568,ham,Will ü b going to esplanade fr home?
5569,ham,"Pity, * was in mood for that. So...any other s..."
5570,ham,The guy did some bitching but I acted like i'd...


In [150]:
spam.dtypes

Category    object
Message     object
dtype: object

In [151]:
spam.size

11144

In [152]:
spam.shape

(5572, 2)

In [153]:
spam.head()

Unnamed: 0,Category,Message
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."


In [157]:
x = spam.Message
y = spam.Category

In [158]:
x

0       Go until jurong point, crazy.. Available only ...
1                           Ok lar... Joking wif u oni...
2       Free entry in 2 a wkly comp to win FA Cup fina...
3       U dun say so early hor... U c already then say...
4       Nah I don't think he goes to usf, he lives aro...
                              ...                        
5567    This is the 2nd time we have tried 2 contact u...
5568                 Will ü b going to esplanade fr home?
5569    Pity, * was in mood for that. So...any other s...
5570    The guy did some bitching but I acted like i'd...
5571                           Rofl. Its true to its name
Name: Message, Length: 5572, dtype: object

In [159]:
y 

0        ham
1        ham
2       spam
3        ham
4        ham
        ... 
5567    spam
5568     ham
5569     ham
5570     ham
5571     ham
Name: Category, Length: 5572, dtype: object

In [160]:
x_train,x_test,y_train,y_test=train_test_split(x,y, test_size=.20,random_state=42)

In [161]:
from sklearn.feature_extraction.text import CountVectorizer  

In [162]:
cv = CountVectorizer() 
x_train_counter = cv.fit_transform(x_train)
x_train_counter 

<4457x7701 sparse matrix of type '<class 'numpy.int64'>'
	with 59275 stored elements in Compressed Sparse Row format>

In [164]:
cv.get_feature_names_out() 

array(['00', '000', '000pes', ..., 'zyada', 'èn', 'ú1'], dtype=object)

In [165]:
pd.DataFrame(x_train_counter.toarray(),columns=cv.get_feature_names_out())   

Unnamed: 0,00,000,000pes,008704050406,0089,0121,01223585236,01223585334,02,0207,...,zeros,zhong,zindgi,zoe,zogtorius,zoom,zouk,zyada,èn,ú1
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4452,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4453,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4454,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4455,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [167]:
from sklearn.naive_bayes import MultinomialNB

In [168]:
model = MultinomialNB()
model.fit(x_train_counter,y_train)

In [169]:
model.score(cv.transform(x_test),y_test)

0.9919282511210762

In [170]:
emails = ["can we get together to watch football game tommorow",
         "up to 20% discount on parking, exclusive offer just for you"]

email_counter = cv.transform(emails)
model.predict(email_counter)

array(['ham', 'spam'], dtype='<U4')