In [1]:
import numpy as np
import pandas as pd  
import nltk
import gensim 
from sklearn.datasets import fetch_20newsgroups 
from sklearn.model_selection import train_test_split


In [2]:
#这就是个定义拿data 的function
categories = ['talk.politics.guns','rec.sport.baseball'] # We focus on 2 news categories
def get_data():
    data = fetch_20newsgroups(subset='all',
                              shuffle=True,
                              categories=categories,
                              remove=('headers', 'footers', 'quotes'))
    return data

In [3]:
# get text data and their labels
dataset = get_data()
print(dataset.target_names)

corpus, labels = dataset.data, dataset.target

print('Sample document:', corpus[10])
print('Class label:',labels[10])
print('Actual class label:', dataset.target_names[labels[10]])

# split training dataset and testing dataset
train_corpus, test_corpus, train_labels, test_labels = train_test_split(corpus,
                                                                        labels,
                                                                        test_size=0.3)

['rec.sport.baseball', 'talk.politics.guns']
Sample document: For those who didn't figure it out, the below message was a reply to another
in sci.crypt, for which the poster put t.p.g. in the Followup-To line. I
didn't notice that. Apologies to those who were confused.

The substance makes little sense unless one reads the prior messages.

However, I don't wish to enter into this discussion here, as it will be yet
another rehearsal of a long-tired set of arguments. Suffice it to say that I
disagree both with the interpretation of "well-regulated" in the Second
Amendment offered by gun lovers, and what I think to be their distortion of
the same phrase in the associated Federalist papers. My Webster and my
reading of the language convinces me that the word meant both under control,
and disciplined, and not 'of good marksmanship'. I think the latter a
special interest pleading. No one has yet shown a contemporateous reference
in which "well regulated" unambiguously meant 'of good marksman

In [5]:
#bow features
from sklearn.feature_extraction.text import CountVectorizer #tokenizes and counts words

# build bag of words features' vectorizer and get features
bow_vectorizer=CountVectorizer(min_df=1, ngram_range=(1,1))


bow_train_features = bow_vectorizer.fit_transform(train_corpus)
bow_test_features = bow_vectorizer.transform(test_corpus) 


bow_train_features.A # Array 形式 
dftr_count = pd.DataFrame(data=bow_train_features.A, columns=bow_vectorizer.get_feature_names())
dftr_count.head()

Unnamed: 0,00,000,000152,000th,001,002,003,004,008,0094,...,zilch,zimring,zip,zog,zone,zones,zoning,zot,zupcic,zzzzzzt
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [22]:
# tfidf features
from sklearn.feature_extraction.text import TfidfVectorizer #alternatively, use TfidfTransformer()

tfidf_vectorizer=TfidfVectorizer(min_df=1, 
                                 norm='l2',
                                 smooth_idf=True,
                                 use_idf=True,
                                 ngram_range=(1,1))
tfidf_train_features = tfidf_vectorizer.fit_transform(train_corpus)  
tfidf_test_features = tfidf_vectorizer.transform(test_corpus) 
dftr_tfidf = pd.DataFrame(data=tfidf_train_features.A, columns=tfidf_vectorizer.get_feature_names())
dftr_tfidf.head()

Unnamed: 0,00,000,000152,000th,001,002,003,004,008,0094,...,zilch,zimring,zip,zog,zone,zones,zoning,zot,zupcic,zzzzzzt
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
### 这里讲的是word to vect了 
#这和上面的chart 内容一点都没关系

# tokenize documents for word2vec
tokenized_train = [nltk.word_tokenize(text)
                   for text in train_corpus]
tokenized_test = [nltk.word_tokenize(text)
                   for text in test_corpus]  

In [7]:
#word2vec就是要一个一个token的啊，同时还保持了原有文本，一个文本一个list
tokenized_train[:5]

[['As',
  'far',
  'as',
  'I',
  'know',
  ',',
  'there',
  'is',
  'no',
  'FAQ',
  'for',
  'tpg',
  '.',
  'Somebody',
  'was',
  'working',
  'on',
  'one',
  ',',
  'but',
  'I',
  'think',
  'it',
  '``',
  'died',
  'in',
  'committee',
  '.',
  "''"],
 ['I',
  'like',
  'Alomar',
  '.',
  'But',
  'I',
  "'d",
  'like',
  'to',
  'differ',
  'with',
  'your',
  'opinion',
  'about',
  '``',
  'a',
  'city',
  'which',
  'is',
  'likely',
  'to',
  'pour',
  'in',
  'the',
  'votes',
  '...',
  "''",
  '.',
  'I',
  'attended',
  'many',
  'games',
  'last',
  'year',
  'during',
  'the',
  'balloting',
  '.',
  'I',
  'know',
  'that',
  'a',
  'great',
  'number',
  'of',
  'the',
  'attendees',
  'DID',
  'NOT',
  'fill',
  'out',
  'their',
  'ballots',
  ',',
  'but',
  'left',
  'them',
  ',',
  'beer',
  'soaked',
  'and',
  'torn',
  'on',
  'the',
  'floor',
  'of',
  'the',
  'stands',
  '.',
  'Toronto',
  'gets',
  'no',
  'more',
  'and',
  'no',
  'less',
  'vote

In [8]:
# build word2vec model                   
wv_model = gensim.models.Word2Vec(tokenized_train,
                               size=200,                          #set the size or dimension for the word vectors 
                               window=60,                        #specify the length of the window of words taken as context
                               min_count=10)                   #ignores all words with total frequency lower than                     

In [9]:
wv_model[list(wv_model.wv.vocab)[4]]

  """Entry point for launching an IPython kernel.


array([-1.1535941 , -1.3129113 ,  0.23458871, -0.33068755,  0.43720597,
       -0.13894087, -0.8571105 , -0.00467367,  0.18444167, -0.6611506 ,
       -0.1670258 , -0.518119  ,  0.46856806, -0.91829175, -0.37597236,
        0.21820845, -0.28196785,  0.00758011, -0.36847767,  0.0980043 ,
       -0.38790295,  0.18841547, -0.01788563,  0.44226685, -0.24593763,
       -0.7527973 , -0.26011997,  0.01758193,  0.56317204, -0.31191692,
        0.10937613, -0.17004125,  0.60209215, -0.24984527, -0.39378774,
        0.2490019 ,  0.41330323, -0.35673538, -0.06816277,  0.1013827 ,
       -0.18191503,  0.10383177,  0.02907011, -0.43524066,  0.44067192,
       -0.02053437,  0.4044874 ,  1.0910525 , -0.6536461 , -0.20244399,
        0.31993043, -0.06161489,  0.8232507 , -0.00562888, -0.40769097,
        0.27065396, -0.22417703, -0.31463748, -0.14538246, -0.12616844,
       -0.01433324, -0.10571785, -0.1433176 ,  0.15190828, -0.24930778,
       -0.06964333, -0.21610467,  0.06465088, -0.3740505 ,  0.35

In [10]:
def average_word_vectors(words,
                         model, 
                         vocabulary, 
                         num_features): 
    
    feature_vector = np.zeros((num_features,),dtype="float64")#建一个维度与corpus一致的array
    nwords = 0.
    
    for word in words:
        if word in vocabulary: 
            nwords = nwords + 1.
            feature_vector = np.add(feature_vector, model[word])
    
    if nwords:
        feature_vector = np.divide(feature_vector, nwords)
        
    return feature_vector 
   

def averaged_word_vectorizer(corpus, model, num_features):
    vocabulary = set(model.wv.index2word)
    
    features = [average_word_vectors(tokenized_sentence, model, vocabulary, num_features)
                    for tokenized_sentence in corpus] #将每一个文本，就是每一个新闻，单独地放在 vectors function里头
    return np.array(features)
#累计相加求平均

##

In [11]:
# averaged word vector features from word2vec
avg_wv_train_features = averaged_word_vectorizer(corpus=tokenized_train,
                                                 model=wv_model,
                                                 num_features=200)                   
avg_wv_test_features = averaged_word_vectorizer(corpus=tokenized_test,
                                                model=wv_model,
                                                num_features=200) 

  if sys.path[0] == '':


In [12]:
print(avg_wv_train_features.shape)

(1332, 200)


In [13]:
from sklearn import metrics

# define a function to evaluate our classification models based on four metrics
def get_metrics(true_labels, predicted_labels):
    
    print ('Accuracy:', np.round(                                                    
                        metrics.accuracy_score(true_labels, 
                                               predicted_labels),
                        2))
    print ('Precision:', np.round(
                        metrics.precision_score(true_labels, 
                                               predicted_labels),
                        2))
    print ('Recall:', np.round(
                        metrics.recall_score(true_labels, 
                                               predicted_labels),
                        2))
    print ('F1 Score:', np.round(
                        metrics.f1_score(true_labels, 
                                               predicted_labels),
                        2))
                        

In [14]:
# define a function that trains the model, performs predictions and evaluates the predictions
def train_predict_evaluate_model(classifier, 
                                 train_features, train_labels, 
                                 test_features, test_labels):
    # build model    
    classifier.fit(train_features, train_labels)
    # predict using model
    predictions = classifier.predict(test_features) 
    # evaluate model prediction performance   
    get_metrics(true_labels=test_labels, 
                predicted_labels=predictions)
    return predictions    

In [15]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import SGDClassifier

mnb = MultinomialNB()
svm = SGDClassifier(loss='hinge', max_iter=100)

# Multinomial Naive Bayes with bag of words features
mnb_bow_predictions = train_predict_evaluate_model(classifier=mnb,
                                           train_features=bow_train_features,
                                           train_labels=train_labels,
                                           test_features=bow_test_features,
                                           test_labels=test_labels)

Accuracy: 0.95
Precision: 0.96
Recall: 0.94
F1 Score: 0.95


In [16]:
#Support Vector Machine with bag of words features
svm_bow_predictions = train_predict_evaluate_model(classifier=svm,
                                           train_features=bow_train_features,
                                           train_labels=train_labels,
                                           test_features=bow_test_features,
                                           test_labels=test_labels)

Accuracy: 0.92
Precision: 0.96
Recall: 0.89
F1 Score: 0.92




In [17]:
#Support Vector Machine with bag of words features
svm_bow_predictions = train_predict_evaluate_model(classifier=svm,
                                           train_features=bow_train_features,
                                           train_labels=train_labels,
                                           test_features=bow_test_features,
                                           test_labels=test_labels)

Accuracy: 0.92
Precision: 0.95
Recall: 0.88
F1 Score: 0.91


In [23]:
# Support Vector Machine with tfidf features
svm_tfidf_predictions = train_predict_evaluate_model(classifier=svm,
                                           train_features=tfidf_train_features,
                                           train_labels=train_labels,
                                           test_features=tfidf_test_features,
                                           test_labels=test_labels)

Accuracy: 0.93
Precision: 0.98
Recall: 0.88
F1 Score: 0.92


In [18]:
#Support Vector Machine with bag of words features
svm_bow_predictions = train_predict_evaluate_model(classifier=svm,
                                           train_features=bow_train_features,
                                           train_labels=train_labels,
                                           test_features=bow_test_features,
                                           test_labels=test_labels)

Accuracy: 0.92
Precision: 0.95
Recall: 0.89
F1 Score: 0.92


In [19]:
#Support Vector Machine with bag of words features
svm_bow_predictions = train_predict_evaluate_model(classifier=svm,
                                           train_features=bow_train_features,
                                           train_labels=train_labels,
                                           test_features=bow_test_features,
                                           test_labels=test_labels)

Accuracy: 0.91
Precision: 0.95
Recall: 0.87
F1 Score: 0.91


In [25]:
# Observe false positive output
class_names = dataset.target_names
print (class_names[0], '->', class_names[1])

rec.sport.baseball -> talk.politics.guns


In [26]:
# Look at some misclassified documents in detail
import re

num = 0
for document, label, predicted_label in zip(test_corpus, test_labels, svm_tfidf_predictions):
    if label == 0 and predicted_label == 1:
        print ('Actual Label:', class_names[label])
        print ('Predicted Label:', class_names[predicted_label])
        print ('Document:-')
        print (re.sub('\n', ' ', document))
        num += 1
        if num == 4:
            break

Actual Label: rec.sport.baseball
Predicted Label: talk.politics.guns
Document:-
  I was in fact going to suggest that Roger take his way of discussion over to r.s.football.pro. There this kind of hormone-only reasoning is the standard. Being he canadian, and hockey what it is, I would have suggested that r.s.h would work too. It is important in a thread that everyone involved use the same body part to produce a post (brain being the organ of choice here).
Actual Label: rec.sport.baseball
Predicted Label: talk.politics.guns
Document:-
If there is a "USA Today ftp site" could someone please post it to the     newsgroup so everyone will stop posting the "send it to me too" articles.    I'm sure many people woulds like to know so why not just post it to the net    rather than mailing hundreds of people.      Just a thought.        Thanks,    Ryan Robin.
Actual Label: rec.sport.baseball
Predicted Label: talk.politics.guns
Document:-
         When I say "black," I mean US-born black people f

## My Part


In [27]:
#Model！
import xgboost as xgb
from sklearn.linear_model import LogisticRegression # logistics 

from sklearn.linear_model import Perceptron # perceptron

from sklearn.naive_bayes import GaussianNB # gaussion 

from sklearn.ensemble import RandomForestClassifier # 

from sklearn.svm import LinearSVC #linear

from sklearn.svm import SVC #

from sklearn.neighbors import KNeighborsClassifier # 

from sklearn.model_selection import KFold #



from sklearn.model_selection import GridSearchCV #
from sklearn.metrics import confusion_matrix

from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import roc_auc_score
import csv
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import RandomizedSearchCV
from keras.models import Sequential
from keras.layers import Dense

Using TensorFlow backend.


In [28]:
#These model have been tuned
# Classifiers that we are going to use
clf1 = GaussianNB()

#perceptron
clf2 = Perceptron(tol=1e-3, random_state=42,alpha= 0.0001, n_iter= 15)

#Tuned Decision Tree Parameters: {'min_samples_leaf': 7, 'max_features': 3, 'criterion': 'entropy', 'max_depth': 8
clf3 = RandomForestClassifier(n_estimators='warn', criterion='entropy', max_depth=8, 
                          min_samples_split=2, min_samples_leaf=7, 
                          min_weight_fraction_leaf=0.0, max_features=3, 
                          max_leaf_nodes=None, min_impurity_decrease=0.0, 
                          min_impurity_split=None, bootstrap=True, oob_score=False, 
                          n_jobs=None, random_state=None, verbose=0, warm_start=False, 
                          class_weight=None)

#linear SVM
clf4_5 = LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=0, tol=1e-05, verbose=3)
#SVM {'C': 10, 'degree': 1, 'gamma': 0.01, 'kernel': 'poly'}
clf4 = SVC(C=10, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=1, gamma=0.01,
  kernel='poly', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=3)

#k-nearest neighbor
clf5 = KNeighborsClassifier(n_neighbors= 41)
#'penalty': 'l2', 'C': 0.05179474679231213

#Logistics regresion
clf6 = LogisticRegression(solver='liblinear', C=0.4393970560760795,penalty='l2')


Clf_list = [clf1,clf2,clf3,clf4,clf5,clf6]
#Keras


#pytorch





In [29]:
# Perceptron with bag of words features
Per_bow_predictions = train_predict_evaluate_model(classifier=clf2,
                                           train_features=bow_train_features.A,
                                           train_labels=train_labels,
                                           test_features=bow_test_features.A,
                                           test_labels=test_labels)
print(

)
# per with tfidf features
per_tfidf_predictions = train_predict_evaluate_model(classifier=clf2,
                                           train_features=tfidf_train_features,
                                           train_labels=train_labels,
                                           test_features=tfidf_test_features,
                                           test_labels=test_labels)



Accuracy: 0.91
Precision: 0.92
Recall: 0.89
F1 Score: 0.91

Accuracy: 0.91
Precision: 0.94
Recall: 0.87
F1 Score: 0.9




In [30]:
# Random_forest with bag of words features
rdf_bow_predictions = train_predict_evaluate_model(classifier=clf3,
                                           train_features=bow_train_features.A,
                                           train_labels=train_labels,
                                           test_features=bow_test_features.A,
                                           test_labels=test_labels)

# rdf with tfidf features
rdf_tfidf_predictions = train_predict_evaluate_model(classifier=clf3,
                                           train_features=tfidf_train_features,
                                           train_labels=train_labels,
                                           test_features=tfidf_test_features,
                                           test_labels=test_labels)



Accuracy: 0.51
Precision: 0.0
Recall: 0.0
F1 Score: 0.0
Accuracy: 0.51
Precision: 0.0
Recall: 0.0
F1 Score: 0.0


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


In [31]:
# SVM with bag of words features
svm_bow_predictions = train_predict_evaluate_model(classifier=clf4,
                                           train_features=bow_train_features.A,
                                           train_labels=train_labels,
                                           test_features=bow_test_features.A,
                                           test_labels=test_labels)

# Support Vector Machine with tfidf features
svm_tfidf_predictions = train_predict_evaluate_model(classifier=clf4,
                                           train_features=tfidf_train_features,
                                           train_labels=train_labels,
                                           test_features=tfidf_test_features,
                                           test_labels=test_labels)

[LibSVM]Accuracy: 0.9
Precision: 0.95
Recall: 0.83
F1 Score: 0.89
[LibSVM]Accuracy: 0.84
Precision: 0.98
Recall: 0.68
F1 Score: 0.81


In [32]:
# Linear svm with bag of words features
lsvm_bow_predictions = train_predict_evaluate_model(classifier=clf4_5,
                                           train_features=bow_train_features.A,
                                           train_labels=train_labels,
                                           test_features=bow_test_features.A,
                                           test_labels=test_labels)

lsvm_tfidf_predictions = train_predict_evaluate_model(classifier=clf4_5,
                                           train_features=tfidf_train_features,
                                           train_labels=train_labels,
                                           test_features=tfidf_test_features,
                                           test_labels=test_labels)

[LibLinear]Accuracy: 0.9
Precision: 0.96
Recall: 0.83
F1 Score: 0.89
[LibLinear]Accuracy: 0.94
Precision: 0.96
Recall: 0.91
F1 Score: 0.93


In [33]:
# KNN with bag of words features
KNN_bow_predictions = train_predict_evaluate_model(classifier=clf5,
                                           train_features=bow_train_features.A,
                                           train_labels=train_labels,
                                           test_features=bow_test_features.A,
                                           test_labels=test_labels)

# KNN with tfidf features
KNN_tfidf_predictions = train_predict_evaluate_model(classifier=clf5,
                                           train_features=tfidf_train_features,
                                           train_labels=train_labels,
                                           test_features=tfidf_test_features,
                                           test_labels=test_labels)

Accuracy: 0.64
Precision: 0.73
Recall: 0.42
F1 Score: 0.54
Accuracy: 0.51
Precision: 1.0
Recall: 0.0
F1 Score: 0.01


In [34]:
# Logistics with bag of words features
log_bow_predictions = train_predict_evaluate_model(classifier=clf6,
                                           train_features=bow_train_features.A,
                                           train_labels=train_labels,
                                           test_features=bow_test_features.A,
                                           test_labels=test_labels)

# Support Vector Machine with tfidf features
log_tfidf_predictions = train_predict_evaluate_model(classifier=clf6,
                                           train_features=tfidf_train_features,
                                           train_labels=train_labels,
                                           test_features=tfidf_test_features,
                                           test_labels=test_labels)

Accuracy: 0.92
Precision: 0.97
Recall: 0.86
F1 Score: 0.91
Accuracy: 0.91
Precision: 0.95
Recall: 0.86
F1 Score: 0.9


In [35]:
#data preparing
clf_list = [mnb,clf2,clf1,clf4,clf6]
bow = (bow_train_features.A,train_labels,bow_test_features.A,test_labels)


tif = (tfidf_train_features.A,train_labels,tfidf_test_features.A,test_labels)
w2v = (avg_wv_train_features,train_labels,avg_wv_test_features,test_labels)

In [39]:
def Majority_vote(data,clfs):
    X_train, y_train, X_test, y_test = data
    vote = np.zeros(X_test.shape[0])
    for clf in clfs:
        clf.fit(X_train,y_train)
        predictions = clf.predict(X_test)
        vote = np.add(vote,predictions)
        print(clf)
        print(vote)
    answer = np.array([1 if n >= np.round(len(clfs)) else 0 for n in vote])
    pred = get_metrics(true_labels=y_test, 
                predicted_labels=answer)
    return pred
            
    

## ensemble

In [41]:
Majority_vote(bow,clfs=clf_list)

MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)
[1. 1. 1. 0. 0. 0. 1. 1. 1. 0. 0. 0. 0. 1. 1. 1. 0. 0. 1. 1. 1. 0. 1. 0.
 1. 1. 0. 0. 1. 1. 1. 1. 1. 1. 0. 0. 0. 1. 1. 1. 0. 0. 1. 0. 0. 0. 1. 1.
 1. 0. 1. 1. 0. 0. 1. 1. 1. 1. 0. 1. 1. 1. 1. 0. 1. 1. 1. 0. 1. 1. 1. 1.
 0. 1. 0. 1. 1. 0. 0. 1. 0. 1. 0. 0. 1. 0. 1. 0. 1. 0. 0. 1. 1. 0. 1. 0.
 0. 1. 0. 1. 1. 1. 1. 1. 0. 0. 0. 1. 0. 1. 0. 1. 0. 1. 1. 0. 0. 0. 0. 1.
 1. 1. 0. 0. 1. 0. 1. 1. 0. 1. 1. 0. 0. 0. 1. 0. 1. 1. 1. 0. 0. 0. 0. 0.
 1. 0. 0. 1. 0. 1. 1. 1. 0. 0. 0. 1. 0. 0. 1. 0. 1. 0. 1. 0. 1. 1. 1. 1.
 1. 1. 0. 0. 0. 1. 0. 0. 1. 0. 0. 1. 0. 1. 0. 0. 0. 1. 1. 0. 0. 1. 1. 0.
 0. 0. 1. 1. 0. 1. 1. 1. 1. 1. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0.
 1. 0. 0. 0. 1. 0. 1. 0. 1. 0. 1. 1. 1. 0. 0. 1. 1. 0. 1. 0. 1. 0. 1. 0.
 0. 1. 1. 0. 0. 0. 1. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 1. 1.
 0. 0. 1. 0. 0. 1. 1. 1. 0. 1. 1. 0. 0. 1. 1. 0. 1. 1. 0. 0. 0. 1. 1. 1.
 1. 1. 1. 1. 0. 1. 0. 1. 1. 0. 0. 0. 1. 1. 0. 0. 1. 0. 0. 1. 0. 0



Perceptron(alpha=0.0001, class_weight=None, early_stopping=False, eta0=1.0,
      fit_intercept=True, max_iter=None, n_iter=15, n_iter_no_change=5,
      n_jobs=None, penalty=None, random_state=42, shuffle=True, tol=0.001,
      validation_fraction=0.1, verbose=0, warm_start=False)
[2. 2. 2. 0. 1. 0. 2. 2. 2. 0. 0. 0. 0. 2. 2. 2. 0. 0. 2. 2. 2. 0. 2. 0.
 2. 2. 0. 0. 2. 2. 2. 1. 1. 2. 0. 0. 0. 2. 1. 2. 0. 0. 2. 0. 0. 1. 2. 2.
 2. 0. 2. 2. 0. 0. 2. 2. 2. 2. 0. 2. 1. 2. 2. 0. 2. 2. 2. 0. 2. 2. 2. 2.
 0. 2. 0. 2. 2. 0. 1. 2. 0. 2. 0. 0. 2. 0. 2. 0. 2. 0. 0. 2. 2. 0. 2. 0.
 0. 2. 0. 2. 2. 2. 2. 2. 0. 0. 0. 2. 0. 2. 0. 2. 0. 2. 2. 0. 0. 0. 0. 2.
 1. 2. 0. 0. 2. 0. 2. 2. 0. 2. 2. 0. 1. 0. 2. 0. 2. 2. 2. 0. 0. 1. 0. 0.
 2. 0. 0. 2. 0. 1. 2. 2. 0. 0. 0. 2. 0. 0. 2. 0. 2. 0. 2. 0. 2. 2. 2. 2.
 2. 2. 0. 0. 0. 2. 0. 0. 2. 0. 0. 2. 0. 2. 0. 0. 0. 2. 2. 0. 0. 2. 2. 0.
 0. 0. 2. 2. 0. 2. 2. 2. 2. 2. 2. 0. 0. 0. 0. 2. 1. 0. 0. 0. 0. 2. 0. 0.
 2. 1. 0. 0. 2. 0. 2. 0. 2. 0. 2. 2. 2. 0. 0. 2. 2. 1. 2. 0.