In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MaxAbsScaler
from sklearn.metrics import classification_report, confusion_matrix

In [2]:
train = pd.read_csv("Tweets/tweet4-even-train.csv", engine="python", encoding="utf-8")
test = pd.read_csv("Tweets/tweet4-even-test.csv", engine="python", encoding="utf-8")

In [3]:
vectorizer = CountVectorizer(
    input='content',
    stop_words='english',
    max_df=1.0,
    min_df=1,
    binary=False
)

vectorizer.fit(pd.concat([train.text, test.text]).values)

X_train = vectorizer.transform(train.text.values)
y_train = train.id.values

X_test = vectorizer.transform(test.text.values)
y_test = test.id.values

In [4]:
test.id.unique()

array([u'Trump', u'Obama', u'Ellen', u'Elon'], dtype=object)

In [5]:
mlp = MLPClassifier(
    max_iter=100,
    hidden_layer_sizes=(500, 200, 30, len(test.id.unique()))
)

mlp.fit(X_train, y_train)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(500, 200, 30, 4), learning_rate='constant',
       learning_rate_init=0.001, max_iter=100, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [6]:
predictions = mlp.predict(X_test)

In [7]:
print("Train set size: {}\nTest set size: {}".format(X_train.shape, X_test.shape))

Train set size: (10234, 24287)
Test set size: (2598, 24287)


In [8]:
print(confusion_matrix(y_test, predictions))

[[534  13  86  62]
 [ 34 516  62  41]
 [ 64  15 529  12]
 [ 20  16  18 576]]


In [9]:
print(classification_report(y_test, predictions))

             precision    recall  f1-score   support

      Ellen       0.82      0.77      0.79       695
       Elon       0.92      0.79      0.85       653
      Obama       0.76      0.85      0.80       620
      Trump       0.83      0.91      0.87       630

avg / total       0.83      0.83      0.83      2598

