In [4]:
import pandas as pd
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

In [5]:
# Load the complete 20 Newsgroups corpus (subset='all') and pull out the raw
# documents together with their class targets.
# NOTE(review): no `remove=` argument is passed, so the posts presumably keep
# their headers/footers/quotes — confirm this is intended.
newsgroups = fetch_20newsgroups(subset='all')
texts, labels = newsgroups.data, newsgroups.target

# Hold out 20% of the documents for testing; the fixed seed keeps the split
# repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    texts,
    labels,
    test_size=0.2,
    random_state=42,
)

In [6]:
# TF-IDF features with English stop words dropped; max_df=0.71 caps the
# document frequency of the terms that are kept (see TfidfVectorizer docs).
vector_machine = TfidfVectorizer(
    max_df=0.71,
    stop_words='english',
)

# Fit on the training texts only; the test texts are transformed with the
# already-fitted vectorizer.
X_train_vetor = vector_machine.fit_transform(X_train)
X_test_vetor = vector_machine.transform(X_test)

## Random Forest

In [7]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

# Random forest configured with 100 trees, a depth cap of 32 and a fixed seed.
random_forest = RandomForestClassifier(
    n_estimators=100,
    max_depth=32,
    random_state=42,
)

In [8]:
# Fit the forest on the TF-IDF training matrix; the value returned by fit()
# is kept in `resultado_rf`.
resultado_rf = random_forest.fit(X_train_vetor, y_train)

# Evaluate on the same data the model was trained on (training-set metrics).
train_expected_rf = y_train
train_predicted_rf = random_forest.predict(X_train_vetor)

accuracy_rf = accuracy_score(y_true=train_expected_rf, y_pred=train_predicted_rf)

# Confusion matrix, overall accuracy, then the per-class report.
print(confusion_matrix(y_true=train_expected_rf, y_pred=train_predicted_rf))
print("Accuracy:", accuracy_rf)
print(classification_report(y_true=train_expected_rf, y_pred=train_predicted_rf))

[[607   1   2   0   2   0   2   0   0   0   0   2  10   3   3  16   0   0
    0   0]
 [  0 684  27   3   6  10   5   0   2   2   2   2  23   3   2   0   0   0
    0   0]
 [  0   6 751   8   3   3   1   1   0   0   0   2  12   1   1   1   0   0
    0   0]
 [  0   7  20 724   6   2  12   3   0   0   1   2  20   2   0   0   0   0
    0   0]
 [  0   9   8  14 686   2  18   1   0   1   0   0  17   2   0   0   0   0
    0   0]
 [  0  17  25   3   0 700   0   0   0   0   1   0  18   3   5   1   0   0
    0   0]
 [  0   3   5   7   5   2 717   5   3   2   1   0  25   2   5   0   0   0
    0   0]
 [  0   1   1   4   0   0   9 743   4   0   0   1  27   0   3   0   1   0
    0   0]
 [  0   1   0   1   1   0   1   4 798   0   0   0  14   3   4   0   0   1
    0   0]
 [  1   2   1   1   1   2   2   1   1 740  10   1  15   3   2   0   0   0
    0   0]
 [  0   0   0   0   1   0   1   1   0   0 790   0   6   1   1   0   0   0
    0   0]
 [  0   3   3   2   0   1   1   0   0   0   0 768  10   2   0   0

In [None]:
# Score the fitted forest on the held-out test matrix.
test_predicted_rf = random_forest.predict(X_test_vetor)

# Note: `accuracy_rf` is rebound here and now holds the test-set accuracy.
report_rf = classification_report(y_true=y_test, y_pred=test_predicted_rf)
accuracy_rf = accuracy_score(y_true=y_test, y_pred=test_predicted_rf)

# Confusion matrix, overall accuracy, then the per-class report.
print(confusion_matrix(y_true=y_test, y_pred=test_predicted_rf))
print("Accuracy:", accuracy_rf)
print("Classification Report:\n", report_rf)

[[115   0   0   0   0   0   0   1   0   0   2   0   1   1   3  18   2   3
    2   3]
 [  0 142  14   9   3  11   1   0   0   2   0   1  10   2   5   1   1   0
    0   0]
 [  0   7 164  10   2   4   2   1   0   0   0   1   2   1   1   0   0   0
    0   0]
 [  0   5  28 116   4   4   7   1   0   0   0   3  12   1   2   0   0   0
    0   0]
 [  0   4   6   8 168   1   5   1   0   0   2   0   7   1   2   0   0   0
    0   0]
 [  0  17  11   1   1 171   0   2   1   0   0   2   4   1   4   0   0   0
    0   0]
 [  0   1   2  13   4   0 154   4   1   2   1   2   5   0   2   1   1   0
    0   0]
 [  0   0   2   2   0   0   5 165   7   2   0   0   6   3   1   1   2   0
    0   0]
 [  1   0   0   0   0   1   4   5 152   1   1   0   0   0   1   0   2   0
    0   0]
 [  0   0   1   0   0   0   0   0   2 185  18   0   2   1   1   1   0   0
    0   0]
 [  0   0   0   0   2   0   0   0   0   1 189   0   2   1   1   2   0   0
    0   0]
 [  0   1   1   0   0   1   1   0   0   0   0 191   3   0   0   0

## Neural Network

In [None]:
from sklearn.neural_network import MLPClassifier

# Multi-layer perceptron with three hidden layers (100, 50 and 25 units),
# at most 400 iterations, n_iter_no_change=20 and a fixed seed.
neural_network = MLPClassifier(
    hidden_layer_sizes=(100, 50, 25),
    max_iter=400,
    n_iter_no_change=20,
    random_state=42,
)

In [None]:
# Fit the MLP on the TF-IDF training matrix; the value returned by fit() is
# kept in `resultado`.
resultado = neural_network.fit(X_train_vetor, y_train)

# Evaluate on the same data the model was trained on (training-set metrics).
train_predict_nn = neural_network.predict(X_train_vetor)

accuracy_nn = accuracy_score(y_train, train_predict_nn)
report_nn = classification_report(y_train, train_predict_nn)

print(confusion_matrix(y_train, train_predict_nn))
print("Accuracy:", accuracy_nn)
# Print the report computed above instead of recomputing it; the printed
# text is identical.
print(report_nn)



[[648   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0]
 [  0 770   1   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0]
 [  0   0 790   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0]
 [  0   0   0 799   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0]
 [  0   0   0   0 758   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0]
 [  0   0   1   0   0 772   0   0   0   0   0   0   0   0   0   0   0   0
    0   0]
 [  0   0   0   0   0   0 779   1   0   0   0   0   2   0   0   0   0   0
    0   0]
 [  0   0   0   0   0   0   0 793   1   0   0   0   0   0   0   0   0   0
    0   0]
 [  0   0   0   0   0   0   0   0 828   0   0   0   0   0   0   0   0   0
    0   0]
 [  0   0   0   0   0   0   0   0   0 783   0   0   0   0   0   0   0   0
    0   0]
 [  0   0   0   0   0   0   0   0   0   0 801   0   0   0   0   0   0   0
    0   0]
 [  0   0   0   0   0   0   0   0   0   0   0 789   0   0   0   0

In [None]:
# Score the fitted MLP on the held-out test matrix.
test_predict_nn = neural_network.predict(X_test_vetor)

# Note: `accuracy_nn` and `report_nn` are rebound here and now hold the
# test-set values.
report_nn = classification_report(y_true=y_test, y_pred=test_predict_nn)
accuracy_nn = accuracy_score(y_true=y_test, y_pred=test_predict_nn)

# Confusion matrix, overall accuracy, then the per-class report.
print(confusion_matrix(y_true=y_test, y_pred=test_predict_nn))
print("Accuracy:", accuracy_nn)
print("Classification Report:\n", report_nn)

[[137   0   0   0   0   0   0   0   0   0   0   0   0   0   1   2   0   1
    1   9]
 [  0 184   4   6   0   6   1   0   0   1   0   0   0   0   0   0   0   0
    0   0]
 [  1  11 162  12   1   3   1   1   0   0   0   1   0   2   0   0   0   0
    0   0]
 [  0   8   7 146  11   0   5   0   0   0   0   1   5   0   0   0   0   0
    0   0]
 [  0   5   3   9 178   1   4   0   0   0   0   0   5   0   0   0   0   0
    0   0]
 [  0  29   0   0   2 177   2   0   0   2   0   0   1   0   1   0   0   0
    0   1]
 [  0   3   5  20   2   1 149   9   0   1   1   0   2   0   0   0   0   0
    0   0]
 [  0   4   3   0   0   0   1 183   0   0   0   3   0   0   0   0   0   0
    2   0]
 [  0   7   1   0   0   0   1   3 154   0   0   0   0   0   0   0   0   0
    1   1]
 [  0   1   0   0   0   0   0   0   0 205   2   0   0   0   0   1   0   0
    2   0]
 [  0   2   0   0   0   0   0   0   0   1 191   0   1   0   0   0   0   0
    3   0]
 [  0   7   1   1   0   1   0   0   0   0   0 190   0   0   0   0