In [1]:
!pip install tensorflow==1.15

Collecting tensorflow==1.15
[?25l  Downloading https://files.pythonhosted.org/packages/3f/98/5a99af92fb911d7a88a0005ad55005f35b4c1ba8d75fba02df726cd936e6/tensorflow-1.15.0-cp36-cp36m-manylinux2010_x86_64.whl (412.3MB)
[K     |████████████████████████████████| 412.3MB 28kB/s 
Collecting tensorboard<1.16.0,>=1.15.0
[?25l  Downloading https://files.pythonhosted.org/packages/1e/e9/d3d747a97f7188f48aa5eda486907f3b345cd409f0a0850468ba867db246/tensorboard-1.15.0-py3-none-any.whl (3.8MB)
[K     |████████████████████████████████| 3.8MB 35.1MB/s 
Collecting tensorflow-estimator==1.15.1
[?25l  Downloading https://files.pythonhosted.org/packages/de/62/2ee9cd74c9fa2fa450877847ba560b260f5d0fb70ee0595203082dafcc9d/tensorflow_estimator-1.15.1-py2.py3-none-any.whl (503kB)
[K     |████████████████████████████████| 512kB 38.8MB/s 
[?25hCollecting gast==0.2.2
  Downloading https://files.pythonhosted.org/packages/4e/35/11749bf99b2d4e3cceb4d55ca22590b0d7c2c62b9de38ac4a4a7f4687421/gast-0.2.2.tar.gz
Bu

In [4]:
from collections import Counter

import numpy as np
import tensorflow as tf
from sklearn.datasets import fetch_20newsgroups

categories = ["comp.windows.x", "rec.motorcycles", "sci.crypt", "sci.space"]
newsgroups_train = fetch_20newsgroups(subset='train', categories=categories)
newsgroups_test = fetch_20newsgroups(subset='test', categories=categories)
vocab = Counter()
for text in newsgroups_train.data:
    for word in text.split(' '):
        vocab[word.lower()] += 1
for text in newsgroups_test.data:
    for word in text.split(' '):
        vocab[word.lower()] += 1
total_words = len(vocab)


def get_word_2_index(vocab):
    word2index = {}
    for i, word in enumerate(vocab):
        word2index[word.lower()] = i
    return word2index


word2index = get_word_2_index(vocab)


def get_batch(df, i, batch_size):
  batches = []
  results = []
  texts = df.data[i * batch_size:i * batch_size + batch_size]
  categories = df.target[i * batch_size:i * batch_size + batch_size]

  for text in texts:
    layer = np.zeros(total_words, dtype=float)
    for word in text.split(' '):
      layer[word2index[word.lower()]] += 1
    batches.append(layer)

  for category in categories:
    y = np.zeros((4), dtype=float)
    if category == 0:
      y[0] = 1
    elif category == 1:
      y[1] = 1
    elif category == 2:
      y[2] = 1
    else:
      y[3] = 1
  results.append(y)

  return np.array(batches), np.array(results)


# Параметры обучения
learning_rate = 0.01
training_epochs = 10
batch_size = 150
display_step = 1

# Network Parameters
n_hidden_1 = 10 # скрытый слой
n_hidden_2 = 10 # скрытый слой
n_hidden_3 = 5 # скрытый слой
n_input = total_words # количество уникальных слов в наших текстах
n_classes = 4 # 4 класса
input_tensor = tf.placeholder(tf.float32,[None, n_input],name="input")
output_tensor = tf.placeholder(tf.float32,[None, n_classes],name="output")


def multilayer_perceptron(input_tensor, weights, biases):
  # скрытый слой
  layer_1_multiplication = tf.matmul(input_tensor, weights['h1'])
  layer_1_addition = tf.add(layer_1_multiplication, biases['b1'])
  layer_1 = tf.nn.relu(layer_1_addition)
  
  # скрытый слой
  layer_2_multiplication = tf.matmul(layer_1, weights['h2'])
  layer_2_addition = tf.add(layer_2_multiplication, biases['b2'])
  layer_2 = tf.nn.relu(layer_2_addition)

  layer_3_multiplication = tf.matmul(layer_2, weights['h3'])
  layer_3_addition = tf.add(layer_3_multiplication, biases['b3'])
  layer_3 = tf.nn.relu(layer_3_addition)

  # выходной слой
  out_layer_multiplication = tf.matmul(layer_3, weights['out'])
  out_layer_addition = out_layer_multiplication + biases['out']
  return out_layer_addition


# инициализация параметров сети
weights = {
 'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
 'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
 'h3': tf.Variable(tf.random_normal([n_hidden_2, n_hidden_3])),
 'out': tf.Variable(tf.random_normal([n_hidden_3, n_classes]))
}
biases = {
 'b1': tf.Variable(tf.random_normal([n_hidden_1])),
 'b2': tf.Variable(tf.random_normal([n_hidden_2])),
 'b3': tf.Variable(tf.random_normal([n_hidden_3])),
 'out': tf.Variable(tf.random_normal([n_classes]))
}

# создание модели
prediction = multilayer_perceptron(input_tensor, weights, biases)
# Фукнция потерь
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=output_tensor))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)
init = tf.global_variables_initializer()

# Запуск
with tf.Session() as sess:
  sess.run(init)
  # Цикл обучения
  for epoch in range(training_epochs):
    avg_cost = 0
    total_batch = int(len(newsgroups_train.data)/batch_size)
    # Проход по всем батчам
    for i in range(total_batch):
      batch_x,batch_y = get_batch(newsgroups_train,i,batch_size)
      c,_ = sess.run([loss,optimizer], feed_dict={input_tensor: batch_x,output_tensor:batch_y})
    # Вычисляем среднее фукнции потерь
      avg_cost += c / total_batch
    print("Эпоха:", '%04d' % (epoch+1), "loss=", "{:.16f}".format(avg_cost))
  print("Обучение завершено!")
  # Тестирование
  correct_prediction = tf.equal(tf.argmax(prediction, 1), tf.argmax(output_tensor, 1))
  # Расчет точности
  accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
  total_test_data = len(newsgroups_test.target)
  batch_x_test,batch_y_test = get_batch(newsgroups_test,0,total_test_data)
  print("Точность:", accuracy.eval({input_tensor: batch_x_test, output_tensor: batch_y_test}))

Эпоха: 0001 loss= 188.6810341278712144
Эпоха: 0002 loss= 24.6067061821619646
Эпоха: 0003 loss= 1.7154735962549845
Эпоха: 0004 loss= 1.5524506131807962
Эпоха: 0005 loss= 1.5262028694152832
Эпоха: 0006 loss= 1.4997873028119408
Эпоха: 0007 loss= 1.4774873534838362
Эпоха: 0008 loss= 1.4622506340344745
Эпоха: 0009 loss= 1.4442233363787336
Эпоха: 0010 loss= 1.4331837773323057
Обучение завершено!
Точность: 0.9974732
