In [5]:
import numpy as np
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Embedding, GRU, Dense, Dropout, Input, Attention, Concatenate, Flatten
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.callbacks import ReduceLROnPlateau
from nltk.stem import PorterStemmer
from nltk.corpus import stopwords
import nltk

TypeError: Unable to convert function return value to a Python type! The signature was
	() -> handle

In [8]:
# Download the NLTK stop-word corpus and create the stemmer
nltk.download('stopwords')
# Module-level stemmer instance; preprocess_text reads it as a global.
stemmer = PorterStemmer()

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\NetDriver\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [11]:
def preprocess_text(texts):
    """Drop English stop words and Porter-stem the remaining words of each text.

    Each text is tokenized on whitespace only (no punctuation handling).

    Args:
        texts: Iterable of strings to process.

    Returns:
        A list with one string per input text: the stems of the surviving
        words joined by single spaces.
    """
    stop_words = set(stopwords.words('english'))
    processed_texts = []
    for text in texts:
        # The stop-word list is lowercase, so compare the lowercased token;
        # otherwise capitalised stop words such as "The" slip past the filter
        # and end up in the output once the stemmer has lowercased them.
        words = [
            stemmer.stem(word)
            for word in text.split()
            if word.lower() not in stop_words
        ]
        processed_texts.append(' '.join(words))
    return processed_texts

In [None]:
def load_glove_embeddings(file_path, embedding_dim):
    """Read a GloVe-style text file into a ``{token: vector}`` dictionary.

    Every line is expected to hold a token followed by ``embedding_dim``
    whitespace-separated numbers.

    Args:
        file_path: Path to the UTF-8 encoded embeddings file.
        embedding_dim: Number of coefficients stored per token.

    Returns:
        Dict mapping each token to a 1-D ``float32`` array of length
        ``embedding_dim``.

    Raises:
        ValueError: If one of the trailing ``embedding_dim`` fields of a line
            is not a valid number.
    """
    embeddings_index = {}
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            values = line.split()
            # Blank or truncated lines cannot hold a token plus a full vector.
            if len(values) <= embedding_dim:
                continue
            # Split from the right: the last `embedding_dim` fields are the
            # coefficients, everything before them is the token. This keeps
            # tokens that themselves contain spaces from being parsed as numbers.
            split_at = len(values) - embedding_dim
            word = ' '.join(values[:split_at])
            coefs = np.asarray(values[split_at:], dtype='float32')
            embeddings_index[word] = coefs
    return embeddings_index

In [None]:
def create_embedding_matrix(word_index, embeddings_index, embedding_dim, vocab_size=None):
    """Build a weight matrix whose row ``i`` is the pretrained vector of word ``i``.

    Args:
        word_index: Dict mapping word -> integer index.
        embeddings_index: Dict mapping word -> 1-D vector of length
            ``embedding_dim``.
        embedding_dim: Number of columns in the returned matrix.
        vocab_size: Upper bound on the number of rows. Defaults to the
            module-level ``max_words`` when omitted, which is what the
            three-argument call relied on.

    Returns:
        Array of shape ``(min(vocab_size, len(word_index) + 1), embedding_dim)``.
        Rows for words without a pretrained vector stay all-zero.
    """
    if vocab_size is None:
        vocab_size = max_words
    num_words = min(vocab_size, len(word_index) + 1)
    embedding_matrix = np.zeros((num_words, embedding_dim))
    for word, i in word_index.items():
        # Bound by the real row count, not just the vocabulary cap: a small
        # word_index with sparse indices would otherwise index past the end.
        if i >= num_words:
            continue
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector
    return embedding_matrix

In [15]:
# Path to the GloVe file
file_path = 'glove.6B.300d.txt'
# Vector size passed to the loader and used as the Embedding output_dim;
# the file name indicates 300-dimensional vectors.
embedding_dim = 300
embeddings_index = load_glove_embeddings(file_path, embedding_dim)

# Vocabulary cap: passed to imdb.load_data(num_words=...) and used as the
# Embedding input_dim; create_embedding_matrix also reads it as a global.
max_words = 10000
# Length every review is padded or truncated to by pad_sequences.
maxlen = 200

In [16]:
from tensorflow.keras.datasets import imdb
# Load the IMDB reviews, keeping only word indices below max_words.
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_words)
word_index = imdb.get_word_index()
# Shift every word index up by 3 to make room for the special tokens below.
# NOTE(review): presumably mirrors the default index offset used by
# imdb.load_data, so that word_index lines up with x_train/x_test - confirm.
word_index = {k: (v + 3) for k, v in word_index.items()}
word_index["<PAD>"] = 0
word_index["<START>"] = 1
word_index["<UNK>"] = 2
word_index["<UNUSED>"] = 3

# Rows of the matrix are looked up by the shifted indices; the special tokens
# get zero rows unless the GloVe file happens to contain them.
embedding_matrix = create_embedding_matrix(word_index, embeddings_index, embedding_dim)

In [17]:
# Pad or truncate the reviews to maxlen words
x_train = pad_sequences(x_train, maxlen=maxlen)
x_test = pad_sequences(x_test, maxlen=maxlen)

In [37]:
from tensorflow.keras.layers import Bidirectional
from tensorflow.keras.regularizers import l2

def create_model_with_regularization(embedding_matrix):
    """Build and compile the bidirectional-GRU + attention binary classifier.

    The network embeds the token ids with frozen pretrained weights, encodes
    them with a bidirectional GRU, applies an Attention layer that receives the
    GRU output as both of its inputs, concatenates the GRU and attention
    outputs, flattens them and feeds two L2-regularized dense layers (each
    followed by dropout) into a single sigmoid unit.

    Reads the module-level ``maxlen``, ``max_words`` and ``embedding_dim``.

    Args:
        embedding_matrix: Initial weights for the (non-trainable) Embedding layer.

    Returns:
        A compiled Keras ``Model`` (Adam optimizer, binary cross-entropy loss,
        accuracy metric).
    """
    tokens = Input(shape=(maxlen,))
    embedded = Embedding(
        input_dim=max_words,
        output_dim=embedding_dim,
        weights=[embedding_matrix],
        input_length=maxlen,
        trainable=False,
    )(tokens)

    # Sequence encoder: one output vector per time step.
    recurrent = GRU(256, return_sequences=True, kernel_regularizer=l2(0.01))
    encoded = Bidirectional(recurrent)(embedded)

    # Attention over the encoder output, joined with it on the feature axis.
    attended = Attention()([encoded, encoded])
    merged = Concatenate(axis=-1)([encoded, attended])
    hidden = Flatten()(merged)

    # Classifier head: Dense(256) -> Dropout -> Dense(128) -> Dropout.
    for units in (256, 128):
        hidden = Dense(units, activation='relu', kernel_regularizer=l2(0.01))(hidden)
        hidden = Dropout(0.5)(hidden)
    prediction = Dense(1, activation='sigmoid')(hidden)

    classifier = Model(inputs=tokens, outputs=prediction)
    classifier.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return classifier


In [None]:

# Build the model with the factory defined in this notebook
# (the previous name, create_model_with_improvements, is not defined here).
model = create_model_with_regularization(embedding_matrix)

# Use ReduceLROnPlateau to adapt the learning rate: halve it when val_loss has
# not improved for 2 epochs, never going below 1e-6.
lr_reduction = ReduceLROnPlateau(monitor='val_loss', patience=2, verbose=1, factor=0.5, min_lr=1e-6)

# Train the model
# NOTE(review): the test split is also used as validation data here, so the
# learning-rate schedule is tuned on the same data that is reported below.
model.fit(x_train, y_train, epochs=10, batch_size=64, validation_data=(x_test, y_test), callbacks=[lr_reduction])

# Evaluate the model
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f'Test accuracy: {test_acc}')

In [7]:
import os
# Restrict the process to the first CUDA device.
# NOTE(review): this presumably only takes effect if it runs before TensorFlow
# initializes its GPU devices; this cell sits after the cell that imports
# tensorflow.keras, so on a top-to-bottom run it may be a no-op - confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import tensorflow as tf
# Report how many GPUs TensorFlow can see.
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

TypeError: Unable to convert function return value to a Python type! The signature was
	() -> handle

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Note: you may need to restart the kernel to use updated packages.
