In [1]:
import numpy as np
from keras.utils import to_categorical
from keras import models
from keras import layers

In [2]:
import pandas as pd
from janome.tokenizer import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Positive Sentences
positive_sentences = [
    '私は幸せです。',
    '彼女はとても親切です。',
    'この本はとても面白いです。',
    '日本の食べ物は美味しいです。',
    '彼は優秀な学生です。',
    'この町は美しい景色があります。',
    '彼女の笑顔は素晴らしいです。',
    '私の家族はとても支えてくれます。',
    'あなたの努力は素晴らしいです。',
    'この映画は感動的でした。'
]

# Negative Sentences
negative_sentences = [
    '私は悲しいです。',
    '彼は無礼な人です。',
    'このテストは難しすぎます。',
    'その店のサービスは最悪です。',
    '私の仕事はつまらないです。',
    '彼の態度は腹立たしいです。',
    'このプロジェクトは失敗しました。',
    'あのレストランの料理はまずいです。',
    '彼女の態度は不快です。',
    '私の財布を失くしました。'
]

# Combine sentences
sentences = positive_sentences + negative_sentences

# Labels (1 for positive, 0 for negative)
labels = [1] * len(positive_sentences) + [0] * len(negative_sentences)

# Tokenization using Janome
tokenizer = Tokenizer()
tokenized_sentences = [tokenizer.tokenize(sentence, wakati=True) for sentence in sentences]

# Create a DataFrame
df = pd.DataFrame({'Sentences': tokenized_sentences, 'Label': labels})

# Vectorization
word_index = {}
sequences = []
index = 1
for sentence_tokens in tokenized_sentences:
    sequence = []
    for token in sentence_tokens:
        if token not in word_index:
            word_index[token] = index
            index += 1
        sequence.append(word_index[token])
    sequences.append(sequence)

# Padding sequences
max_length = max(len(sequence) for sequence in sequences)
padded_sequences = pad_sequences(sequences, maxlen=max_length)

# Print the vectorized data
print(padded_sequences)
print(labels)

# Reflect here


[[ 0  0  0  0  0  1  2  3  4  5]
 [ 0  0  0  0  6  2  7  8  4  5]
 [ 0  0  0  9 10  2  7 11  4  5]
 [ 0  0  0 12 13 14  2 15  4  5]
 [ 0  0  0 16  2 17 18 19  4  5]
 [ 0  9 20  2 21 22 23 24 25  5]
 [ 0  0  0  6 13 26  2 27  4  5]
 [ 1 13 28  2  7 29 30 31 25  5]
 [ 0  0  0 32 13 33  2 27  4  5]
 [ 0  0  9 34  2 35 36 37 38  5]
 [ 0  0  0  0  0  1  2 39  4  5]
 [ 0  0  0 16  2 40 18 41  4  5]
 [ 0  0  0  9 42  2 43 44 25  5]
 [ 0  0 45 46 13 47  2 48  4  5]
 [ 0  0  0  1 13 49  2 50  4  5]
 [ 0  0  0 16 13 51  2 52  4  5]
 [ 0  0  9 53  2 54 55 56 38  5]
 [ 0  0 57 58 13 59  2 60  4  5]
 [ 0  0  0  6 13 51  2 61  4  5]
 [ 0  1 13 62 63 64 65 56 38  5]]
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


In [3]:
# Padding sequences with max length of 100
max_length = 100
padded_sequences = pad_sequences(sequences, maxlen=max_length)

# Print the vectorized data
print(padded_sequences)
print(labels)

[[ 0  0  0 ...  3  4  5]
 [ 0  0  0 ...  8  4  5]
 [ 0  0  0 ... 11  4  5]
 ...
 [ 0  0  0 ... 60  4  5]
 [ 0  0  0 ... 61  4  5]
 [ 0  0  0 ... 56 38  5]]
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


In [21]:
input_data = np.array(padded_sequences)
labels = np.array(labels)

train_x = input_data[5:]
train_y = labels[5:]
test_x = input_data[:5]
test_y = labels[:5]

In [23]:
test_y

array([1, 1, 1, 1, 1])

In [24]:
from keras import layers, models, callbacks

model = models.Sequential()
model.add(layers.Dense(50, activation="relu", input_shape=(100,)))
model.add(layers.Dropout(0.3, noise_shape=None, seed=None))
model.add(layers.Dense(50, activation="relu"))
model.add(layers.Dropout(0.2, noise_shape=None, seed=None))
model.add(layers.Dense(50, activation="relu"))
model.add(layers.Dense(1, activation="sigmoid"))

model.summary()

model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# Define the EarlyStopping callback
early_stopping = callbacks.EarlyStopping(monitor='accuracy', patience=5, mode='max', verbose=1)

results = model.fit(train_x, train_y, epochs=100, batch_size=32, validation_data=(test_x, test_y), callbacks=[early_stopping])



Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_12 (Dense)            (None, 50)                5050      
                                                                 
 dropout_6 (Dropout)         (None, 50)                0         
                                                                 
 dense_13 (Dense)            (None, 50)                2550      
                                                                 
 dropout_7 (Dropout)         (None, 50)                0         
                                                                 
 dense_14 (Dense)            (None, 50)                2550      
                                                                 
 dense_15 (Dense)            (None, 1)                 51        
                                                                 
Total params: 10,201
Trainable params: 10,201
Non-trai

In [25]:
# Evaluate the model
loss, accuracy = model.evaluate(test_x, test_y)

# Print the evaluation results
print("Loss:", loss)
print("Accuracy:", accuracy)



Loss: 0.9230367541313171
Accuracy: 0.0


In [26]:
model.save('model.hdf5')


In [27]:
from keras.models import load_model

# Load the saved model
model = load_model('model.hdf5')

# Print the summary of the model
model.summary()

# Access the layers and their weights
for layer in model.layers:
    print(layer.name)
    print(layer.get_weights())


Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_12 (Dense)            (None, 50)                5050      
                                                                 
 dropout_6 (Dropout)         (None, 50)                0         
                                                                 
 dense_13 (Dense)            (None, 50)                2550      
                                                                 
 dropout_7 (Dropout)         (None, 50)                0         
                                                                 
 dense_14 (Dense)            (None, 50)                2550      
                                                                 
 dense_15 (Dense)            (None, 1)                 51        
                                                                 
Total params: 10,201
Trainable params: 10,201
Non-trai

In [28]:
from janome.tokenizer import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.models import load_model

# Random Japanese sentence
sentence = "あなたの日本語の文章をここに入力してください。"

# Tokenization using Janome
tokenizer = Tokenizer()
tokens = tokenizer.tokenize(sentence, wakati=True)

# Convert tokens to indices based on the word_index
input_sequence = [word_index.get(token, 0) for token in tokens]

# Pad the sequence
max_length = 100  # Set the maximum sequence length used during training
padded_sequence = pad_sequences([input_sequence], maxlen=max_length)

# Load the saved model
model = load_model('model.hdf5')

# Make predictions
predictions = model.predict(padded_sequence)

# Interpret the predictions
if predictions[0] > 0.5:
    sentiment = "Positive"
else:
    sentiment = "Negative"

print("Sentence: ", sentence)
print("Sentiment: ", sentiment)


Sentence:  あなたの日本語の文章をここに入力してください。
Sentiment:  Negative


In [None]:
# Convert sentences to tokenized sequences
tokenized_sequences = []
for sentence in df['Sentence']:
    sentence_tokens = tokenizer.tokenize(sentence, wakati=True)
    sequence = [word_index.get(token, 0) for token in sentence_tokens]
    tokenized_sequences.append(sequence)

# Pad sequences
padded_sequences = pad_sequences(tokenized_sequences, maxlen=max_length)

# Make predictions
predictions = model.predict(padded_sequences)

# Interpret the predictions
sentiments = []
for prediction in predictions:
    if prediction > 0.5:
        sentiment = "Positive"
    else:
        sentiment = "Negative"
    sentiments.append(sentiment)

# Add predicted sentiments to the DataFrame
df['Predicted Sentiment'] = sentiments

# Print the DataFrame with predicted sentiments
print(df)