In [None]:
!pip install tensorflow

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.preprocessing import LabelEncoder

In [None]:
from tensorflow.keras.layers import Attention, Dense, Embedding, GlobalAveragePooling1D, Input, LSTM
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

In [None]:
# Load the dataset; the relative path is resolved against the notebook's working directory.
data = pd.read_csv('train.csv')

In [None]:
# Lookup table from the integer `metaphorID` codes (0-6) to the corresponding candidate word.
metaphor_words = dict(enumerate(['road', 'candle', 'light', 'spice', 'ride', 'train', 'boat']))

In [None]:
# Sanity check: (number of rows, number of columns) of the loaded frame.
data.shape

(1870, 3)

In [None]:
# Summary statistics of the frame. `describe` has to be *called*: a bare
# `data.describe` only displays the bound-method repr, not the statistics.
data.describe()

<bound method NDFrame.describe of       metaphorID  label_boolean  \
0              0           True   
1              2          False   
2              2          False   
3              4          False   
4              2          False   
...          ...            ...   
1865           4           True   
1866           4           True   
1867           0           True   
1868           5           True   
1869           2           True   

                                                   text  
0     Hey , Karen !!!! I was told that on the day of...  
1     Hi Ladies ... my last chemo was Feb 17/09 , ra...  
2     I have just come form my consult with a lovely...  
3     I also still question taking Tamox for stage 1...  
4     Just checking in to say hello ladies . I had a...  
...                                                 ...  
1865  Hi there . I found my lump 3 weeks ago and it ...  
1866  Robyn-Sorry you find yourself on this web site...  
1867  I 'm happy Jule t

In [None]:
# Preview the first rows to check the column names and value formats.
data.head()

Unnamed: 0,metaphorID,label_boolean,text
0,0,True,"Hey , Karen !!!! I was told that on the day of..."
1,2,False,"Hi Ladies ... my last chemo was Feb 17/09 , ra..."
2,2,False,I have just come form my consult with a lovely...
3,4,False,I also still question taking Tamox for stage 1...
4,2,False,Just checking in to say hello ladies . I had a...


In [None]:
# Bag-of-words featuriser: English stop words are removed and the vocabulary is capped at 1000 terms.
vectorizer = CountVectorizer(stop_words='english', max_features=1000)

In [None]:
# Learn the vocabulary and build the document-term count matrix in a single step.
# NOTE(review): the vocabulary is fitted on every row here, i.e. before the train/test split
# performed afterwards, so held-out texts influence which terms are kept. Consider splitting
# the raw texts first and calling `fit_transform` on the training part only.
X = vectorizer.fit_transform(data['text'])

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    data['label_boolean'],
    test_size=0.2,
    random_state=42,
)

In [None]:
# Baseline 1: logistic regression with scikit-learn's default hyper-parameters,
# trained on the bag-of-words training split.
# NOTE(review): the name `model` is rebound by several later cells, so any cell that
# reads `model` evaluates whichever estimator was assigned most recently.
model = LogisticRegression()
model.fit(X_train, y_train)

In [None]:
# Predict on the held-out split with whatever estimator `model` currently refers to.
y_pred = model.predict(X_test)

# Compute the four summary metrics in one pass over the scoring functions.
accuracy, precision, recall, f1 = (
    score(y_test, y_pred)
    for score in (accuracy_score, precision_score, recall_score, f1_score)
)

# Accuracy is printed at full precision; the remaining metrics use four decimals.
print(f"Accuracy: {accuracy}")
print(
    f'Precision: {precision:.4f}',
    f'Recall: {recall:.4f}',
    f'F1 Score: {f1:.4f}',
    sep='\n',
)

Accuracy: 0.7994652406417112
Precision: 0.8311
Recall: 0.9127
F1 Score: 0.8700


In [None]:
# Baseline 2: multinomial naive Bayes with default settings, trained on the same
# bag-of-words training split. This rebinds `model`, replacing the previous estimator.
model = MultinomialNB()
model.fit(X_train, y_train)

In [None]:
# Predict on the held-out split with the estimator currently bound to `model`.
y_pred = model.predict(X_test)

# Evaluate each scoring function on the same (truth, prediction) pair.
accuracy, precision, recall, f1 = (
    score(y_test, y_pred)
    for score in (accuracy_score, precision_score, recall_score, f1_score)
)

# Accuracy is printed at full precision; the remaining metrics use four decimals.
print(f"Accuracy: {accuracy}")
print(
    f'Precision: {precision:.4f}',
    f'Recall: {recall:.4f}',
    f'F1 Score: {f1:.4f}',
    sep='\n',
)

Accuracy: 0.8181818181818182
Precision: 0.8819
Recall: 0.8691
F1 Score: 0.8755


In [None]:
# Add a `word` column by looking each row's `metaphorID` up in `metaphor_words`.
# With a dict mapper, IDs that are not keys of the dict become NaN.
data['word'] = data['metaphorID'].map(metaphor_words)

In [None]:
# Display the frame to confirm the new `word` column.
# NOTE(review): `data.head()` would show the same information with less output.
data

Unnamed: 0,metaphorID,label_boolean,text,word
0,0,True,"Hey , Karen !!!! I was told that on the day of...",road
1,2,False,"Hi Ladies ... my last chemo was Feb 17/09 , ra...",light
2,2,False,I have just come form my consult with a lovely...,light
3,4,False,I also still question taking Tamox for stage 1...,ride
4,2,False,Just checking in to say hello ladies . I had a...,light
...,...,...,...,...
1865,4,True,Hi there . I found my lump 3 weeks ago and it ...,ride
1866,4,True,Robyn-Sorry you find yourself on this web site...,ride
1867,0,True,I 'm happy Jule that you posted this question ...,road
1868,5,True,Hiya April RADs-I should probably have been he...,train


In [None]:
# Build the model input by prefixing each text with its candidate word (space-separated);
# presumably so the sequence model sees which word is being judged.
data['paragraph'] = data['word'] + ' ' + data['text']

In [None]:
# Learn the word -> integer-id vocabulary from every paragraph.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(list(data['paragraph']))
# Tokenizer ids start at 1 (0 is reserved), hence the extra slot in the vocabulary size.
total_words = 1 + len(tokenizer.word_index)

In [None]:
# Encode each paragraph as a list of token ids, then pad them into one 2-D array
# (with no `maxlen`, every row is padded to the length of the longest sequence).
X = pad_sequences(tokenizer.texts_to_sequences(list(data['paragraph'])))
# Target: the boolean label column.
y = data['label_boolean']

In [None]:
# Same 80/20 split and seed as before, now applied to the padded token-id sequences.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# `Attention` must be called on an explicit [query, value] list, which a `Sequential`
# stack cannot supply (each layer receives a single tensor, hence the ValueError).
# The network is therefore wired with the functional API.
sequence_input = Input(shape=(X.shape[1],), dtype='int32')
embedded = Embedding(total_words, 50)(sequence_input)
lstm_states = LSTM(100, return_sequences=True)(embedded)
# Self-attention: the LSTM states serve as both query and value.
attended = Attention()([lstm_states, lstm_states])
# Collapse the time axis so the classifier emits one probability per sample
# instead of one per time step.
pooled = GlobalAveragePooling1D()(attended)
prediction = Dense(1, activation='sigmoid')(pooled)
model = Model(inputs=sequence_input, outputs=prediction)

ValueError: ignored

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Embedding, LSTM, Dense, Attention, Concatenate

def build_lstm_attention_model(input_length, vocab_size=10000, embedding_dim=50, lstm_units=100):
    """Build an (uncompiled) LSTM + self-attention binary classifier.

    The model consumes integer token ids. Text has to be tokenised and padded
    *before* it reaches the model: `Tokenizer` and `pad_sequences` are ordinary
    Python utilities that operate on concrete strings/lists and cannot be applied
    to a symbolic Keras `Input` tensor while the graph is being built.

    Args:
        input_length: Number of token ids per sample (the padded sequence length).
        vocab_size: Size of the embedding table. Must be larger than the largest
            token id fed to the model (for a fitted `Tokenizer`:
            `len(tokenizer.word_index) + 1`).
        embedding_dim: Dimensionality of the word embeddings.
        lstm_units: Number of units in the LSTM layer.

    Returns:
        A `Model` mapping `(batch, input_length)` integer ids to a `(batch, 1)`
        sigmoid probability.
    """
    # Integer token ids, one row per sample.
    token_ids = tf.keras.layers.Input(shape=(input_length,), dtype='int32')

    # Embed the ids and encode the sequence, keeping one LSTM state per time step.
    embedded = Embedding(input_dim=vocab_size, output_dim=embedding_dim)(token_ids)
    lstm_states = LSTM(units=lstm_units, return_sequences=True)(embedded)

    # Self-attention: the LSTM states act as both query and value.
    attention = Attention()([lstm_states, lstm_states])

    # Concatenate LSTM output and attention output along the feature axis.
    lstm_attention_concat = Concatenate(axis=-1)([lstm_states, attention])

    # Average over time so the classifier sees one vector per sample.
    avg_pooling = tf.keras.layers.GlobalAveragePooling1D()(lstm_attention_concat)

    # Fully connected layer for classification.
    output_layer = Dense(units=1, activation='sigmoid')(avg_pooling)

    return Model(inputs=token_ids, outputs=output_layer)


# Example usage (toy data: a smoke test that the pipeline runs, not a meaningful evaluation)
label_boolean_text = "word True Hey , Karen !!!! I was told that on the day of..."
word_to_check = "Karen"

# Assuming a maximum sequence length of 20 words
max_sequence_length = 20

# Fit a vocabulary for the example. A dedicated name is used so that a `tokenizer`
# fitted on the real dataset elsewhere in the notebook is not overwritten.
example_tokenizer = Tokenizer()
example_tokenizer.fit_on_texts([label_boolean_text])
example_vocab_size = len(example_tokenizer.word_index) + 1


def encode_example_texts(texts):
    """Encode raw strings as an int32 tensor of post-padded token ids.

    Each row has `max_sequence_length` entries; words unknown to
    `example_tokenizer` are dropped by `texts_to_sequences`.
    """
    sequences = example_tokenizer.texts_to_sequences(texts)
    padded = pad_sequences(sequences, maxlen=max_sequence_length, padding='post')
    return tf.convert_to_tensor(padded, dtype=tf.int32)


# Build the model
model = build_lstm_attention_model(
    input_length=max_sequence_length,
    vocab_size=example_vocab_size,
)

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Example training data (dummy data)
# NOTE(review): this rebinds `X_train` / `y_train`, replacing any earlier train split.
X_train = encode_example_texts([label_boolean_text])
y_train = [1]  # 1 indicates metaphor, 0 indicates non-metaphor

# Convert labels to a float32 tensor
y_train = tf.convert_to_tensor(y_train, dtype=tf.float32)

# Train the model
model.fit(x=X_train, y=y_train, epochs=10, batch_size=1, verbose=1)

# Evaluate the model: `predict` returns an array of shape (1, 1), so extract the scalar
result = model.predict(encode_example_texts([word_to_check]))
probability = float(result[0][0])
if probability > 0.5:
    print(f"The word '{word_to_check}' is used as a metaphor.")
else:
    print(f"The word '{word_to_check}' is not used as a metaphor.")
