<a href="https://colab.research.google.com/github/Ibrahim-Maiga/Datasets/blob/main/Trained_model_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:

import pandas as pd

url = 'https://raw.githubusercontent.com/Ibrahim-Maiga/Datasets/main/stock_data.csv'
data = pd.read_csv(url)


In [4]:
import re
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Clean the text data
def clean_text(text):
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    text = text.lower()
    return text

data['cleaned_text'] = data['Text'].apply(clean_text)
data['Sentiment'] = data['Sentiment'].replace(-1, 0)

# Tokenize the text
tokenizer = Tokenizer(num_words=5000, oov_token='')
tokenizer.fit_on_texts(data['cleaned_text'])
sequences = tokenizer.texts_to_sequences(data['cleaned_text'])
padded_sequences = pad_sequences(sequences, maxlen=50, padding='post')

# Split the data
X_train, X_test, y_train, y_test = train_test_split(padded_sequences, data['Sentiment'], test_size=0.2, random_state=42)


In [5]:
data.head(20)

Unnamed: 0,Text,Sentiment,cleaned_text
0,Kickers on my watchlist XIDE TIT SOQ PNK CPW B...,1,kickers on my watchlist xide tit soq pnk cpw b...
1,user: AAP MOVIE. 55% return for the FEA/GEED i...,1,user aap movie return for the feageed indicat...
2,user I'd be afraid to short AMZN - they are lo...,1,user id be afraid to short amzn they are look...
3,MNTA Over 12.00,1,mnta over
4,OI Over 21.37,1,oi over
5,PGNX Over 3.04,1,pgnx over
6,AAP - user if so then the current downtrend wi...,0,aap user if so then the current downtrend wil...
7,Monday's relative weakness. NYX WIN TIE TAP IC...,0,mondays relative weakness nyx win tie tap ice ...
8,GOOG - ower trend line channel test & volume s...,1,goog ower trend line channel test volume sup...
9,AAP will watch tomorrow for ONG entry.,1,aap will watch tomorrow for ong entry


In [6]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Bidirectional, LSTM, Dense

model = Sequential([
    Embedding(input_dim=5000, output_dim=64, input_length=50),
    Bidirectional(LSTM(64, dropout=0.2, return_sequences=True)),
    Bidirectional(LSTM(32)),
    Dense(1, activation='sigmoid')
])

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 50, 64)            320000    
                                                                 
 bidirectional (Bidirection  (None, 50, 128)           66048     
 al)                                                             
                                                                 
 bidirectional_1 (Bidirecti  (None, 64)                41216     
 onal)                                                           
                                                                 
 dense (Dense)               (None, 1)                 65        
                                                                 
Total params: 427329 (1.63 MB)
Trainable params: 427329 (1.63 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [7]:
history = model.fit(X_train, y_train, epochs=20, validation_data=(X_test, y_test), batch_size=64)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [8]:
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Loss: {loss}, Accuracy: {accuracy}')


Loss: 1.234459638595581, Accuracy: 0.7584124207496643


In [9]:
import pickle

# Save the model
model.save('semantic_analysis_model.h5')

# Save the Tokenizer
with open('tokenizer.pickle', 'wb') as handle:
    pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)


  saving_api.save_model(


In [10]:
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Load the trained model
model = load_model('semantic_analysis_model.h5')

# Load the Tokenizer
with open('tokenizer.pickle', 'rb') as handle:
    tokenizer = pickle.load(handle)

# Function to predict text
def predict_text(text):
    cleaned_text = clean_text(text)
    sequence = tokenizer.texts_to_sequences([cleaned_text])
    padded_sequence = pad_sequences(sequence, maxlen=50, padding='post')
    prediction = model.predict(padded_sequence)
    return 'Positive' if prediction >= 0.5 else 'Negative'

In [11]:
# Example usage
new_text = "Don't buy stock today!!"
prediction = predict_text(new_text)
print(f'The sentiment of the news headline "{new_text}" is {prediction}.')


# Example usage
new_text = "Today is the best day to buy stock!"
prediction = predict_text(new_text)
print(f'The sentiment of the news headline "{new_text}" is {prediction}.')

The sentiment of the news headline "Don't buy stock today!!" is Negative.
The sentiment of the news headline "Today is the best day to buy stock!" is Positive.
