<a href="https://colab.research.google.com/github/Prachi194agrawal/AI-/blob/main/sentiment_analysis_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Run these commands to install necessary libraries
!pip install tensorflow keras torch transformers




In [None]:
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

# For TensorFlow and Keras
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional

# For PyTorch
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader


In [None]:
# Fetch the IMDB reviews, restricted to a 10,000-word vocabulary
from tensorflow.keras.datasets import imdb
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=10000)

# Pad/truncate both splits to a common length of `max_len` tokens for Keras
max_len = 200
X_train, X_test = (
    pad_sequences(split, maxlen=max_len) for split in (X_train, X_test)
)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
# TensorFlow/Keras model: embedding -> bidirectional LSTM -> dense classifier head.
# `input_length` is intentionally not passed to Embedding: the argument is
# deprecated, and the layer takes the sequence length from its input.
model_tf = Sequential([
    Embedding(input_dim=10000, output_dim=128),
    Bidirectional(LSTM(64)),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')
])

# Single sigmoid output unit, so binary cross-entropy is the matching loss.
model_tf.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model; 20% of the training split is held out for validation.
history_tf = model_tf.fit(X_train, y_train, epochs=5, batch_size=32, validation_split=0.2)


Epoch 1/5




[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m192s[0m 301ms/step - accuracy: 0.7200 - loss: 0.5182 - val_accuracy: 0.8570 - val_loss: 0.3497
Epoch 2/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m192s[0m 285ms/step - accuracy: 0.8979 - loss: 0.2564 - val_accuracy: 0.8648 - val_loss: 0.3252
Epoch 3/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m189s[0m 264ms/step - accuracy: 0.9369 - loss: 0.1639 - val_accuracy: 0.8782 - val_loss: 0.3574
Epoch 4/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m162s[0m 259ms/step - accuracy: 0.9611 - loss: 0.1063 - val_accuracy: 0.8562 - val_loss: 0.3779
Epoch 5/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m202s[0m 260ms/step - accuracy: 0.9748 - loss: 0.0759 - val_accuracy: 0.8608 - val_loss: 0.4827


In [None]:
# Score the trained Keras model on the test split (returns loss, then accuracy)
test_metrics = model_tf.evaluate(X_test, y_test)
loss, accuracy_tf = test_metrics
print(f"TensorFlow Model Accuracy: {accuracy_tf:.2f}")


[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 68ms/step - accuracy: 0.8531 - loss: 0.5023
TensorFlow Model Accuracy: 0.85


In [None]:
class IMDbDataset(Dataset):
    """Map-style dataset pairing token-id sequences with their labels.

    Args:
        X: Sequences of token ids; stored as a ``torch.long`` tensor.
        y: One label per sequence; stored as a ``torch.float32`` tensor.
    """

    def __init__(self, X, y):
        # Convert once up front so item access is a plain tensor index.
        self.X = torch.tensor(X, dtype=torch.long)
        self.y = torch.tensor(y, dtype=torch.float32)

    def __len__(self):
        """Return the number of labelled examples."""
        return self.y.size(0)

    def __getitem__(self, idx):
        """Return the ``(tokens, label)`` pair stored at position ``idx``."""
        tokens = self.X[idx]
        label = self.y[idx]
        return tokens, label

# Wrap the padded arrays as datasets
train_data = IMDbDataset(X_train, y_train)
test_data = IMDbDataset(X_test, y_test)

# Batches of 32; only the training batches are shuffled
train_loader = DataLoader(dataset=train_data, batch_size=32, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=32, shuffle=False)


In [None]:
class SentimentRNN(nn.Module):
    """Bidirectional LSTM classifier that emits one sigmoid score per sequence.

    Args:
        vocab_size: Number of rows in the embedding table.
        embed_size: Dimension of each token embedding.
        lstm_size: Hidden size of the LSTM, per direction.
        output_size: Number of output units of the final linear layer.
        lstm_layers: Number of stacked LSTM layers.
    """

    def __init__(self, vocab_size, embed_size, lstm_size, output_size, lstm_layers=1):
        super(SentimentRNN, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.lstm = nn.LSTM(embed_size, lstm_size, num_layers=lstm_layers, bidirectional=True, batch_first=True)
        # Two directions are concatenated, hence lstm_size * 2 input features.
        self.fc = nn.Linear(lstm_size * 2, output_size)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Score a batch of token-id sequences.

        Args:
            x: Integer tensor of token ids, batch dimension first.

        Returns:
            Tensor of shape ``(batch, output_size)`` with values in ``[0, 1]``.
        """
        x = self.embedding(x)
        _, (h_n, _) = self.lstm(x)
        # Use each direction's FINAL hidden state from the last layer.
        # Indexing the output sequence at the last time step would pair the
        # forward state with a backward state that has only seen one token.
        summary = torch.cat((h_n[-2], h_n[-1]), dim=1)
        return self.sigmoid(self.fc(summary))

# Seed PyTorch's global RNG so weight initialisation (and the shuffling done
# by the DataLoader afterwards) is reproducible across runs.
torch.manual_seed(42)

# Instantiate the model, define loss and optimizer
vocab_size = 10000   # matches num_words used when loading the dataset
embed_size = 128
lstm_size = 64
output_size = 1      # single sigmoid unit for binary sentiment

model_pt = SentimentRNN(vocab_size, embed_size, lstm_size, output_size)
# The model already applies a sigmoid, so plain BCELoss (not the logits variant) is used.
criterion = nn.BCELoss()
optimizer = optim.Adam(model_pt.parameters(), lr=0.001)


In [None]:
# Training loop for PyTorch model
for epoch in range(5):
    model_pt.train()
    total_loss = 0.0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        # squeeze(-1) removes only the output-unit axis, so a final batch of
        # size 1 keeps its batch dimension and still matches y_batch's shape.
        predictions = model_pt(X_batch).squeeze(-1)
        loss = criterion(predictions, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    # Report the mean per-batch loss for the epoch
    print(f"Epoch {epoch + 1}, Loss: {total_loss / len(train_loader):.4f}")


Epoch 1, Loss: 0.5773
Epoch 2, Loss: 0.3969
Epoch 3, Loss: 0.3534
Epoch 4, Loss: 0.2641
Epoch 5, Loss: 0.2187


In [None]:
# Evaluation loop
model_pt.eval()
y_pred = []
y_true = []

with torch.no_grad():
    for X_batch, y_batch in test_loader:
        # squeeze(-1) keeps the batch axis even for a batch of size 1, so the
        # result is always a 1-D array that list.extend can iterate over.
        predictions = model_pt(X_batch).squeeze(-1)
        # Round the sigmoid scores to hard 0/1 labels
        y_pred.extend(predictions.round().cpu().numpy())
        y_true.extend(y_batch.cpu().numpy())

accuracy = accuracy_score(y_true, y_pred)
print(f"PyTorch Model Accuracy: {accuracy:.2f}")


PyTorch Model Accuracy: 0.86


In [None]:
# Print both test accuracies one after the other for comparison
for heading, score in (
    ("TensorFlow Model Performance", accuracy_tf),
    ("\nPyTorch Model Performance", accuracy),
):
    print(heading)
    print(f"Accuracy: {score:.2f}")


TensorFlow Model Performance
Accuracy: 0.85

PyTorch Model Performance
Accuracy: 0.86


In [None]:
# Install necessary libraries if they are not installed
!pip install torch transformers

# Import libraries
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.metrics import accuracy_score

# Step 1: Load and Preprocess the Data
# IMDB reviews restricted to a 10,000-word vocabulary
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=10000)

# Pad/truncate both splits to a common length of `max_len` tokens
max_len = 200
X_train, X_test = (
    pad_sequences(split, maxlen=max_len) for split in (X_train, X_test)
)

# Step 2: Define a Custom Dataset Class
class IMDbDataset(Dataset):
    """Map-style dataset pairing token-id sequences with their labels.

    Args:
        X: Sequences of token ids; stored as a ``torch.long`` tensor.
        y: One label per sequence; stored as a ``torch.float32`` tensor.
    """

    def __init__(self, X, y):
        # Convert once up front so item access is a plain tensor index.
        self.X = torch.tensor(X, dtype=torch.long)
        self.y = torch.tensor(y, dtype=torch.float32)

    def __len__(self):
        """Return the number of labelled examples."""
        return self.y.size(0)

    def __getitem__(self, idx):
        """Return the ``(tokens, label)`` pair stored at position ``idx``."""
        tokens = self.X[idx]
        label = self.y[idx]
        return tokens, label

# Wrap the padded arrays as datasets
train_data = IMDbDataset(X_train, y_train)
test_data = IMDbDataset(X_test, y_test)

# Batches of 32; only the training batches are shuffled
train_loader = DataLoader(dataset=train_data, batch_size=32, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=32, shuffle=False)

# Step 3: Define the PyTorch Model
class SentimentRNN(nn.Module):
    """Bidirectional LSTM classifier that emits one sigmoid score per sequence.

    Args:
        vocab_size: Number of rows in the embedding table.
        embed_size: Dimension of each token embedding.
        lstm_size: Hidden size of the LSTM, per direction.
        output_size: Number of output units of the final linear layer.
        lstm_layers: Number of stacked LSTM layers.
    """

    def __init__(self, vocab_size, embed_size, lstm_size, output_size, lstm_layers=1):
        super(SentimentRNN, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.lstm = nn.LSTM(embed_size, lstm_size, num_layers=lstm_layers, bidirectional=True, batch_first=True)
        # Two directions are concatenated, hence lstm_size * 2 input features.
        self.fc = nn.Linear(lstm_size * 2, output_size)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Score a batch of token-id sequences.

        Args:
            x: Integer tensor of token ids, batch dimension first.

        Returns:
            Tensor of shape ``(batch, output_size)`` with values in ``[0, 1]``.
        """
        x = self.embedding(x)
        _, (h_n, _) = self.lstm(x)
        # Use each direction's FINAL hidden state from the last layer.
        # Indexing the output sequence at the last time step would pair the
        # forward state with a backward state that has only seen one token.
        summary = torch.cat((h_n[-2], h_n[-1]), dim=1)
        return self.sigmoid(self.fc(summary))

# Seed PyTorch's global RNG so weight initialisation (and the shuffling done
# by the DataLoader afterwards) is reproducible across runs.
torch.manual_seed(42)

# Instantiate the model, define loss and optimizer
vocab_size = 10000   # matches num_words used when loading the dataset
embed_size = 128
lstm_size = 64
output_size = 1      # single sigmoid unit for binary sentiment

model = SentimentRNN(vocab_size, embed_size, lstm_size, output_size)
# The model already applies a sigmoid, so plain BCELoss (not the logits variant) is used.
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Step 4: Train the Model
epochs = 5
for epoch in range(epochs):
    model.train()
    total_loss = 0.0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        # squeeze(-1) removes only the output-unit axis, so a final batch of
        # size 1 keeps its batch dimension and still matches y_batch's shape.
        predictions = model(X_batch).squeeze(-1)
        loss = criterion(predictions, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    # Report the mean per-batch loss for the epoch
    print(f"Epoch {epoch + 1}, Loss: {total_loss / len(train_loader):.4f}")

# Step 5: Evaluate the Model
model.eval()
y_pred = []
y_true = []

with torch.no_grad():
    for X_batch, y_batch in test_loader:
        # squeeze(-1) keeps the batch axis even for a batch of size 1, so the
        # result is always a 1-D array that list.extend can iterate over.
        predictions = model(X_batch).squeeze(-1)
        # Round the sigmoid scores to hard 0/1 labels
        y_pred.extend(predictions.round().cpu().numpy())
        y_true.extend(y_batch.cpu().numpy())

accuracy = accuracy_score(y_true, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")


Epoch 1, Loss: 0.5680
Epoch 2, Loss: 0.4634
Epoch 3, Loss: 0.5063
Epoch 4, Loss: 0.3549
Epoch 5, Loss: 0.2828
Model Accuracy: 0.84


In [None]:
!pip install -q streamlit

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.1/9.1 MB[0m [31m47.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m53.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
%%writefile app.py
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

# For TensorFlow and Keras
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional
import streamlit as st

# Load data (IMDB dataset in this example)
from tensorflow.keras.datasets import imdb

# Sequence length every review is padded/truncated to
max_len = 200


@st.cache_resource
def load_trained_model():
    """Train and evaluate the Keras sentiment model, once per server process.

    Streamlit re-executes this script on every widget interaction; caching
    the trained model keeps each button click from retraining it.

    Returns:
        The compiled, trained Keras model.
    """
    (X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=10000)

    # Data preprocessing for Keras (pad sequences)
    X_train = pad_sequences(X_train, maxlen=max_len)
    X_test = pad_sequences(X_test, maxlen=max_len)

    # TensorFlow/Keras model (the deprecated `input_length` argument is not passed)
    model = Sequential([
        Embedding(input_dim=10000, output_dim=128),
        Bidirectional(LSTM(64)),
        Dense(64, activation='relu'),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # Train the model
    model.fit(X_train, y_train, epochs=5, batch_size=32, validation_split=0.2)

    # Evaluate on test data
    loss, accuracy_tf = model.evaluate(X_test, y_test)
    print(f"TensorFlow Model Accuracy: {accuracy_tf:.2f}")
    return model


model_tf = load_trained_model()

# Page header of the Streamlit app
st.title("Sentence Segment Analysis App")
st.write("Analyze sentiment and highlight segments of sentences.")

def predict_segments(text):
    """Predict the overall sentiment of ``text`` and label each of its words.

    The text is encoded with the IMDB word index, using the same offsets
    ``imdb.load_data`` applies to the training data, so token ids mean the
    same thing at inference time as they did during training.

    Args:
        text: Raw text entered by the user.

    Returns:
        A tuple ``(sentiment, highlighted)``. ``sentiment`` is ``"Positive"``
        or ``"Negative"``. ``highlighted`` is a list of ``(word, sentiment)``
        pairs, one per whitespace-separated word of ``text``; every word
        carries the overall sentiment.
    """
    import re  # local import: only needed to split the user's text into words

    # Encoding conventions of imdb.load_data(num_words=10000) defaults:
    # ids are shifted by 3, 1 marks the start, 2 replaces out-of-vocabulary words.
    num_words, index_from, start_char, oov_char = 10000, 3, 1, 2
    word_index = imdb.get_word_index()

    encoded = [start_char]
    for word in re.findall(r"[a-z0-9']+", text.lower()):
        rank = word_index.get(word)
        token = oov_char if rank is None else rank + index_from
        encoded.append(token if token < num_words else oov_char)
    padded = pad_sequences([encoded], maxlen=max_len)

    # Predict sentiment; the model returns one score per input row
    prediction = model_tf.predict(padded)
    score = float(prediction[0][0])
    sentiment = "Positive" if score > 0.5 else "Negative"

    # Highlight segments based on sentiment
    highlighted = [(word, sentiment) for word in text.split()]
    return sentiment, highlighted

# Read the user's text and, on button press, show the overall and per-word result
user_input = st.text_area("Enter a sentence to analyze:", "")
if st.button("Analyze"):
    if not user_input:
        st.write("Please enter a sentence.")
    else:
        sentiment, highlighted_segments = predict_segments(user_input)
        st.write(f"Overall Sentiment: {sentiment}")
        st.write("Segments:")
        for word, segment_sentiment in highlighted_segments:
            st.write(f"{word}: {segment_sentiment}")


Writing app.py


In [None]:
%%writefile app.py
# app.py
import streamlit as st
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from tensorflow.keras.preprocessing.sequence import pad_sequences
from transformers import BertTokenizer, BertForSequenceClassification

# Install necessary libraries if running for the first time
# !pip install torch transformers tensorflow

# Load a BERT checkpoint that is already fine-tuned for IMDB sentiment.
# Plain 'bert-base-uncased' has no trained classification head, so its
# Positive/Negative output would be effectively random.
# NOTE(review): assumes label index 1 means "positive" for this checkpoint,
# which is what predict_sentiment expects - confirm against the model card.
MODEL_NAME = 'textattack/bert-base-uncased-imdb'


@st.cache_resource
def load_sentiment_model():
    """Load the tokenizer and classifier once and reuse them across Streamlit reruns."""
    cached_tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)
    cached_model = BertForSequenceClassification.from_pretrained(MODEL_NAME)
    cached_model.eval()  # inference only: disable dropout
    return cached_tokenizer, cached_model


tokenizer, model = load_sentiment_model()

# Set up Streamlit app title and description
st.title("Sentiment Analysis App")
st.write("Enter text, and this model will analyze its sentiment.")

# Define the function to process input text and get the model output
def predict_sentiment(text):
    """Classify ``text`` as ``"Positive"`` or ``"Negative"``.

    Args:
        text: Raw text to analyze; tokenized and truncated to 200 tokens.

    Returns:
        ``"Positive"`` if the highest-scoring class index is 1, otherwise
        ``"Negative"``.
    """
    # A single input needs no padding; truncation alone caps the length.
    inputs = tokenizer(text, return_tensors="pt", max_length=200, truncation=True)
    with torch.no_grad():
        outputs = model(**inputs)
    # Take argmax on the raw logits. A sigmoid first can saturate both
    # classes to exactly 1.0, and the resulting tie always resolves to class 0.
    prediction = outputs.logits.argmax(dim=1).item()
    return "Positive" if prediction == 1 else "Negative"

# Read the user's text and, on button press, display the predicted sentiment
user_input = st.text_area("Enter text here:", "")
if st.button("Analyze Sentiment"):
    if not user_input:
        st.write("Please enter some text to analyze.")
    else:
        sentiment = predict_sentiment(user_input)
        st.write(f"Sentiment: {sentiment}")

# To run, use: `streamlit run app.py`


Writing app.py


In [None]:
# Install localtunnel, which a later cell runs via npx to get a public URL for port 8501
!npm install localtunnel

[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K
added 22 packages in 3s
[1G[0K⠦[1G[0K
[1G[0K⠦[1G[0K3 packages are looking for funding
[1G[0K⠦[1G[0K  run `npm fund` for details
[1G[0K⠦[1G[0K

In [None]:
!streamlit run /content/app.py &>/content/logs.txt &

In [None]:
# Print this machine's public IPv4 address as reported by icanhazip.com
# NOTE(review): presumably this is the value localtunnel asks for when the URL is opened - confirm
! wget -q -O - ipv4.icanhazip.com

34.16.215.18


In [None]:
# Forward local port 8501 through localtunnel; the public URL is printed in the output
# assumes the Streamlit server started earlier is listening on 8501 - TODO confirm
!npx localtunnel --port 8501

[1G[0K⠙[1G[0Kyour url is: https://free-buckets-sip.loca.lt
