In [1]:
! pip install streamlit -q


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.4/23.4 MB[0m [31m60.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m77.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
%%writefile app.py


Overwriting app.py


In [3]:
#pip install streamlit
import streamlit as st
import pandas as pd
import numpy as np
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, Activation, Dropout
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import re
import nltk
from nltk.corpus import stopwords
from sklearn.model_selection import train_test_split

# Download necessary NLTK data
nltk.download('stopwords')
nltk.download('wordnet')

# Preprocessing functions
STOPWORDS = set(stopwords.words('english'))
lemmatizer = nltk.WordNetLemmatizer()

def preprocess_text(text):
    # Remove URLs, mentions, and numbers
    text = re.sub(r'http\S+|www\S+|https\S+', '', text, flags=re.MULTILINE)
    text = re.sub(r'@\w+', '', text)
    text = re.sub(r'\d+', '', text)

    # Remove punctuations
    text = re.sub(r'[^\w\s]', '', text)

    # Remove stopwords
    text = " ".join([word for word in text.split() if word not in STOPWORDS])

    # Lemmatize words
    text = " ".join([lemmatizer.lemmatize(word) for word in text.split()])

    return text

# Load dataset with a limit of 20,000 samples
@st.cache
def load_data():
    data = pd.read_csv("/content/training.1600000.processed.noemoticon.csv", encoding="ISO-8859-1", engine="python")
    data.columns = ["label", "time", "date", "query", "username", "text"]
    data = data[["label", "text"]]
    data['label'] = data['label'].apply(lambda x: 1 if x == 4 else 0)  # Convert labels: 4 -> Positive, 0 -> Negative
    return data.sample(n=20000, random_state=42)  # Select 20,000 random samples

# Build TensorFlow model
def tensorflow_based_model(max_len):
    inputs = Input(name='inputs', shape=[max_len])
    layer = Embedding(2000, 50, input_length=max_len)(inputs)
    layer = LSTM(64)(layer)
    layer = Dense(256, name='FC1')(layer)
    layer = Activation('relu')(layer)
    layer = Dropout(0.5)(layer)
    layer = Dense(1, name='out_layer')(layer)
    layer = Activation('sigmoid')(layer)
    model = Model(inputs=inputs, outputs=layer)
    return model

# Load and preprocess data
data = load_data()
data['text'] = data['text'].apply(preprocess_text)

# Tokenization
tokenizer = Tokenizer(num_words=2000)
tokenizer.fit_on_texts(data['text'])
max_len = 50
X = tokenizer.texts_to_sequences(data['text'])
X = pad_sequences(X, maxlen=max_len)
Y = data['label']

# Train-test split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Train model
model = tensorflow_based_model(max_len)
model.compile(loss='binary_crossentropy', optimizer=RMSprop(), metrics=['accuracy'])
model.fit(X_train, Y_train, batch_size=80, epochs=1, validation_split=0.1)

# Streamlit Application
st.title("Sentiment Analysis")

st.write("Enter a tweet or text to predict the sentiment.")

# Input text box
input_text = st.text_area("Input Text", height=150)

if st.button("Predict Sentiment"):
    if input_text.strip():
        preprocessed_text = preprocess_text(input_text)
        input_sequence = tokenizer.texts_to_sequences([preprocessed_text])
        input_padded = pad_sequences(input_sequence, maxlen=max_len)
        prediction = model.predict(input_padded)
        sentiment = "Positive" if prediction[0][0] > 0.5 else "Negative"
        st.success(f"Sentiment: {sentiment}")
    else:
        st.error("Please enter some text!")


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
2024-12-12 15:26:04.092 
  command:

    streamlit run /usr/local/lib/python3.10/dist-packages/colab_kernel_launcher.py [ARGUMENTS]
2024-12-12 15:26:04.100 
`st.cache` is deprecated and will be removed soon. Please use one of Streamlit's new
caching commands, `st.cache_data` or `st.cache_resource`. More information
[in our docs](https://docs.streamlit.io/develop/concepts/architecture/caching).

**Note**: The behavior of `st.cache` was updated in Streamlit 1.36 to the new caching
logic used by `st.cache_data` and `st.cache_resource`. This might lead to some problems
or unexpected behavior in certain edge cases.

2024-12-12 15:26:04.104 No runtime found, using MemoryCacheStorageManager
2024-12-12 15:26:04.108 No runtime found, using MemoryCacheStorageManager


[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 30ms/step - accuracy: 0.5452 - loss: 0.6844 - val_accuracy: 0.6794 - val_loss: 0.5874


2024-12-12 15:26:33.725 Session state does not function when running a script without `streamlit run`


In [4]:
!wget -q -O - ipv4.icanhazip.com


35.234.59.34


In [None]:
! streamlit run app.py & npx localtunnel --port 8501


[1G[0K⠙[1G[0K⠹
Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://35.234.59.34:8501[0m
[0m
your url is: https://bumpy-snails-stick.loca.lt
2024-12-12 15:31:29.876443: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-12-12 15:31:29.894109: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-12-12 15:31:29.899578: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register c