<a href="https://colab.research.google.com/github/RoshaniVijayan/TwitterSentimentAnalysis/blob/main/Run_streamlit_app_Colab_ngrok.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!git clone https://github.com/RoshaniVijayan/TwitterSentimentAnalysis.git

Cloning into 'TwitterSentimentAnalysis'...
remote: Enumerating objects: 45, done.[K
remote: Counting objects: 100% (45/45), done.[K
remote: Compressing objects: 100% (44/44), done.[K
remote: Total 45 (delta 21), reused 0 (delta 0), pack-reused 0[K
Receiving objects: 100% (45/45), 2.00 MiB | 3.51 MiB/s, done.
Resolving deltas: 100% (21/21), done.


In [2]:
# Install required libraries
!pip install numpy
!pip install gdown
!pip install beautifulsoup4
!pip install textblob
!pip install scikit-learn




In [4]:
import re
import pandas as pd
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pickle
import spacy

In [5]:
# Load spaCy model
nlp = spacy.load("en_core_web_sm")

# Define stopwords
stopwords = nlp.Defaults.stop_words

# Read the CSV file
df = pd.read_csv('/content/TwitterSentimentAnalysis/twittersentiment.csv', header=None, index_col=[0])
df = df[[2, 3]].reset_index(drop=True)
df.columns = ['sentiment', 'text']
df.head()

Unnamed: 0,sentiment,text
0,Positive,im getting on borderlands and i will murder yo...
1,Positive,I am coming to the borders and I will kill you...
2,Positive,im getting on borderlands and i will kill you ...
3,Positive,im coming on borderlands and i will murder you...
4,Positive,im getting on borderlands 2 and i will murder ...


In [6]:
# Drop NaN values and texts with length less than 1
df.dropna(inplace=True)
df = df[df['text'].apply(len) > 1]
df.head()

Unnamed: 0,sentiment,text
0,Positive,im getting on borderlands and i will murder yo...
1,Positive,I am coming to the borders and I will kill you...
2,Positive,im getting on borderlands and i will kill you ...
3,Positive,im coming on borderlands and i will murder you...
4,Positive,im getting on borderlands 2 and i will murder ...


In [7]:
# Necessary Functions for data cleaning

# Function to get basic text features
def get_basic_features(df, stopwords):
    df['char_counts'] = df['text'].apply(len)
    df['word_counts'] = df['text'].apply(lambda x: len(x.split()))
    df['avg_wordlength'] = df.apply(lambda x: len(x['text'].replace(" ", "")) / len(x['text'].split()) if len(x['text'].split()) != 0 else 0, axis=1)
    df['stopwords_counts'] = df['text'].apply(lambda x: len([t for t in x.split() if t in stopwords]))
    df['hashtag_counts'] = df['text'].apply(lambda x: len([t for t in x.split() if t.startswith('#')]))
    df['mentions_counts'] = df['text'].apply(lambda x: len([t for t in x.split() if t.startswith('@')]))
    df['digits_counts'] = df['text'].apply(lambda x: len(re.findall(r'[0-9,.]+', x)))
    df['uppercase_counts'] = df['text'].apply(lambda x: len([t for t in x.split() if t.isupper()]))
    return df

# Get basic features
df = get_basic_features(df, stopwords)
df.head()

Unnamed: 0,sentiment,text,char_counts,word_counts,avg_wordlength,stopwords_counts,hashtag_counts,mentions_counts,digits_counts,uppercase_counts
0,Positive,im getting on borderlands and i will murder yo...,53,11,3.909091,6,0,0,1,0
1,Positive,I am coming to the borders and I will kill you...,51,12,3.333333,6,0,0,1,2
2,Positive,im getting on borderlands and i will kill you ...,50,10,4.1,5,0,0,1,0
3,Positive,im coming on borderlands and i will murder you...,51,10,4.2,5,0,0,1,0
4,Positive,im getting on borderlands 2 and i will murder ...,57,12,3.833333,6,0,0,2,0


In [8]:
# Data cleaning
df['text'] = df['text'].apply(lambda x: x.lower())
df.head()

Unnamed: 0,sentiment,text,char_counts,word_counts,avg_wordlength,stopwords_counts,hashtag_counts,mentions_counts,digits_counts,uppercase_counts
0,Positive,im getting on borderlands and i will murder yo...,53,11,3.909091,6,0,0,1,0
1,Positive,i am coming to the borders and i will kill you...,51,12,3.333333,6,0,0,1,2
2,Positive,im getting on borderlands and i will kill you ...,50,10,4.1,5,0,0,1,0
3,Positive,im coming on borderlands and i will murder you...,51,10,4.2,5,0,0,1,0
4,Positive,im getting on borderlands 2 and i will murder ...,57,12,3.833333,6,0,0,2,0


In [9]:
# Function to remove emails from text
def remove_emails(x):
    return re.sub(r'([a-z0-9+._-]+@[a-z0-9+._-]+\.[a-z0-9+_-]+)', "", x)

df['text'] = df['text'].apply(remove_emails)

In [10]:
# Function to remove URLs from text
def remove_urls(x):
    return re.sub(r'(http|https|ftp|ssh)://([\w_-]+(?:(?:\.[\w_-]+)+))([\w.,@?^=%&:/~+#-]*[\w@?^=%&/~+#-])?', '', x)

df['text'] = df['text'].apply(remove_urls)

In [11]:
# Function to remove HTML tags from text
def remove_html_tags(x):
    return BeautifulSoup(x, 'lxml').get_text().strip()

df['text'] = df['text'].apply(remove_html_tags)

  return BeautifulSoup(x, 'lxml').get_text().strip()


In [12]:
# Function to remove special characters from text using spaCy tokenizer
def remove_special_chars(x):
    tokens = [token.text for token in nlp(x)]
    return ' '.join(tokens)

df['text'] = df['text'].apply(remove_special_chars)

In [13]:
# Function to remove 'RT' (retweet) from text
def remove_rt(x):
    return re.sub(r'\brt\b', '', x).strip()

df['text'] = df['text'].apply(remove_rt)

In [14]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(df['text'], df['sentiment'], test_size=0.2, random_state=42)



In [15]:
# Model building with TfidfVectorizer using spaCy's stop words
tfidf_vectorizer = TfidfVectorizer(stop_words=list(stopwords))
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)




In [16]:
# Model Training
clf = RandomForestClassifier(n_estimators=100, n_jobs=-1)
clf.fit(X_train_tfidf, y_train)

# Evaluation
predictions = clf.predict(X_test_tfidf)
print("Accuracy:", accuracy_score(y_test, predictions))



Accuracy: 0.9166108506363028


In [17]:
# Save model
with open('/content/TwitterSentimentAnalysis/twitter_sentiment.pkl', 'wb') as f:
    pickle.dump((clf, tfidf_vectorizer), f)

In [18]:
%%writefile app.py

import re
import pandas as pd
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
import pickle
import spacy
import streamlit as st

# Load spaCy model
nlp = spacy.load("en_core_web_sm")

# Function to remove emails from text
def remove_emails(x):
    return re.sub(r'([a-z0-9+._-]+@[a-z0-9+._-]+\.[a-z0-9+_-]+)', "", x)

# Function to remove URLs from text
def remove_urls(x):
    return re.sub(r'(http|https|ftp|ssh)://([\w_-]+(?:(?:\.[\w_-]+)+))([\w.,@?^=%&:/~+#-]*[\w@?^=%&/~+#-])?', '', x)

# Function to remove HTML tags from text
def remove_html_tags(x):
    return BeautifulSoup(x, 'lxml').get_text().strip()

# Function to remove special characters from text
def remove_special_chars(x):
    x = re.sub(r'[^\w\s]+', "", x)  # Corrected regex pattern
    x = ' '.join(x.split())
    return x

# Function to remove 'RT' (retweet) from text
def remove_rt(x):
    return re.sub(r'\brt\b', '', x).strip()

# Load model and vectorizer
with open('/content/TwitterSentimentAnalysis/twitter_sentiment.pkl', 'rb') as f:
    clf, tfidf_vectorizer = pickle.load(f)

def predict_sentiment(tweet):
    tweet = tweet.lower()
    tweet = remove_urls(tweet)
    tweet = remove_html_tags(tweet)
    tweet = remove_special_chars(tweet)
    tweet = remove_rt(tweet)

    # Vectorize the tweet
    tweet_vectorized = tfidf_vectorizer.transform([tweet])

    # Predict sentiment
    sentiment = clf.predict(tweet_vectorized)[0]

    return sentiment

def main():
    st.title("Twitter Sentiment Analysis")
    st.sidebar.title("Options")

    option = st.sidebar.selectbox("Choose an option", ["Predict Sentiment", "About"])

    if option == "Predict Sentiment":
        st.header("Predict Sentiment")
        tweet = st.text_area("Enter a tweet")
        if st.button("Predict"):
            if tweet:
                sentiment = predict_sentiment(tweet)
                st.write("Sentiment:", sentiment)
            else:
                st.warning("Please enter a tweet.")
    else:
        st.sidebar.info("This app performs sentiment analysis on tweets.")
        st.sidebar.text("Built with Streamlit")

if __name__ == "__main__":
    main()


Writing app.py


In [19]:
# Step 1: Install the necessary packages
!pip install pyngrok
!pip install streamlit
!pip install streamlit-option-menu

# !pip install -r "/content/TwitterSentimentAnalysis/requirements.txt"

Collecting pyngrok
  Downloading pyngrok-7.1.6-py3-none-any.whl (22 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.1.6
Collecting streamlit
  Downloading streamlit-1.34.0-py2.py3-none-any.whl (8.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.5/8.5 MB[0m [31m19.4 MB/s[0m eta [36m0:00:00[0m
Collecting gitpython!=3.1.19,<4,>=3.0.7 (from streamlit)
  Downloading GitPython-3.1.43-py3-none-any.whl (207 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.3/207.3 kB[0m [31m17.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m41.0 MB/s[0m eta [36m0:00:00[0m
Collecting watchdog>=2.1.5 (from streamlit)
  Downloading watchdog-4.0.0-py3-none-manylinux2014_x86_64.whl (82 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m83.0/8

In [20]:
import os
from threading import Thread
from pyngrok import ngrok

In [21]:
# Add your ngrok token here (Token on RV)
ngrok.set_auth_token('2gMWkgc9hI0D9VSxPhBpTaZ8TNS_6nEW3sXWqu5qReLmn3E2t')



In [22]:
def run_streamlit():
    # Change the port if 8501 is already in use or if you prefer another port
    os.system('streamlit run /content/app.py --server.port 8501')

In [23]:
# Start a thread to run the Streamlit app
thread = Thread(target=run_streamlit)
thread.start()

In [24]:
# Open a tunnel to the streamlit port 8501
public_url = ngrok.connect(addr='8501', proto='http', bind_tls=True)
print('Your Streamlit app is live at:', public_url)

Your Streamlit app is live at: NgrokTunnel: "https://7ff5-35-237-149-53.ngrok-free.app" -> "http://localhost:8501"


In [25]:
ngrok.kill()