## Twitter Sentiment Analysis with Random Forest

In [63]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd

df = pd.read_csv('clean_tweet.csv', index_col=[0])
df.head()

Unnamed: 0,id,keyword,location,text,target,char_counts,word_counts,avg_wordlength,stopwords_counts,hashtag_counts,mentions_counts,digits_counts,uppercase_counts
0,1,,,our deeds are the reason of this earthquake ma...,1,57,13,4.384615,6,1,0,0,1
1,4,,,forest fire near la ronge sask canada,1,32,7,4.571429,0,0,0,1,0
2,5,,,all residents asked to shelter in place are be...,1,112,22,5.090909,9,0,0,1,0
3,6,,,130 people receive wildfires evacuation orders...,1,57,8,7.125,1,1,0,1,0
4,7,,,just got sent this photo from ruby alaska as s...,1,72,16,4.5,6,2,0,0,0


In [64]:
X = df['text']
y = df['target']

In [65]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=42)

In [66]:
X_train.shape,X_test.shape

((6090,), (1523,))

In [67]:
from wordcloud import STOPWORDS
stopwords = set(STOPWORDS)


stopwords_list = list(stopwords)


In [68]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline

# Create a list of stopwords
stopwords_list = list(stopwords)

# Define the pipeline
clf = Pipeline([
    ('tfidf', TfidfVectorizer(stop_words=stopwords_list)),
    ('clf', RandomForestClassifier(n_estimators=100, n_jobs=-1))
])

# Fit the model
clf.fit(X_train, y_train)

# Evaluate the model
from sklearn.metrics import accuracy_score,classification_report


predictions = clf.predict(X_test)
print("accuracy score :",accuracy_score(y_test, predictions))
print(classification_report(y_test,predictions))


accuracy score : 0.7760998030203545
              precision    recall  f1-score   support

           0       0.78      0.85      0.81       874
           1       0.77      0.68      0.72       649

    accuracy                           0.78      1523
   macro avg       0.77      0.76      0.77      1523
weighted avg       0.78      0.78      0.77      1523



In [69]:
import pickle

pickle.dump(clf, open('model/RF_twitter_sentiment.pkl', 'wb'))

In [70]:
clf.predict(['earthquake'])

array([1], dtype=int64)

In [71]:
def predict_disaster(sentences, model):
    predictions = model.predict(sentences)
    for sentence, pred in zip(sentences, predictions):
        # Interpret the prediction
        prediction_label = 'Disaster Tweet' if pred == 1 else 'Normal Tweet'
        
        # Print the sentence and its prediction
        print(f"Sentence: {sentence}")
        print(f"Prediction: {prediction_label}\n")


In [72]:
sentences = [
  "An intense hurricane has made landfall, causing widespread flooding and destruction in coastal areas. Stay safe and evacuate if you can!",
    "Heard about #earthquake is different cities, stay safe everyone.","weather is very good to play cricket",
    "@RosieGray Now in all sincerety do you think the UN would move to Israel if there was a fraction of a chance of being annihilated?"
  ]
result = predict_disaster(sentences,clf)


Sentence: An intense hurricane has made landfall, causing widespread flooding and destruction in coastal areas. Stay safe and evacuate if you can!
Prediction: Disaster Tweet

Sentence: Heard about #earthquake is different cities, stay safe everyone.
Prediction: Disaster Tweet

Sentence: weather is very good to play cricket
Prediction: Normal Tweet

Sentence: @RosieGray Now in all sincerety do you think the UN would move to Israel if there was a fraction of a chance of being annihilated?
Prediction: Normal Tweet



In [73]:
import joblib

In [74]:
# Save the model
joblib.dump(clf, 'model/rf_tweet.pkl')


['model/rf_tweet.pkl']

In [75]:
import joblib
print("joblib version:", joblib.__version__)


joblib version: 1.3.2


In [76]:
import sklearn
print("scikit-learn version:", sklearn.__version__)


scikit-learn version: 1.4.1.post1


In [82]:
! pip install joblib==1.4.1

ERROR: Could not find a version that satisfies the requirement joblib==1.4.1 (from versions: 0.3.2d.dev, 0.3.2e.dev, 0.3.2f.dev, 0.3.2g.dev, 0.7.0d, 0.1a0.dev0, 0.2a0.dev0, 0.3a0.dev0, 0.3.1a0.dev0, 0.3.2.dev0, 0.3.2a0.dev0, 0.3.2b0.dev0, 0.3.2rc0.dev0, 0.3.3a0.dev0, 0.3.3b0.dev0, 0.3.3rc0.dev0, 0.3.4.dev0, 0.3.5.dev0, 0.3.6.dev0, 0.3.7.dev0, 0.4.0.dev0, 0.4.1.dev0, 0.4.2.dev0, 0.4.3.dev0, 0.4.4.dev0, 0.4.5.dev0, 0.4.6.dev0, 0.5.0.dev0, 0.5.0a0.dev0, 0.5.1.dev0, 0.5.2.dev0, 0.5.3.dev0, 0.5.4.dev0, 0.5.5.dev0, 0.5.6.dev0, 0.5.7.dev0, 0.5.7a0.dev0, 0.5.7b0.dev0, 0.5.7, 0.6.0a0, 0.6.0b0, 0.6.0b2, 0.6.0b3, 0.6.0, 0.6.1, 0.6.2, 0.6.3, 0.6.4, 0.6.5, 0.7.0a0, 0.7.0b0, 0.7.0rc0, 0.7.1, 0.8.0a0, 0.8.0a2, 0.8.0a3, 0.8.0, 0.8.1, 0.8.2, 0.8.3, 0.8.3.post1, 0.8.4, 0.9.0b2, 0.9.0b3, 0.9.0b4, 0.9.1, 0.9.2, 0.9.3, 0.9.4, 0.10.0, 0.10.2, 0.10.3, 0.11a3, 0.11, 0.12.0, 0.12.1, 0.12.2, 0.12.3, 0.12.4, 0.12.5, 0.13.0, 0.13.1, 0.13.2, 0.14.0, 0.14.1, 0.15.0, 0.15.1, 0.16.0, 0.17.0, 1.0.0, 1.0.1, 1.1.0a0, 1.

In [78]:
# ! pip install scikit-learn==1.3.2

In [89]:
import joblib



# Load the model using joblib
try:
    model = joblib.load(r"model\rf_tweet.pkl")
    print(f"Model loaded successfully from {file_path}")
    # Explore the model's attributes and parameters
    print("Model parameters:", model.get_params())
except Exception as e:
    print(f"Error loading model: {e}")


f_tweet.pkld successfully from ..\model
Model parameters: {'memory': None, 'steps': [('tfidf', TfidfVectorizer(stop_words=['from', "we've", "i'm", 'those', 'all', 'com',
                            "you'd", 'only', 'when', 'it', 'his', "i'll",
                            'yours', 'further', "we'll", "there's", 'against',
                            "won't", 'what', 'else', 'its', 'other', 'during',
                            'yourself', 'has', 'otherwise', 'that', "shouldn't",
                            'here', 'if', ...])), ('clf', RandomForestClassifier(n_jobs=-1))], 'verbose': False, 'tfidf': TfidfVectorizer(stop_words=['from', "we've", "i'm", 'those', 'all', 'com',
                            "you'd", 'only', 'when', 'it', 'his', "i'll",
                            'yours', 'further', "we'll", "there's", 'against',
                            "won't", 'what', 'else', 'its', 'other', 'during',
                            'yourself', 'has', 'otherwise', 'that', "shouldn't",
       