In [1]:
import joblib
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score
from termcolor import colored

In [2]:
# Read the cleaned training and test splits from their CSV files
# (training file first, then test file).
print(colored("Loading train and test data...", "yellow"))
train_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in ('/content/clean_train.csv', '/content/clean_test.csv')
)
print(colored("Data loaded successfully.", "green"))

Loading train and test data...
Data loaded successfully.


In [3]:
# TF-IDF Vectorization
# Learn the vocabulary and IDF weights from the training tweets only.
print(colored("Applying TF-IDF transformation...", "yellow"))
tfidf_vectorizer = TfidfVectorizer(min_df=5, max_df=0.7, max_features=2000, ngram_range=(1, 2))
# A bare .astype(str) would stringify missing values (NaN) into the literal
# word "nan", which would then be counted as a real token. Replace missing
# tweets with empty strings before casting so they contribute no terms.
tfidf_vectorizer.fit(train_data['Clean_tweet'].fillna('').astype(str))

Applying TF-IDF transformation...


In [4]:
# Transform train and test data
# Both splits are vectorized with the vocabulary fitted on the training tweets.
# Missing tweets (NaN) are replaced with empty strings first; otherwise
# .astype(str) would turn them into the literal word "nan" and they would be
# vectorized as if the tweet contained that token.
train_tweet_vector = tfidf_vectorizer.transform(train_data['Clean_tweet'].fillna('').astype(str))
test_tweet_vector = tfidf_vectorizer.transform(test_data['Clean_tweet'].fillna('').astype(str))
print(colored("TF-IDF transformation complete.", "green"))

TF-IDF transformation complete.


In [5]:
# Fit a random forest on the TF-IDF training matrix, using the 'Sentiment'
# column as the target. The seed is fixed so repeated runs build the same forest.
print(colored("Training Random Forest Classifier...", "yellow"))
random_forest_classifier = RandomForestClassifier(
    n_estimators=200,
    random_state=42,
    n_jobs=-1,
)
random_forest_classifier.fit(
    train_tweet_vector,
    train_data['Sentiment'],
)
print(colored("Model training complete.", "green"))

Training Random Forest Classifier...
Model training complete.


In [6]:
# Prediction and Accuracy
def evaluate_model(model, train_vector, train_labels, test_vector, test_labels):
    """Print the model's accuracy on the training split, then on the test split.

    Args:
        model: Fitted estimator exposing ``predict``.
        train_vector: Feature matrix for the training split.
        train_labels: True labels matching ``train_vector``.
        test_vector: Feature matrix for the test split.
        test_labels: True labels matching ``test_vector``.

    Returns:
        None. Each accuracy is printed in green as a percentage with two decimals.
    """
    splits = (
        ("Training", train_vector, train_labels),
        ("Testing", test_vector, test_labels),
    )
    for split_name, features, expected in splits:
        predicted = model.predict(features)
        score = accuracy_score(expected, predicted)
        print(colored(f"{split_name} accuracy: {score * 100:.2f}%", "green"))

In [7]:
# Report accuracy of the fitted forest on the training and test splits.
# NOTE(review): the recorded output below shows identical training and testing
# accuracy (97.01%) — confirm that clean_test.csv is a genuinely held-out set.
evaluate_model(
    model=random_forest_classifier,
    train_vector=train_tweet_vector,
    train_labels=train_data['Sentiment'],
    test_vector=test_tweet_vector,
    test_labels=test_data['Sentiment'],
)

Training accuracy: 97.01%
Testing accuracy: 97.01%


In [8]:
# Save the model and vectorizer
# Both artifacts are written to the current working directory. The vectorizer
# is saved alongside the classifier because new text must be transformed with
# the same fitted vocabulary before prediction.
# (joblib is imported with the other dependencies in the notebook's first cell.)
joblib.dump(random_forest_classifier, 'random_forest_model.pkl')
joblib.dump(tfidf_vectorizer, 'tfidf_vectorizer.pkl')
print(colored("Model and vectorizer saved successfully.", "green"))

Model and vectorizer saved successfully.


In [9]:
# Restore the persisted classifier and vectorizer from the working directory.
# joblib.load unpickles the files, so only load artifacts from a trusted source
# (here: the files written by the save cell of this notebook).
loaded_model, loaded_vectorizer = (
    joblib.load(artifact_path)
    for artifact_path in ('random_forest_model.pkl', 'tfidf_vectorizer.pkl')
)
print(colored("Model and vectorizer loaded successfully.", "green"))

Model and vectorizer loaded successfully.


In [10]:
# Function to predict sentiment of a new example
def predict_sentiment(text):
    """Print the predicted sentiment ("Positive" or "Negative") for one text.

    The text is vectorized with ``loaded_vectorizer`` and classified with
    ``loaded_model``; a predicted label equal to 4 is reported as "Positive",
    any other label as "Negative".

    Args:
        text: The text to classify.

    Returns:
        None. The result is printed in blue.
    """
    # NOTE(review): the vectorizer was fitted on the 'Clean_tweet' column, while
    # `text` is passed through as-is — confirm whether the same cleaning should
    # be applied here first.
    features = loaded_vectorizer.transform([text])
    predicted_label = loaded_model.predict(features)[0]
    # NOTE(review): 4 is presumably the dataset's positive-class code — verify
    # against the values in the 'Sentiment' column.
    if predicted_label == 4:
        sentiment = "Positive"
    else:
        sentiment = "Negative"
    print(colored(f"The sentiment for the given text is: {sentiment}", "blue"))

# Smoke-test the reloaded model and vectorizer on one hand-written review.
example_text = "I absolutely love this product! Highly recommend it."
predict_sentiment(text=example_text)

The sentiment for the given text is: Positive
