# Random Forest Classification

## Importing the libraries

In [5]:
import numpy as np

import pandas as pd

## Importing the dataset

In [14]:
dataset = pd.read_csv(
    "Data.csv",
    header=None,
    names=["review", "sentiment"],
    engine="python",
    on_bad_lines="skip"
)
X = dataset["review"].astype(str)
y = dataset["sentiment"]






                                              review  sentiment
0                                             review  sentiment
1  One of the other reviewers has mentioned that ...   positive
2  A wonderful little production. <br /><br />The...   positive
3  I thought this was a wonderful way to spend ti...   positive
4  Basically there's a family where a little boy ...   negative


## Splitting the dataset into the Training set and Test set

In [15]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)

# Vecotrizing data

In [17]:
from sklearn.feature_extraction.text import TfidfVectorizer
vectorizer = TfidfVectorizer(stop_words="english", max_features=5000, ngram_range=(1,2))
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)


## Training the Random Forest Classification model on the Training set

In [18]:
from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1)
classifier.fit(X_train_vec, y_train)

## Making the Confusion Matrix

In [19]:
from sklearn.metrics import confusion_matrix, accuracy_score
y_pred = classifier.predict(X_test_vec)
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

[[4948  833]
 [1005 4884]]


0.8425021422450728

# Real time prediction function


In [20]:
def predict_sentiment(user_text):
    """Predict sentiment of a single text input."""
    text = user_text.lower()
    vec = vectorizer.transform([text])
    prediction = classifier.predict(vec)[0]
    confidence = classifier.predict_proba(vec).max()
    return prediction, confidence

## Creating UI for colab

In [23]:
from ipywidgets import Textarea, Button, VBox, Output
text_box = Textarea(
    placeholder="Type your review here...",
    layout={"width": "600px"}
)
button = Button(description="Analyze")
output = Output()
def analyze(b):
    with output:
        output.clear_output()
        sentiment, confidence = predict_sentiment(text_box.value)
        print(f"Sentiment: {sentiment}")
        print(f"Confidence: {confidence:.2f}")

button.on_click(analyze)
VBox([text_box, button, output])



VBox(children=(Textarea(value='', layout=Layout(width='600px'), placeholder='Type your review here...'), Butto…