# Importing Libraries

In [4]:
import torch
from transformers import pipeline
from datasets import load_dataset
import pandas as pd

# Loading Dataset

In [5]:
dataset = pd.read_csv("IMDB Dataset.csv")

In [6]:
dataset.head()

Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,positive
1,A wonderful little production. <br /><br />The...,positive
2,I thought this was a wonderful way to spend ti...,positive
3,Basically there's a family where a little boy ...,negative
4,"Petter Mattei's ""Love in the Time of Money"" is...",positive


In [8]:
dataset.tail()

Unnamed: 0,review,sentiment
49995,I thought this movie did a down right good job...,positive
49996,"Bad plot, bad dialogue, bad acting, idiotic di...",negative
49997,I am a Catholic taught in parochial elementary...,negative
49998,I'm going to have to disagree with the previou...,negative
49999,No one expects the Star Trek movies to be high...,negative


In [12]:
# Load sentiment analysis pipeline with a specific pre-trained model
sentiment_model_name = "distilbert-base-uncased-finetuned-sst-2-english"  # You can change this to any other model you prefer
sentiment_analyzer = pipeline("sentiment-analysis", model=sentiment_model_name)


In [27]:
# Load NER pipeline with a specific pre-trained model
ner_model_name = "dbmdz/bert-large-cased-finetuned-conll03-english"  # You can change this to any other NER model you prefer
ner_tagger = pipeline("ner", model=ner_model_name, aggregation_strategy="simple")

Some weights of the model checkpoint at dbmdz/bert-large-cased-finetuned-conll03-english were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [19]:
# Sentiment analysis on a few samples from IMDb dataset (first 5 reviews)
sample_texts = dataset['review'][:5].tolist()  # Convert to list of strings

In [20]:
# Results for sentiment analysis and NER
sentiment_results = sentiment_analyzer(sample_texts)
ner_results = [ner_tagger(text) for text in sample_texts]

In [25]:

from bs4 import BeautifulSoup

# Function to remove HTML tags
def clean_html(text):
    return BeautifulSoup(text, "html.parser").get_text()

# Display results
print("\nSentiment Analysis Results:")
for i, result in enumerate(sentiment_results):
    clean_text = clean_html(sample_texts[i])  # Clean HTML tags from the review text
    print(f"Review {i+1}:")
    print(f"Text: {clean_text}")
    print(f"Sentiment: {result['label']} | Confidence: {result['score']:.4f}")
    print("-" * 50)

print("\nNamed Entity Recognition Results:")
for i, entities in enumerate(ner_results):
    clean_text = clean_html(sample_texts[i])  # Clean HTML tags from the review text
    print(f"Review {i+1}:")
    print(f"Text: {clean_text}")
    if entities:
        for entity in entities:
            print(f"Entity: {entity['word']} | Label: {entity['entity_group']} | Score: {entity['score']:.4f}")
    else:
        print("No entities detected.")
    print("-" * 50)



Sentiment Analysis Results:
Review 1:
Text: One of the other reviewers has mentioned that after watching just 1 Oz episode you'll be hooked. They are right, as this is exactly what happened with me.The first thing that struck me about Oz was its brutality and unflinching scenes of violence, which set in right from the word GO. Trust me, this is not a show for the faint hearted or timid. This show pulls no punches with regards to drugs, sex or violence. Its is hardcore, in the classic use of the word.It is called OZ as that is the nickname given to the Oswald Maximum Security State Penitentary. It focuses mainly on Emerald City, an experimental section of the prison where all the cells have glass fronts and face inwards, so privacy is not high on the agenda. Em City is home to many..Aryans, Muslims, gangstas, Latinos, Christians, Italians, Irish and more....so scuffles, death stares, dodgy dealings and shady agreements are never far away.I would say the main appeal of the show is due t