In [None]:
# Install required libraries
# These are IPython shell escapes (`!`), so they run `pip` in a subshell.
# No versions are pinned, so a fresh run may pull newer releases.
# PyPDF2 provides PdfReader (imported in Step 1).
!pip install PyPDF2
# transformers provides pipeline (imported in Step 1).
!pip install transformers
# openpyxl is the engine passed to pd.ExcelWriter in Step 10.
!pip install openpyxl

# Step 1: Import libraries
from transformers import pipeline
from PyPDF2 import PdfReader
import pandas as pd
import re

# Step 2: Define a simple sentence tokenizer
def simple_sent_tokenize(text):
    """Split ``text`` into sentence fragments on '.', '!' and '?'.

    Every occurrence of one of these characters ends a fragment. Fragments are
    stripped of surrounding whitespace, empty fragments are dropped, and the
    terminating punctuation itself is not kept.

    Returns:
        A list of non-empty, stripped fragments in their original order.
    """
    stripped_fragments = (fragment.strip() for fragment in re.split(r'[.!?]', text))
    return [fragment for fragment in stripped_fragments if fragment]

# Step 3: Extract text from a PDF
def extract_text_from_pdf(file_path):
    """Return the text of every page of a PDF as a single string.

    Args:
        file_path: Location of the PDF; passed unchanged to ``PdfReader``.

    Returns:
        The extracted text of all pages, in page order, joined with newlines.
        A page that yields no text contributes an empty string.
    """
    reader = PdfReader(file_path)
    # Join pages with a newline so the last word of one page is not glued to
    # the first word of the next. `or ""` keeps a page with no extractable
    # text from breaking the join.
    return "\n".join(page.extract_text() or "" for page in reader.pages)

Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: PyPDF2
Successfully installed PyPDF2-3.0.1


In [None]:
# Step 4: Upload PDF files
from google.colab import files


def _upload_pdf(prompt):
    """Show ``prompt``, run a Colab upload, and return ``(uploaded, path)``.

    ``uploaded`` is whatever ``files.upload()`` returned and ``path`` is its
    first key, which this notebook uses as the uploaded file's name. If more
    than one file is uploaded, only the first key is used.

    Raises:
        ValueError: if the upload returned nothing (for example, the dialog
            was dismissed), instead of an opaque IndexError further down.
    """
    print(prompt)
    uploaded = files.upload()
    if not uploaded:
        raise ValueError(f"No file was uploaded for prompt: {prompt!r}")
    return uploaded, next(iter(uploaded))


frankenstein_file, frankenstein_path = _upload_pdf("Upload Frankenstein PDF:")
prelude_file, prelude_path = _upload_pdf("Upload The Prelude PDF:")

# Step 5: Extract text from PDFs
frankenstein_text = extract_text_from_pdf(frankenstein_path)
prelude_text = extract_text_from_pdf(prelude_path)


Upload Frankenstein PDF:


Saving frankenstein.pdf to frankenstein.pdf
Upload The Prelude PDF:


Saving prelude.pdf to prelude.pdf


In [None]:
# Step 6: Initialize sentiment analysis pipeline
# The checkpoint and revision are pinned explicitly. They are the same values
# the library fell back to when none were supplied, so scores stay the same
# but no longer depend on whatever the library's default happens to be.
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
    revision="714eb0f",
)



No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Device set to use cpu


In [None]:
# Step 7: Define custom sentiment analysis function with text cleaning
def clean_text(text):
    """Reduce ``text`` to printable ASCII without fusing adjacent words.

    Runs of whitespace (newlines, tabs, non-breaking spaces, ...) are first
    collapsed to a single space, so words separated only by a line break stay
    separate. Every remaining character outside 0x20-0x7E is then removed;
    that covers control characters and all non-ASCII characters (accented
    letters, curly quotes, dashes). Leading and trailing spaces are stripped.

    Args:
        text: The string to clean.

    Returns:
        The cleaned string; may be empty.
    """
    # Must run before the removal step: the removal pattern also matches
    # '\n' and '\t', and deleting those outright would join neighbouring words.
    collapsed = re.sub(r'\s+', ' ', text)
    return re.sub(r'[^\x20-\x7E]', '', collapsed).strip()

def custom_sentiment_analysis(text, max_sentences=500):
    """Score the leading sentences of ``text`` with ``sentiment_pipeline``.

    The text is split with ``simple_sent_tokenize``, each sentence is passed
    through ``clean_text``, and the cleaned sentence is classified.

    Note that "Fear" and "Joy" are only proxies: "Fear" is the classifier's
    score when the label contains "NEGATIVE" and "Joy" is the score when it
    contains "POSITIVE"; the other value is 0.0. No emotion-specific model is
    involved.

    Args:
        text: The full text to analyse.
        max_sentences: How many leading sentences to consider (default 500,
            chosen for performance). Pass ``None`` to consider all of them.

    Returns:
        A list of dicts with keys "Sentence", "Fear" and "Joy", one per
        analysed sentence, in order. Sentences that are empty after cleaning
        are skipped, so the list may be shorter than ``max_sentences``.
    """
    results = []
    for sentence in simple_sent_tokenize(text)[:max_sentences]:
        clean_sentence = clean_text(sentence)
        # A sentence can clean down to nothing (e.g. only non-ASCII symbols);
        # scoring an empty string would add a meaningless row.
        if not clean_sentence.strip():
            continue
        # truncation=True keeps an over-long sentence from exceeding the
        # model's maximum input length.
        sentiment = sentiment_pipeline(clean_sentence, truncation=True)[0]
        label = sentiment["label"]
        score = sentiment["score"]
        results.append({
            "Sentence": clean_sentence,
            "Fear": score if "NEGATIVE" in label else 0.0,  # Approximation for fear
            "Joy": score if "POSITIVE" in label else 0.0,   # Approximation for joy
        })
    return results

# Step 8: Run the sentence-level analysis on both texts
frankenstein_analysis, prelude_analysis = (
    custom_sentiment_analysis(frankenstein_text),
    custom_sentiment_analysis(prelude_text),
)

# Step 9: One DataFrame per text (one row per analysed sentence)
frankenstein_df, prelude_df = map(pd.DataFrame, (frankenstein_analysis, prelude_analysis))

# Step 10: Write both frames to a single workbook, one sheet per text
with pd.ExcelWriter("Sentiment_Analysis_Fear_Joy.xlsx", engine="openpyxl") as writer:
    frankenstein_df.to_excel(writer, index=False, sheet_name="Frankenstein Analysis")
    prelude_df.to_excel(writer, index=False, sheet_name="The Prelude Analysis")

print("Analysis results have been saved to Sentiment_Analysis_Fear_Joy.xlsx.")


Analysis results have been saved to Sentiment_Analysis_Fear_Joy.xlsx.


In [None]:
# Step 11: Summarize the sentiment scores
# One record per text. Each average is taken over every analysed sentence,
# including the rows where that column is 0.
summary = pd.DataFrame([
    {
        "Text": "Frankenstein",
        "Average Fear": frankenstein_df["Fear"].mean(),
        "Average Joy": frankenstein_df["Joy"].mean(),
    },
    {
        "Text": "The Prelude",
        "Average Fear": prelude_df["Fear"].mean(),
        "Average Joy": prelude_df["Joy"].mean(),
    },
])

print("Sentiment Analysis Summary:")
print(summary)


Sentiment Analysis Summary:
           Text  Average Fear  Average Joy
0  Frankenstein      0.406034     0.549111
1   The Prelude      0.340167     0.567661
