In [None]:
import os
import tempfile

import fitz  #imports PyMuPDF
import numpy as np
import pytesseract
import streamlit as st
import torch
from docx import Document
from pdf2image import convert_from_path  #PyMuPDF
from PIL import Image
from transformers import AutoModelForSequenceClassification, AutoTokenizer

In [None]:
#Tesseract path (only required for local Windows users)
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"  # Change if you're on Windows

# Load pre-trained sentiment model
model_name = "nlptown/bert-base-multilingual-uncased-sentiment"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
device = torch.device("cpu")
model.to(device)

In [None]:
#Text Extraction Functions
def extract_text_from_image(image):
    """Run Tesseract OCR on ``image`` and return the recognised text.

    Args:
        image: The image to read; it is handed unchanged to
            ``pytesseract.image_to_string``.

    Returns:
        The OCR output with leading and trailing whitespace removed.
    """
    ocr_output = pytesseract.image_to_string(image)
    return ocr_output.strip()

def extract_text_from_pdf(pdf_path):
    """Extract the text layer of every page in a PDF.

    Args:
        pdf_path: Path of the PDF file, passed to ``fitz.open``.

    Returns:
        The text of all pages concatenated in page order, with leading and
        trailing whitespace removed.
    """
    # The context manager closes the document even if a page fails to parse,
    # releasing the file handle so the caller can delete the file afterwards.
    with fitz.open(pdf_path) as doc:
        return "".join(page.get_text() for page in doc).strip()

def extract_text_from_docx(docx_path):
    """Collect the paragraph text of a Word document.

    Args:
        docx_path: Location of the .docx file, passed to ``Document``.

    Returns:
        The text of every paragraph joined with newlines, with leading and
        trailing whitespace removed from the combined string.
    """
    paragraph_texts = [paragraph.text for paragraph in Document(docx_path).paragraphs]
    combined = "\n".join(paragraph_texts)
    return combined.strip()

# ---- SENTIMENT PREDICTION FUNCTION ----
def predict_sentiment(text):
    """Classify ``text`` into one of five sentiment buckets.

    The text is tokenized (truncated to at most 512 tokens), run through the
    module-level ``model`` without gradient tracking, and the class with the
    highest softmax probability is reported.

    Args:
        text: The feedback text to classify.

    Returns:
        A ``(label, confidence)`` tuple: ``label`` is one of "Very Negative",
        "Negative", "Neutral", "Positive", "Very Positive"; ``confidence`` is
        the winning class probability as a percentage rounded to two decimals.
    """
    # NOTE(review): assumes the model's five output classes are ordered from most
    # negative (index 0) to most positive (index 4) — confirm against the model card.
    sentiment_labels = ["Very Negative", "Negative", "Neutral", "Positive", "Very Positive"]

    encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
    model_inputs = {}
    for name, tensor in encoded.items():
        model_inputs[name] = tensor.to(device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(**model_inputs).logits

    # Only the first (and only) row of the batch is read.
    class_probs = torch.nn.functional.softmax(logits, dim=-1).cpu().numpy()[0]

    best_index = np.argmax(class_probs)
    predicted_label = sentiment_labels[best_index]
    confidence = round(np.max(class_probs) * 100, 2)

    return predicted_label, confidence

In [None]:
#StreamLit UI
def _extract_via_temp_file(uploaded, suffix, extractor):
    """Write the upload to a unique temp file, run ``extractor`` on its path, then delete it.

    A unique name avoids collisions between concurrent sessions, and the
    ``finally`` clause removes the file even when extraction raises.
    """
    tmp = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
    try:
        # Close the handle before extraction so the file can be reopened by path
        # (required on Windows).
        with tmp:
            tmp.write(uploaded.read())
        return extractor(tmp.name)
    finally:
        os.remove(tmp.name)


st.set_page_config(page_title="Feedback Sentiment Analysis", layout="centered")
st.title("📄 Feedback Form Sentiment Analysis")
st.write("Upload a feedback form (.docx, .pdf, .jpg, .png), extract text, and get sentiment analysis!")

uploaded_file = st.file_uploader("Upload your feedback file", type=["jpg", "png", "pdf", "docx"])

if uploaded_file:
    # Dispatch on the (lower-cased) file extension of the uploaded file name.
    file_type = uploaded_file.name.split(".")[-1].lower()
    extracted_text = ""

    if file_type in ["jpg", "png"]:
        image = Image.open(uploaded_file)
        # NOTE(review): `use_column_width` is deprecated in recent Streamlit releases
        # in favour of `use_container_width` — switch once the minimum version is confirmed.
        st.image(image, caption="Uploaded Image", use_column_width=True)
        extracted_text = extract_text_from_image(image)

    elif file_type == "pdf":
        extracted_text = _extract_via_temp_file(uploaded_file, ".pdf", extract_text_from_pdf)

    elif file_type == "docx":
        extracted_text = _extract_via_temp_file(uploaded_file, ".docx", extract_text_from_docx)

    if extracted_text:
        st.subheader("📝 Extracted Feedback Text:")
        st.write(extracted_text)

        with st.spinner("Analyzing sentiment..."):
            sentiment, confidence = predict_sentiment(extracted_text)

        st.subheader("📊 Sentiment Result:")
        st.success(f"**Sentiment:** {sentiment}")
        st.info(f"**Confidence Score:** {confidence}%")
    else:
        # Nothing to analyse: tell the user instead of failing on a truncated call.
        st.warning("No text could be extracted from the uploaded file.")

In [None]:
# Deployment
# Run the app from a terminal to host it:
#   streamlit run Model1.py