In [16]:
import spacy
import pandas as pd
import requests
from transformers import pipeline
import streamlit as st


In [2]:
# Load SpaCy's small English model
nlp = spacy.load('en_core_web_sm')

In [3]:
# Load semantic analysis model (using DistilBERT for simplicity)
semantic_model = pipeline("text-classification", model="distilbert-base-uncased")




Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [4]:
# Load your dataset (replace 'your_books_dataset.csv' with the path to your dataset)
df = pd.read_csv("C:/Users/DELL/Documents/NLP/nlp project/Amazon_Books_Scraping/Books_df.csv")

In [5]:
# Function for syntax verification using POS tagging and dependency parsing
def syntax_verification(text):
    doc = nlp(text)
    feedback = []
    for token in doc:
        if token.dep_ == 'ROOT' and token.pos_ not in ['VERB', 'AUX']:
            feedback.append(f"Possible syntax error at '{token.text}' - expected a verb.")
    return feedback if feedback else "The sentence is syntactically correct."

In [6]:
# Function for semantic verification using a pre-trained model
def semantic_verification(text):
    result = semantic_model(text)[0]
    if result['label'] == 'LABEL_1':  # Assuming LABEL_1 means semantically correct
        return "The sentence makes sense."
    else:
        return "The sentence may not be semantically correct."


In [7]:
# Function to get part-of-speech tagging
def pos_tagging(text):
    doc = nlp(text)
    return [(token.text, token.pos_) for token in doc]


In [8]:
# Function to perform NER (Named Entity Recognition)
def named_entity_recognition(text):
    doc = nlp(text)
    return [(ent.text, ent.label_) for ent in doc.ents]

In [9]:
# Function to perform chunking (Noun/Verb Phrases)
def chunking(text):
    doc = nlp(text)
    return [(chunk.text, chunk.root.dep_) for chunk in doc.noun_chunks]


In [10]:
# Function for dependency parsing
def dependency_parsing(text):
    doc = nlp(text)
    return [(token.text, token.dep_, token.head.text) for token in doc]


In [11]:
# Function to fetch book recommendations from your dataset
def fetch_books_from_dataset(query):
    # Filter dataset based on the query
    matched_books = df[df['Description'].str.contains(query, case=False, na=False) |
                       df['Title'].str.contains(query, case=False, na=False)]
    
    if not matched_books.empty:
        return [{"title": row['Title'], "author": row['Author']} for idx, row in matched_books.iterrows()]
    else:
        return ["No matching books found in dataset."]


In [12]:
# Function to fetch book data from Google Books API
def fetch_books_from_google(query):
    google_api_url = f"https://www.googleapis.com/books/v1/volumes?q={query}"
    response = requests.get(google_api_url)
    
    if response.status_code == 200:
        books = response.json().get('items', [])
        results = [{"title": book["volumeInfo"]["title"], 
                    "author": book["volumeInfo"].get("authors", ["Unknown"])[0]} for book in books[:5]]
        return results if results else ["No matching books found in Google Books API."]
    return ["Failed to fetch data from Google Books API."]


In [13]:
# Function to fetch book data from Open Library API
def fetch_books_from_open_library(query):
    open_library_api_url = f"https://openlibrary.org/search.json?q={query}"
    response = requests.get(open_library_api_url)
    
    if response.status_code == 200:
        books = response.json().get('docs', [])
        results = [{"title": book["title"], 
                    "author": book.get("author_name", ["Unknown"])[0]} for book in books[:5]]
        return results if results else ["No matching books found in Open Library API."]
    return ["Failed to fetch data from Open Library API."]

In [14]:
# Streamlit Interface
def main():
    st.title("NLP Exam Preparation Chatbot with Dataset & API Integration")

    user_input = st.text_input("Ask me a question or input a sentence:")

    if st.button("Analyze"):
        if user_input:
            # Step 1: Syntax Verification
            syntax_feedback = syntax_verification(user_input)
            st.subheader("Syntax Verification")
            st.write(syntax_feedback)

            # Step 2: Semantic Verification
            semantic_feedback = semantic_verification(user_input)
            st.subheader("Semantic Verification")
            st.write(semantic_feedback)

            # Step 3: POS Tagging
            pos_tags = pos_tagging(user_input)
            st.subheader("Part-of-Speech Tagging")
            for word, tag in pos_tags:
                st.write(f"{word}: {tag}")

            # Step 4: Named Entity Recognition
            entities = named_entity_recognition(user_input)
            st.subheader("Named Entity Recognition")
            for entity, label in entities:
                st.write(f"{entity}: {label}")

            # Step 5: Chunking
            chunks = chunking(user_input)
            st.subheader("Chunking")
            for chunk, dep in chunks:
                st.write(f"{chunk}: {dep}")

            # Step 6: Dependency Parsing
            dependencies = dependency_parsing(user_input)
            st.subheader("Dependency Parsing")
            for word, dep, head in dependencies:
                st.write(f"{word}: {dep} → {head}")

            # Step 7: Book Recommendations from Dataset
            st.subheader("Book Recommendations from Dataset")
            books_from_dataset = fetch_books_from_dataset(user_input)
            for book in books_from_dataset:
                if isinstance(book, dict):
                    st.write(f"Title: {book['title']}, Author: {book['author']}")
                else:
                    st.write(book)

            # Step 8: Book Recommendations from Google Books API
            st.subheader("Book Recommendations from Google Books API")
            books_from_google = fetch_books_from_google(user_input)
            for book in books_from_google:
                if isinstance(book, dict):
                    st.write(f"Title: {book['title']}, Author: {book['author']}")
                else:
                    st.write(book)

            # Step 9: Book Recommendations from Open Library API
            st.subheader("Book Recommendations from Open Library API")
            books_from_open_library = fetch_books_from_open_library(user_input)
            for book in books_from_open_library:
                if isinstance(book, dict):
                    st.write(f"Title: {book['title']}, Author: {book['author']}")
                else:
                    st.write(book)
        else:
            st.write("Please enter a sentence or question.")

In [15]:
if __name__ == "__main__":
    main()

2024-12-10 12:16:13.196 
  command:

    streamlit run C:\Users\DELL\AppData\Roaming\Python\Python312\site-packages\ipykernel_launcher.py [ARGUMENTS]
2024-12-10 12:16:13.202 Session state does not function when running a script without `streamlit run`
