In [6]:
import os
import re
import numpy as np
from PyPDF2 import PdfReader
from sklearn.linear_model import LinearRegression
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# ---------- Step 1: Read PDFs ----------
def read_pdfs(folder='pdfs'):
    text_data = ""
    for file in os.listdir(folder):
        if file.endswith(".pdf"):
            reader = PdfReader(os.path.join(folder, file))
            for page in reader.pages:
                txt = page.extract_text()
                if txt:
                    text_data += txt + " "
    return text_data

# ---------- Step 2: Split into sentences ----------
def process_text(text):
    text = re.sub(r'\s+', ' ', text)
    sentences = re.split(r'(?<=[.!?]) +', text)
    data = [s.strip().lower() for s in sentences if len(s.strip()) > 20]
    X = np.arange(len(data)).reshape(-1, 1)
    y = np.arange(len(data))
    return X, y, data

# ---------- Step 3: Build DNN ----------
def build_dnn():
    model = Sequential([
        Dense(16, activation='relu', input_shape=(1,)),
        Dense(8, activation='relu'),
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mse')
    return model

# ---------- Step 4: Chatbot ----------
def chatbot():
    print("Reading PDFs...")
    text = read_pdfs()
    X, y, data = process_text(text)

    if not data:
        print("No readable text found in PDFs.")
        return

    print("Training models...")
    dnn = build_dnn()
    dnn.fit(X, y, epochs=3, verbose=0)

    reg = LinearRegression()
    reg.fit(X, y)

    print("\nChatbot ready! Type your question (type 'exit' to quit)\n")

    while True:
        q = input("You: ").strip().lower()
        if q == "exit":
            print("Chatbot: Bye")
            break
        if not q:
            continue

        # Keyword match score for all sentences
        scores = [sum(1 for w in q.split() if w in s) for s in data]
        if max(scores) == 0:
            print("Chatbot: Sorry, I couldn’t find anything relevant.\n")
            continue

        # Combine top relevant sentences until 100 words
        sorted_idx = np.argsort(scores)[::-1]
        combined_words = []
        for idx in sorted_idx:
            combined_words.extend(data[idx].split())
            if len(combined_words) >= 100:
                break

        answer = " ".join(combined_words[:100])
        if len(combined_words) > 100:
            answer += "..."

        print("\nChatbot:\n", answer)
        print("\n" + "-"*80 + "\n")

# ---------- Run ----------
if __name__ == "__main__":
    chatbot()


Reading PDFs...
Training models...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



Chatbot ready! Type your question (type 'exit' to quit)


Chatbot:
 sdlc models software development life cycle (sdlc) is a spiritual model used in project management that defines the stages include in an information system development project, from an initial feasibility study to the maintenance of the completed application. the stages of sdlc are as follows: stage1: planning and requirement analysis requirement analysis is the most important and necessary stage in sdlc. when to use sdlc waterfall model? here, are some important phases of sdlc life cycle: waterfall model the waterfall is a universally accepted sdlc model. need of sdlc the development team must determine a suitable life cycle model for...

--------------------------------------------------------------------------------


Chatbot:
 stage4: developing the project after the code is generated, it is tested against the requirements to make sure that the products are solving the needs addressed and gathered during the requi