# In [None]:
import os
import os
import shutil
import glob

# In [None]:
# List the PDF and Excel files in a folder
def list_files(directory):
    """Return the paths of the ``*.pdf`` and ``*.xlsx`` files in *directory*.

    PDF matches come first, followed by Excel matches. Only the top level
    of *directory* is inspected (the glob patterns are not recursive).
    """
    found = []
    for pattern in ('*.pdf', '*.xlsx'):
        found.extend(glob.glob(os.path.join(directory, pattern)))
    return found

def file_info(directory, filename):
    """Print the name, size and last-modified time of a file.

    Args:
        directory: Folder that contains the file.
        filename: Name of the file inside *directory*.

    Returns:
        ``None`` after printing the details when the file can be stat'ed.
        When it cannot, the message ``"File '<filename>' not found."`` is
        returned (not printed), so the caller decides how to show it.
    """
    import time  # local import: only needed to format the timestamp

    filepath = os.path.join(directory, filename)
    # Stat directly instead of exists()-then-stat(): the file could vanish
    # between the two calls. OSError/ValueError are the cases for which
    # os.path.exists() would have answered False.
    try:
        file_stats = os.stat(filepath)
    except (OSError, ValueError):
        return f"File '{filename}' not found."

    # st_mtime is seconds since the epoch; show it as local date and time.
    modified = time.strftime("%Y-%m-%d %H:%M:%S",
                             time.localtime(file_stats.st_mtime))
    print(f"Name: {filename}\n"
          f"Size: {file_stats.st_size} bytes\n"
          f"Last modified: {modified}")
    return None


# Search a folder for entries matching a glob pattern
def search_files(directory, keyword):
    """Print the paths in *directory* that match the glob pattern *keyword*.

    Args:
        directory: Folder to search (top level only).
        keyword: Pattern joined onto *directory* and passed to ``glob.glob``;
            wildcards such as ``*`` and ``?`` are honoured, a plain file
            name matches only that exact entry.

    Returns:
        ``None``; the list of matching paths is printed.
    """
    matching_paths = glob.glob(os.path.join(directory, keyword))
    print(matching_paths)
    return None

# Rename a file inside a folder
def rename_file(directory, old_name, new_name):
    """Rename *old_name* to *new_name*, both relative to *directory*.

    Prints a confirmation line and returns ``None``. Errors raised by
    ``os.rename`` are not caught here.
    """
    source, target = (os.path.join(directory, name)
                      for name in (old_name, new_name))
    os.rename(source, target)
    print(f"File '{old_name}' renamed to '{new_name}'.")

# Copy a file within a folder
def copy_file(directory, src, dst):
    """Copy *src* to *dst*, both relative to *directory*.

    The copy is made with ``shutil.copy2``. Prints a confirmation line and
    returns ``None``.
    """
    source, target = (os.path.join(directory, name) for name in (src, dst))
    shutil.copy2(source, target)
    print(f"File '{src}' copied to '{dst}'.")

# Delete a file from a folder
def delete_file(directory, filename):
    """Remove *filename* from *directory*.

    Prints a confirmation line and returns ``None``. Errors raised by
    ``os.remove`` are not caught here.
    """
    os.remove(os.path.join(directory, filename))
    print(f"File '{filename}' deleted.")

# File-name suffixes the bot recognises as file arguments.
_SUPPORTED_EXTENSIONS = (".pdf", ".xlsx")


def _split_command(user_input):
    """Split *user_input* into (lower-cased command text, file-name words)."""
    file_names = []
    command_words = []
    for word in user_input.split():
        if word.endswith(_SUPPORTED_EXTENSIONS):
            file_names.append(word)
        else:
            command_words.append(word)
    return " ".join(command_words).lower(), file_names


def _first_existing_file(directory, file_names):
    """Return the first named file if it exists, else print why and return None."""
    if not file_names:
        print("No PDF or Excel file specified in input.")
        return None
    file_name = file_names[0]
    if not os.path.exists(os.path.join(directory, file_name)):
        print("File does not exist.")
        return None
    return file_name


def _existing_source_and_target(directory, file_names):
    """Return (source, target) names if exactly two were given and source exists."""
    if len(file_names) != 2:
        print("Please specify both old and new file names.")
        return None
    source_name, target_name = file_names
    if not os.path.exists(os.path.join(directory, source_name)):
        print("File does not exist.")
        return None
    return source_name, target_name


def _search_pattern(directory, file_names):
    """Return the pattern to search for, or print why there is none."""
    if not file_names:
        print("No PDF or Excel file specified in input.")
        return None
    pattern = file_names[0]
    # glob.escape() only changes strings that contain glob wildcards, so an
    # unchanged result means the user typed a plain file name. Wildcard
    # patterns never name a real file, so they bypass the existence check.
    has_wildcard = glob.escape(pattern) != pattern
    if has_wildcard or os.path.exists(os.path.join(directory, pattern)):
        return pattern
    print("File does not exist.")
    return None


def process_input(directory, user_input):
    """Interpret one line of user input and run the matching file command.

    Words ending in ``.pdf`` or ``.xlsx`` are treated as file names; the
    remaining words are searched (case-insensitively) for one of the
    keywords ``list``, ``info``, ``search``, ``delete``, ``rename`` or
    ``copy``, checked in that order. File names are excluded from keyword
    detection so that a name such as ``list.pdf`` cannot be mistaken for a
    command.

    Args:
        directory: Folder the commands operate on.
        user_input: Raw text typed by the user.

    Returns:
        The list produced by ``list_files`` for a ``list`` command;
        ``None`` for every other command (their outcome is printed).
    """
    command, file_names = _split_command(user_input)

    if 'list' in command:
        return list_files(directory)

    if 'info' in command:
        file_name = _first_existing_file(directory, file_names)
        if file_name is not None:
            file_info(directory, file_name)
    elif 'search' in command:
        pattern = _search_pattern(directory, file_names)
        if pattern is not None:
            search_files(directory, pattern)
    elif 'delete' in command:
        file_name = _first_existing_file(directory, file_names)
        if file_name is not None:
            delete_file(directory, file_name)
    elif 'rename' in command:
        names = _existing_source_and_target(directory, file_names)
        if names is not None:
            rename_file(directory, *names)
    elif 'copy' in command:
        names = _existing_source_and_target(directory, file_names)
        if names is not None:
            copy_file(directory, *names)
    else:
        print("Sorry, I did not understand that. "
              "Try: list, info, search, delete, rename or copy.")
    return None

# Main loop for interacting with the chatbot
def main():
    """Run the interactive FileBot session on a user-supplied folder.

    Asks for a folder path, then repeatedly reads a command, hands it to
    ``process_input`` and prints any value it returns. Typing ``exit``
    (any case, surrounding spaces ignored) ends the session.
    """
    directory = input("Enter the folder path where your files exist: ").strip()
    # isdir() rather than exists(): a path to a regular file is not a folder.
    if not os.path.isdir(directory):
        print("Folder not found.")
        return

    print("Welcome to FileBot! How can I assist you?")
    while True:
        user_input = input("You: ")
        if user_input.strip().lower() == 'exit':
            print("Goodbye!")
            break
        try:
            response = process_input(directory, user_input)
        except OSError as error:
            # A failed rename/copy/delete should not end the whole session.
            response = f"Error: {error}"
        # Most commands print their own result and return None; echoing
        # that would show a meaningless "FileBot: None" line.
        if response is not None:
            print("FileBot:", response)
        print('-'*60)


if __name__ == "__main__":
    main()

# In [None]:
import nltk
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Function to extract text from PDF
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of the PDF at *pdf_path*, concatenated.

    The file is opened in binary mode and read through ``PyPDF2.PdfReader``;
    page texts are joined in page order with no separator.
    """
    # NOTE(review): PyPDF2 is not imported in the visible part of this file;
    # confirm it is imported elsewhere before this function runs.
    with open(pdf_path, 'rb') as pdf_file:
        reader = PyPDF2.PdfReader(pdf_file)
        page_texts = [page.extract_text() for page in reader.pages]
    return ''.join(page_texts)

# Function to preprocess text
def preprocess_text(text):
    """Lower-case *text*, tokenize it and drop English stopwords.

    Returns the remaining tokens joined by single spaces. Tokenizing uses
    ``nltk.word_tokenize`` and the stopword list comes from
    ``nltk.corpus.stopwords``.
    """
    lowered_tokens = nltk.word_tokenize(text.lower())
    english_stopwords = set(nltk.corpus.stopwords.words('english'))
    return ' '.join(
        word for word in lowered_tokens if word not in english_stopwords
    )

# Function to initialize TF-IDF vectorizer
def initialize_tfidf_vectorizer(corpus):
    """Fit a default ``TfidfVectorizer`` on *corpus*.

    Returns a ``(vectorizer, matrix)`` pair: the fitted vectorizer and the
    result of ``fit_transform`` for the documents in *corpus*.
    """
    vectorizer = TfidfVectorizer()
    return vectorizer, vectorizer.fit_transform(corpus)

# Function to get response to user query
def get_response(user_query, tfidf_vectorizer, tfidf_matrix, corpus, top_n=3):
    """Return the corpus entries most similar to *user_query*.

    Args:
        user_query: Question typed by the user.
        tfidf_vectorizer: Fitted vectorizer used to transform the query.
        tfidf_matrix: TF-IDF matrix whose rows are compared with the query.
        corpus: Sequence indexed by matrix row; its items form the reply.
        top_n: Maximum number of entries to return.

    Returns:
        Up to *top_n* entries joined by newlines, most similar first.
        An empty string when *top_n* is not positive, and an apology
        message when no entry has any similarity to the query.
    """
    # A non-positive top_n must yield nothing; slicing with [-0:] would
    # otherwise select the entire corpus.
    if top_n <= 0:
        return ''

    # Preprocess the query the same way as the text it is compared with
    user_query = preprocess_text(user_query)
    query_vector = tfidf_vectorizer.transform([user_query])
    # One similarity score per row of tfidf_matrix
    scores = cosine_similarity(query_vector, tfidf_matrix).flatten()
    # Indices of the top_n highest scores, best first
    best_indices = scores.argsort()[::-1][:top_n]
    # A score of zero means no overlap with the query at all, so such an
    # entry is not an answer and is left out.
    responses = [corpus[idx] for idx in best_indices if scores[idx] > 0]
    if not responses:
        return "Sorry, I couldn't find anything relevant in the document."
    return '\n'.join(responses)

# Example usage
def main(pdf_path=r"C:\Users\91735\Downloads\output1.pdf"):
    """Run an interactive question-answering session over one PDF.

    The PDF text is split into sentences, each sentence is preprocessed
    and indexed with TF-IDF, and every user question is answered with the
    most similar original sentences. Typing ``exit`` ends the session.

    Args:
        pdf_path: PDF to load. The default is a sample location; pass your
            own path to use a different document.
    """
    try:
        extracted_text = extract_text_from_pdf(pdf_path)
    except FileNotFoundError:
        print(f"Bot: I could not find a PDF at '{pdf_path}'.")
        return

    sentences = nltk.sent_tokenize(extracted_text)
    corpus = [preprocess_text(sentence) for sentence in sentences]
    try:
        tfidf_vectorizer, tfidf_matrix = initialize_tfidf_vectorizer(corpus)
    except ValueError:
        # Fitting fails when the document yields no usable terms
        # (for example a scanned PDF without a text layer).
        print("Bot: I could not find any usable text in that PDF.")
        return

    print("Bot: Hi! I'm a chatbot trained on the contents of a PDF. You can ask me anything about the document. Type 'exit' to end the conversation.")

    while True:
        user_input = input("You: ")
        if user_input.lower() == 'exit':
            print("Bot: Goodbye!")
            break
        # Match against the preprocessed corpus but answer with the
        # original, readable sentences.
        response = get_response(user_input, tfidf_vectorizer, tfidf_matrix, sentences)
        print("Bot:", response)
        print('-'*60)


if __name__ == "__main__":
    main()
