# Installs and Imports

In [1]:
# 1. Setup and Imports
import datetime
import io
import re  # For regular expressions

import fitz  # PyMuPDF for PDF files
import ipywidgets as widgets
import openpyxl  # For .xlsx files
import pandas as pd
import pytz
import requests
import xlrd  # For .xls files
from bs4 import BeautifulSoup
from docx import Document
from geopy.geocoders import Nominatim
from IPython.display import display
from timezonefinder import TimezoneFinder

# 2. Utility Functions

def extract_info_from_text(text):
    """Pull assignment fields out of free-form text.

    Only the due date is extracted at present: the first occurrence of
    ``Due Date: DD/DD/DDDD`` (two-digit, two-digit, four-digit groups
    separated by slashes).

    Args:
        text: The string to search.

    Returns:
        A dict with a single ``'due_date'`` key holding the matched date
        string, or ``"Not found"`` when the pattern does not occur.
    """
    found = re.search(r"Due Date: (\d{2}/\d{2}/\d{4})", text)
    # Implement similar searches for class number/code, assignment details, etc.
    if found is None:
        return {'due_date': "Not found"}
    return {'due_date': found.group(1)}

def process_docx(file_content):
    """Extract assignment info from a Word (.docx) document.

    Args:
        file_content: A filesystem path, a binary file-like object, or the
            raw document bytes (``bytes``, ``bytearray`` or ``memoryview``),
            e.g. the upload payload passed in by ``process_file_upload``.

    Returns:
        The dict produced by ``extract_info_from_text`` for the document's
        paragraph text (paragraphs joined with newlines).
    """
    # Document() accepts only a path or a file-like object, so raw bytes
    # have to be wrapped in an in-memory stream first.
    if isinstance(file_content, (bytes, bytearray, memoryview)):
        file_content = io.BytesIO(file_content)
    doc = Document(file_content)
    text = "\n".join(para.text for para in doc.paragraphs)
    return extract_info_from_text(text)

def process_pdf(file_content):
    """Extract assignment info from the text of a PDF held in memory.

    Args:
        file_content: The PDF data as ``bytes``, ``bytearray``, a
            ``memoryview`` or a ``BytesIO`` object.

    Returns:
        The dict produced by ``extract_info_from_text`` for the
        concatenated page text, or an empty dict when the PDF cannot be
        processed (the failure is printed, not raised).
    """
    try:
        # A memoryview is not among the stream types PyMuPDF documents
        # (bytes, bytearray, BytesIO), so materialize it as bytes.
        if isinstance(file_content, memoryview):
            file_content = file_content.tobytes()
        # The context manager closes the document even if text
        # extraction fails part-way through.
        with fitz.open(stream=file_content, filetype="pdf") as doc:
            text = ''.join(page.get_text("text") for page in doc)
        return extract_info_from_text(text)
    except Exception as e:
        # Best-effort boundary: report the failure and keep the UI alive.
        print(f"Failed to process PDF file: {e}")
        return {}

def process_excel(file_content, file_type='xlsx'):
    """Load the first sheet of an Excel workbook as a list of row dicts.

    Args:
        file_content: A path, a binary file-like object, or the raw
            workbook bytes (``bytes``, ``bytearray`` or ``memoryview``).
        file_type: ``'xlsx'`` selects the ``openpyxl`` engine; any other
            value selects ``xlrd`` (used for legacy ``.xls`` files).

    Returns:
        One dict per row, keyed by column name
        (``DataFrame.to_dict('records')``).
    """
    # Passing raw bytes to read_excel is deprecated; pandas asks for them
    # to be wrapped in a BytesIO object instead.
    if isinstance(file_content, (bytes, bytearray, memoryview)):
        file_content = io.BytesIO(file_content)
    engine = 'openpyxl' if file_type == 'xlsx' else 'xlrd'
    df = pd.read_excel(file_content, engine=engine)
    return df.to_dict('records')

def process_csv(file_content):
    """Load CSV data as a list of row dicts.

    Args:
        file_content: A path, a file-like object, or the raw CSV bytes
            (``bytes``, ``bytearray`` or ``memoryview``).

    Returns:
        One dict per row, keyed by column name
        (``DataFrame.to_dict('records')``).
    """
    # read_csv expects a path or a file-like object and rejects raw
    # bytes, so wrap them in an in-memory stream.
    if isinstance(file_content, (bytes, bytearray, memoryview)):
        file_content = io.BytesIO(file_content)
    df = pd.read_csv(file_content)
    return df.to_dict('records')

def scrape_and_process_html(url, timeout=10):
    """Fetch a web page and extract assignment info from its visible text.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` may block indefinitely.

    Returns:
        The dict produced by ``extract_info_from_text`` for the page text,
        or an empty dict when the request or parsing fails (the failure is
        printed, not raised).
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Treat 4xx/5xx responses as failures rather than parsing an
        # error page as if it were the requested content.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        text = soup.get_text()
        return extract_info_from_text(text)
    except Exception as e:
        # Best-effort boundary: report the failure and keep the UI alive.
        print(f"Failed to scrape HTML from {url}: {e}")
        return {}

def process_file_upload(change):
    """Observer callback: parse each uploaded file and print the result.

    Handles both shapes the upload widget's value can take: a dict
    mapping file name to metadata (which includes ``'content'``), and a
    sequence of per-file dicts carrying ``'name'`` and ``'content'``.

    Args:
        change: The change notification passed by ``observe``; its
            ``'new'`` entry is the widget's new value. If it is missing,
            the current ``file_upload.value`` is read instead.
    """
    try:
        uploaded_files = change["new"]
    except (TypeError, KeyError):
        uploaded_files = file_upload.value

    # Normalize both value shapes to (name, content) pairs.
    if isinstance(uploaded_files, dict):
        entries = [(name, info['content']) for name, info in uploaded_files.items()]
    else:
        entries = [(info['name'], info['content']) for info in uploaded_files]

    for name, content in entries:
        # Match extensions case-insensitively so "REPORT.PDF" is accepted.
        lowered = name.lower()
        if lowered.endswith(('.xlsx', '.xls')):
            # Process Excel files
            assignments = process_excel(content, 'xlsx' if lowered.endswith('.xlsx') else 'xls')
        elif lowered.endswith('.csv'):
            # Process CSV files
            assignments = process_csv(content)
        elif lowered.endswith('.pdf'):
            # Process PDF files
            assignments = process_pdf(content)
        elif lowered.endswith('.docx'):
            # Process DOCX files
            assignments = process_docx(content)
        else:
            print(f"Unsupported file type: {name}")
            continue
        # Show the parsed data instead of silently discarding it.
        print(f"{name}: {assignments}")
       
        
# 3. Set up the widgets for file upload and URL input
# '.csv' is part of the accept filter because process_file_upload has a
# CSV branch; without it the file picker would hide CSV files.
file_upload = widgets.FileUpload(accept='.xlsx, .xls, .csv, .pdf, .docx', multiple=True, description='Upload Files')
# Re-run the handler whenever the set of uploaded files changes.
file_upload.observe(process_file_upload, names='value')

# Free-text URL entry plus the button that triggers scraping.
url_input = widgets.Text(placeholder='Enter URL here', description='URL:')
submit_button = widgets.Button(description='Submit URL', button_style='', tooltip='Click to submit the URL')

def submit_url(b):
    """Button click handler: scrape the URL typed into ``url_input``.

    Prints the scraped result, or a prompt when the text box is empty.

    Args:
        b: The clicked button, supplied by ``on_click``; not used.
    """
    url = url_input.value
    if not url:
        print("Please enter a URL.")
        return
    # Display or handle the scraped data
    print(scrape_and_process_html(url))

# Run submit_url each time the button is clicked.
submit_button.on_click(submit_url)

# 4. Organize and display UI components
supported_types_label = widgets.Label('Supported file types: Docs (Word), PDF, Excel')
upload_box = widgets.VBox(children=[supported_types_label, file_upload])
url_box = widgets.VBox(children=[url_input, submit_button])
# Upload controls are stacked above the URL controls.
main_box = widgets.VBox(children=[upload_box, url_box])
display(main_box)


VBox(children=(VBox(children=(Label(value='Supported file types: Docs (Word), PDF, Excel'), FileUpload(value=(…