In [16]:
import PyPDF2
import os
import pytesseract
from PIL import Image
import pptx
from docx import Document

In [25]:
def pdf_to_text(pdf_path):
    """Return the text of every page of a PDF, concatenated in page order.

    The file at ``pdf_path`` is opened in binary mode and read with
    ``PyPDF2.PdfReader``. Each page's ``extract_text()`` result is appended
    directly after the previous one; no separator is inserted between pages.
    """
    with open(pdf_path, 'rb') as handle:
        reader = PyPDF2.PdfReader(handle)
        # Pull the text out of each page before the file handle is closed.
        page_texts = [page.extract_text() for page in reader.pages]
    return "".join(page_texts)

def pptx_to_text(pptx_path):
    """Return the text of a PowerPoint file, one entry per line.

    Every shape on every slide is visited in order. A shape that exposes a
    ``text`` attribute contributes that text; a picture shape contributes the
    placeholder line ``Image Here``. Leading and trailing whitespace of the
    combined result is stripped.

    Args:
        pptx_path: Path to the ``.pptx`` file to read.

    Returns:
        The extracted text as a single newline-separated string.
    """
    # Import the enum from its own submodule. With only ``import pptx`` in
    # scope, ``pptx.enum.shapes`` resolves solely if some other import has
    # already loaded that submodule, which is not guaranteed.
    from pptx.enum.shapes import MSO_SHAPE_TYPE

    lines = []
    for slide in pptx.Presentation(pptx_path).slides:
        for shape in slide.shapes:
            # Not every shape type carries text, so probe for the attribute.
            if hasattr(shape, 'text'):
                lines.append(shape.text)
            if shape.shape_type == MSO_SHAPE_TYPE.PICTURE:
                lines.append("Image Here")
    return "\n".join(lines).strip()

def _docx_local_name(element):
    """Return an element's tag with the ``{namespace}`` prefix removed."""
    return element.tag.rsplit('}', 1)[-1]


def _docx_run_texts(element):
    """Return the text of every ``w:t`` descendant of ``element``."""
    # An empty ``<w:t/>`` has ``.text`` of None; map it to '' so that the
    # joins and replaces done by the callers cannot fail on it.
    return [node.text or "" for node in element.xpath('.//w:t')]


def _docx_table_to_csv(table):
    """Render a ``w:tbl`` element as comma-separated lines, one per row."""
    csv_rows = []
    # Only direct child rows and cells are walked, so the rows of a table
    # nested inside a cell are not repeated as rows of the outer table.
    for row in table.xpath('./w:tr'):
        fields = [
            # One field per cell: all runs of the cell are merged first, so a
            # cell split into several runs stays in one column and an empty
            # cell still occupies its column.
            ''.join(_docx_run_texts(cell)).replace(',', '\\,')
            for cell in row.xpath('./w:tc')
        ]
        csv_rows.append(','.join(fields))
    return '\n'.join(csv_rows)


def docx_to_text(docx_path):
    """Return the text content of a Word document.

    The direct children of the document body are visited in order:

    * paragraphs contribute their text followed by a newline;
    * tables contribute the line ``Table Here in csv format:`` followed by
      one comma-separated line per row, with literal commas inside a cell
      written as ``\\,``;
    * the remaining recognised elements contribute a marker line
      (``Image Here``, ``Drawing Here``, ``Bookmark Start``,
      ``Bookmark End``) or their text.

    Args:
        docx_path: Path to the ``.docx`` file to read.

    Returns:
        The extracted text with leading and trailing whitespace stripped.
    """
    document = Document(docx_path)
    chunks = []
    for element in document.element.body:
        # Nodes such as XML comments have a non-string tag; skip them.
        if not isinstance(element.tag, str):
            continue
        # Compare the exact local name rather than a suffix of the full tag,
        # so unrelated elements whose name merely ends the same way do not
        # match.
        kind = _docx_local_name(element)
        if kind == 'p':
            runs = _docx_run_texts(element)
            if runs:
                chunks.append(''.join(runs) + "\n")
        elif kind == 'tbl':
            chunks.append("Table Here in csv format:\n")
            chunks.append(_docx_table_to_csv(element) + "\n\n")
        # NOTE(review): pic / drawing / hyperlink / smartTag presumably sit
        # inside paragraphs rather than directly under the body, so these
        # branches may rarely fire. Kept for backward compatibility; confirm.
        elif kind == 'pic':
            chunks.append("Image Here\n")
        elif kind == 'drawing':
            chunks.append("Drawing Here\n")
        elif kind == 'hyperlink':
            runs = _docx_run_texts(element)
            if runs:
                chunks.append(''.join(runs) + " (Hyperlink)\n")
        elif kind == 'smartTag':
            runs = _docx_run_texts(element)
            if runs:
                chunks.append(''.join(runs) + "\n")
        elif kind == 'bookmarkStart':
            chunks.append("Bookmark Start\n")
        elif kind == 'bookmarkEnd':
            chunks.append("Bookmark End\n")

    return ''.join(chunks).strip()

def file_to_text(file_path):
    """Extract text from a file, picking the extractor by file extension.

    The extension is compared case-insensitively. ``.pdf`` is handled by
    ``pdf_to_text``, ``.pptx`` by ``pptx_to_text`` and ``.docx`` by
    ``docx_to_text``.

    Raises:
        ValueError: If the extension is none of the three supported ones.
    """
    suffix = os.path.splitext(file_path)[1].lower()

    if suffix == '.pdf':
        return pdf_to_text(file_path)
    if suffix == '.pptx':
        return pptx_to_text(file_path)
    if suffix == '.docx':
        return docx_to_text(file_path)
    raise ValueError(f"Unsupported file type: {suffix}")

# Example usage:
# file_path = 'path/to/your/file.extension'
# extracted_text = file_to_text(file_path)
# print(extracted_text)


# Demo run: extract the text of the sample PhysicsLab.docx file and print it.
# The path is relative, so it is resolved against the current working directory.
docx_path = "example_files/example_docxs/PhysicsLab.docx"
extracted_text = docx_to_text(docx_path)
print(extracted_text)

# Example usage:
# pptx_path = 'example_pptxs/DeepLearning.pptx'
# extracted_text = pptx_to_text(pptx_path)
# print(extracted_text)


# Example usage:
# pdf_path = 'example_pdfs/AlgorithmHW.pdf'
# extracted_text = pdf_to_text(pdf_path)
# print(extracted_text)


Lab 3: Uncertainties
In this lab, you will learn some of the statistical values and methods that physicists use to describe exactly how certain they are in their measurement or calculation. After this lab, you should be able to define and calculate average, standard deviation, standard error of the mean, and Z-score. You should also be able to use the GraphingTools spreadsheet to fit data with a line and extract the slope and offset from that fit. We will use these skills throughout the rest of the term.
Please show your work for all calculations.
Introduction
Inherent to physics is the quantitative observation of physical phenomena, i.e. measurements. For example, one can measure the universal gravitational constant or the mass of an electron. How precisely we can determine some of these quantities has far-reaching consequences. 
Many physical quantities are predicted to have specific values. However, the value we measure might differ from that value. For a physicist, reporting the va