In [None]:
from flask import Flask, render_template, request
import os
# from source.logger import logging



# Application setup: create the Flask app and register the upload destination in its config.
app = Flask(__name__)
UPLOAD_FOLDER = 'source/'  # Folder where uploaded files will be stored
# NOTE(review): uploads are written to 'source/', while the folder_path default of
# extract_text_from_resumes in this notebook is 'uploads/' — confirm which folder is intended.
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER


# Utility functions
def create_or_check_directory(directory):
    """Ensure that ``directory`` exists, creating it (and any missing parents).

    Calling this for a directory that already exists is a no-op.

    Args:
        directory: Path of the directory that must exist afterwards.

    Raises:
        FileExistsError: If ``directory`` exists but is not a directory.
        OSError: If the directory cannot be created (e.g. insufficient permissions).
    """
    # exist_ok=True replaces the separate "exists?" check: the check-then-create
    # sequence could fail when two requests tried to create the folder at once.
    os.makedirs(directory, exist_ok=True)


@app.route('/')
def upload_form():
    """Serve the landing page by rendering the ``index.html`` template."""
    landing_page = render_template('index.html')
    return landing_page


def _safe_upload_name(raw_name):
    """Reduce a client-supplied filename to a bare file name ('' if nothing usable is left)."""
    # Treat both '/' and '\\' as separators so that neither '../x' nor '..\\x'
    # can point outside the upload folder, whatever OS the server runs on.
    candidate = os.path.basename(raw_name.replace('\\', '/'))
    return '' if candidate in ('', '.', '..') else candidate


@app.route('/upload', methods=['POST'])
def upload_file():
    """Store every file posted under the ``files[]`` form field in the upload folder.

    The route only accepts POST, so no additional method check is needed.
    Client-supplied file names are stripped of any directory components before
    being joined to the upload folder, because they are untrusted input.

    Returns:
        A plain-text message: ``'No file part'`` when the form field is missing,
        ``'No selected file'`` when every entry has an empty filename, otherwise
        a summary stating how many files were actually saved.
    """
    # Check if the POST request has the file part
    if 'files[]' not in request.files:
        return 'No file part'

    files = request.files.getlist('files[]')

    # If no files are selected
    if len(files) == 0 or all(file.filename == '' for file in files):
        return 'No selected file'

    upload_dir = app.config['UPLOAD_FOLDER']
    create_or_check_directory(upload_dir)

    saved_count = 0
    for file in files:
        filename = _safe_upload_name(file.filename or '')
        if not filename:
            # Empty slot of a multi-file input, or a name with no usable file part.
            continue
        file.save(os.path.join(upload_dir, filename))
        saved_count += 1

    # Report the number of files written, not the number of form entries received.
    return f'{saved_count} files uploaded successfully to {upload_dir} folder'


# Start the Flask application only when this code runs as the main module.
if __name__ == '__main__':
    app.run()


In [14]:
import docx2txt as docx2txt
import fitz
import os
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import sys


class TextExtractor:
    """Helpers for listing the documents in a folder and extracting their text."""

    def __init__(self):
        pass

    def extract_text_from_docx(self, docx_path):
        """Return the text of a .docx file.

        Args:
            docx_path: Path of the .docx file to read.

        Returns:
            The extracted text. If extraction fails for any reason, ``docx_path``
            itself is returned instead of raising; callers detect a failure by
            comparing the result with the path they passed in.
        """
        try:
            return docx2txt.process(docx_path)
        except Exception:
            # Sentinel contract relied on by callers: getting the path back means "failed".
            return docx_path

    def extract_text_from_pdf(self, pdf_path):
        """Return the concatenated text of every page of a PDF.

        Args:
            pdf_path: Path of the PDF file to read.

        Returns:
            The text of all pages, in page order, joined without a separator.

        Raises:
            Whatever ``fitz.open`` or page loading raises for an unreadable file.
        """
        pdf_document = fitz.open(pdf_path)
        try:
            return "".join(
                pdf_document.load_page(page_num).get_text()
                for page_num in range(pdf_document.page_count)
            )
        finally:
            # Close the document even when a page fails to load.
            pdf_document.close()

    def rename_doc_to_docx(self, folder_path):
        """Rename every ``*.doc`` entry directly inside ``folder_path`` to ``*.docx``.

        Only the extension changes; the file contents are not converted. An entry
        is left untouched when the target name already exists, so an existing
        ``.docx`` file is never overwritten.

        Args:
            folder_path: Folder whose direct entries are inspected (no recursion).
        """
        for filename in os.listdir(folder_path):
            if not filename.endswith(".doc"):
                continue
            source = os.path.join(folder_path, filename)
            target = os.path.join(folder_path, os.path.splitext(filename)[0] + ".docx")
            if os.path.exists(target):
                # os.rename would silently replace the target on POSIX and raise on Windows.
                continue
            os.rename(source, target)

    def extract_file_names(self, directory_path):
        """List the files under ``directory_path`` after normalising ``.doc`` names.

        Args:
            directory_path: Root folder to scan (sub-folders included).

        Returns:
            A list of file paths relative to ``directory_path``. Files directly in
            the folder appear as bare names; files in sub-folders keep their
            sub-folder prefix so that joining them onto ``directory_path`` yields
            an existing path.
        """
        self.rename_doc_to_docx(directory_path)
        file_names = []
        for root, _dirs, files in os.walk(directory_path):
            for file in files:
                file_names.append(os.path.relpath(os.path.join(root, file), directory_path))
        return file_names


class ResumeTextExtractor(TextExtractor):
    """Extracts the text of every PDF and DOCX file found in a folder."""

    def __init__(self):
        super().__init__()

    def extract_text_from_resumes(self, folder_path='uploads/'):
        """Extract text from all ``.pdf`` and ``.docx`` files in ``folder_path``.

        Files with any other extension are ignored. A file that cannot be read is
        recorded in ``corrupted_files`` and does not stop the remaining files from
        being processed.

        Args:
            folder_path: Folder containing the documents.

        Returns:
            Always a 3-tuple ``(text, text_dict, corrupted_files)``:

            * ``text`` - text of the last successfully extracted file, or ``None``
              when no file could be extracted.
            * ``text_dict`` - maps each extracted file name to its text.
            * ``corrupted_files`` - paths of the files that could not be read.

        Raises:
            OSError: If ``folder_path`` cannot be listed (e.g. it does not exist).
        """
        file_names = self.extract_file_names(folder_path)
        text = None
        text_dict = {}
        corrupted_files = []

        for file_name in file_names:
            # os.path.join also works when folder_path has no trailing separator.
            file_path = os.path.join(folder_path, file_name)

            if file_name.endswith('.pdf'):
                try:
                    extracted = self.extract_text_from_pdf(file_path)
                except Exception:
                    # One unreadable PDF must not discard the results of the others.
                    corrupted_files.append(file_path)
                    continue
            elif file_name.endswith('.docx'):
                extracted = self.extract_text_from_docx(file_path)
                if extracted == file_path:
                    # extract_text_from_docx returns its input path to signal failure.
                    corrupted_files.append(file_path)
                    continue
            else:
                continue

            text_dict[file_name] = extracted
            text = extracted

        return text, text_dict, corrupted_files


In [16]:
# Run the extractor over the 'uploads/' folder.
# text_dict maps file name -> extracted text, corrupted_files collects the paths the
# extractor flagged as unreadable, and text is the single text value returned with them.
ResumeTextExtractor_ = ResumeTextExtractor()
text ,text_dict, corrupted_files = ResumeTextExtractor_.extract_text_from_resumes(folder_path='uploads/')

In [27]:
# Flatten the extracted text onto a single line. Line breaks and tabs are replaced
# by spaces (not deleted) so the words on either side of them are not fused together.
preprocessed_text = text.replace('\n', ' ').replace('\t', ' ')

In [28]:
# Subject area interpolated into the prompt below.
FIELD = "Science technology"

# Section-extraction prompt: lists the paper sections to report on and embeds the
# whole of preprocessed_text. FIELD and preprocessed_text are interpolated at the
# moment this f-string is evaluated, so both must already be defined.
prompt = f""" 

Extract key elements from the provided research paper text. The paper falls under the [{FIELD}] category, and I need information on the following sections:
- Title
- Abstract
- Introduction
- Literature Review
- Research Question/Hypothesis (if applicable)
- Methodology
- Results
- Discussion
- Conclusion
- References
- Ethical Considerations (if applicable)
- Figures/Tables

The paper text is as follows:
"{preprocessed_text}"

Provide detailed information for each section in markdown, summarizing the content and key findings. If a section is not present in the paper, indicate that it is not applicable.


"""

In [41]:
# Subject area interpolated into the prompt below.
FIELD = "Science technology"

# Second section-extraction prompt; FIELD and preprocessed_text are interpolated
# when this f-string is evaluated.
# NOTE(review): the final instruction reads "in markdown,  the content and key
# findings" — a verb appears to be missing; confirm whether that is intentional.
prompt2 = f""" 

Extract key elements from the provided research paper text. The paper falls under the [{FIELD}] category, and I need information on the following sections:
- Title
- Abstract
- Introduction
- Literature Review
- Research Question/Hypothesis (if applicable)
- Methodology
- Results
- Discussion
- Conclusion
- References
- Ethical Considerations (if applicable)
- Figures/Tables

The paper text is as follows:
"{preprocessed_text}"

Provide detailed information for each section in markdown,  the content and key findings. If a section is not present in the paper, indicate that it is not applicable.


"""




# Follow-up questions about a research paper, grouped by topic:
# each key is a category label and each value is the list of question
# prompts belonging to that category.
prompt_dict = {
    "Content Understanding": [
        "Summarize the key findings of the research paper.",
        "What is the main objective of the study described in the research paper?",
        "Provide an overview of the methodology used in the research."
    ],
    "Methodology and Experimentation": [
        "Describe the experimental design and methodology employed in the research.",
        "What data sources were used in the study?",
        "Explain the statistical methods or analysis techniques applied in the research."
    ],
    "Results and Analysis": [
        "Highlight the main results and outcomes presented in the paper.",
        "What trends or patterns were observed in the data analysis?",
        "Discuss the significance of the results in the context of the research objectives."
    ],
    "Limitations and Challenges": [
        "Identify and discuss any limitations mentioned in the research paper.",
        "What challenges or constraints were faced during the study?",
        "How might the limitations impact the generalizability of the findings?"
    ],
    "Comparisons and Contrasts": [
        "Compare the approach used in this research paper with other studies in the same field.",
        "Contrast the findings of this paper with those of a related work."
    ],
    "Future Work and Implications": [
        "Examine the recommendations or suggestions for future research provided in the paper.",
        "Discuss the potential real-world applications or implications of the research."
    ],
    "Technical Details": [
        "Explain any complex technical terms or concepts mentioned in the paper.",
        "Clarify the significance of specific algorithms or models used in the research."
    ],
    "Citations and References": [
        "Provide a list of key references cited in the research paper.",
        "Summarize the contributions of the most cited works in the bibliography."
    ],
    "Interdisciplinary Aspects": [
        "Explore any interdisciplinary connections or collaborations mentioned in the paper.",
        "How does the research contribute to multiple fields or disciplines?"
    ],
    "Ethical Considerations": [
        "Discuss any ethical considerations or implications mentioned in the paper.",
        "How does the research address potential ethical challenges in its approach?"
    ],
    "Review of Related Literature": [
        "Summarize the literature review section of the research paper.",
        "What gaps in existing research does this paper aim to fill?"
    ],
    "Critical Analysis": [
        "Provide a critical analysis of the methodology employed in the research.",
        "Discuss potential biases or limitations in the study design."
    ]
}

In [43]:
# Category labels, in display order. The opening bracket must sit on the assignment
# line: a bare "buttons =" followed by a newline is a SyntaxError.
# NOTE(review): these labels mirror the keys of prompt_dict — keep the two in sync.
buttons = [
    'Content Understanding',
    'Methodology and Experimentation',
    'Results and Analysis',
    'Limitations and Challenges',
    'Comparisons and Contrasts',
    'Future Work and Implications',
    'Technical Details',
    'Citations and References',
    'Interdisciplinary Aspects',
    'Ethical Considerations',
    'Review of Related Literature',
    'Critical Analysis',
]

# Last expression of the cell: displays the list as the cell output.
buttons

['Content Understanding',
 'Methodology and Experimentation',
 'Results and Analysis',
 'Limitations and Challenges',
 'Comparisons and Contrasts',
 'Future Work and Implications',
 'Technical Details',
 'Citations and References',
 'Interdisciplinary Aspects',
 'Ethical Considerations',
 'Review of Related Literature',
 'Critical Analysis']

In [39]:
# Installation note kept as a bare string expression (it has no runtime effect).
"""
At the command line, only need to run once to install the package via pip:

$ pip install google-generativeai
"""

# Subject area interpolated into the prompt below.
FIELD = "Science technology"

# Section-extraction prompt sent to the model later in this cell; FIELD and
# preprocessed_text are interpolated when this f-string is evaluated.
prompt = f""" 

Extract key elements from the provided research paper text. The paper falls under the [{FIELD}] category, and I need information on the following sections:
- Title
- Abstract
- Introduction
- Literature Review
- Research Question/Hypothesis (if applicable)
- Methodology
- Results
- Discussion
- Conclusion
- References
- Ethical Considerations (if applicable)
- Figures/Tables

The paper text is as follows:
"{preprocessed_text}"

Provide detailed information for each section in markdown, summarizing the content and key findings. If a section is not present in the paper, indicate that it is not applicable.


"""









import google.generativeai as genai

# Read the API key from the environment instead of embedding it in the notebook,
# where it is saved, committed and shared together with the file.
# NOTE(review): the key that used to be hardcoded here must be treated as leaked
# and revoked.
import os

GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    raise RuntimeError(
        "Set the GOOGLE_API_KEY environment variable before running this cell."
    )

genai.configure(api_key=GOOGLE_API_KEY)

# Set up the model: sampling parameters and the cap on the response length.
generation_config = {
  "temperature": 1,
  "top_p": 1,
  "top_k": 1,
  "max_output_tokens": 2048,
}

# Apply the same blocking threshold to every harm category.
safety_settings = [
  {
    "category": "HARM_CATEGORY_HARASSMENT",
    "threshold": "BLOCK_MEDIUM_AND_ABOVE"
  },
  {
    "category": "HARM_CATEGORY_HATE_SPEECH",
    "threshold": "BLOCK_MEDIUM_AND_ABOVE"
  },
  {
    "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
    "threshold": "BLOCK_MEDIUM_AND_ABOVE"
  },
  {
    "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
    "threshold": "BLOCK_MEDIUM_AND_ABOVE"
  },
]

model = genai.GenerativeModel(model_name="gemini-pro",
                              generation_config=generation_config,
                              safety_settings=safety_settings)

# Send the section-extraction prompt and show the model's markdown answer.
response = model.generate_content(prompt)
print(response.text)

1. **Title:** Computing Machinery and Intelligence


2. **Abstract:**
   - The paper proposes a test for determining if a machine can think, which involves an interrogator attempting to distinguish between a machine and a human in a conversation.
   - Challenges and objections to the proposed test are explored, including arguments from the theological, mathematical, consciousness, and disabilities standpoints.


3. **Introduction:**
   - The author introduces the study of "thinking machines" and the various definitions of "machine" and "think" that have been proposed.
   - The goal is to find a satisfactory definition of "machine" that will allow for a meaningful investigation into the possibility of machines thinking.


4. **Literature Review:**
   - The paper reviews existing results in mathematical logic that demonstrate limitations to the powers of discrete-state machines, such as Gödel's theorem.
   - It also acknowledges arguments against the possibility of machines thinking, inc

In [40]:
def save_markdown_to_file(markdown_content, file_path="example.md"):
    """Write ``markdown_content`` to ``file_path``, replacing any existing file.

    Args:
        markdown_content: The markdown text to store.
        file_path: Destination path; defaults to ``example.md`` in the working directory.

    Raises:
        OSError: If the file cannot be opened for writing.
    """
    # Always write UTF-8: the platform default encoding (e.g. cp1252 on Windows)
    # cannot represent every character a model response may contain.
    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(markdown_content)

# Persist the model's answer to the function's default path, example.md.
save_markdown_to_file(response.text)

In [3]:
import markdown2

# Minimal markdown2 demonstration: convert a small markdown string to HTML.
text_in_markdown = "## Header\nThis is *italic* and this is **bold**."
html_content = markdown2.markdown(text_in_markdown)

# Now you can display or render the `html_content` in your application.
print(html_content)

<h2>Header</h2>

<p>This is <em>italic</em> and this is <strong>bold</strong>.</p>



In [2]:
!pip install markdown2

Collecting markdown2
  Downloading markdown2-2.4.12-py2.py3-none-any.whl.metadata (2.0 kB)
Downloading markdown2-2.4.12-py2.py3-none-any.whl (41 kB)
   ---------------------------------------- 0.0/41.2 kB ? eta -:--:--
   ----------------------------- ---------- 30.7/41.2 kB 1.4 MB/s eta 0:00:01
   ---------------------------------------- 41.2/41.2 kB 500.1 kB/s eta 0:00:00
Installing collected packages: markdown2
Successfully installed markdown2-2.4.12


DEPRECATION: Loading egg at c:\users\91845\appdata\local\programs\python\python311\lib\site-packages\resume_ocr-0.0.1-py3.11.egg is deprecated. pip 24.3 will enforce this behaviour change. A possible replacement is to use pip for package installation.. Discussion can be found at https://github.com/pypa/pip/issues/12330
