In [None]:
!sudo apt install tesseract-ocr
!pip install pytesseract

In [None]:
!sudo apt install poppler-utils

In [None]:
pip install pdf2image

In [None]:
!pip install -q streamlit

In [None]:
!pip install -q replicate

In [None]:
%%writefile app.py

import streamlit as st
import pandas as pd
import pickle
from pdf2image import convert_from_bytes
import pytesseract
import re
import replicate
import os
import warnings
# Silence all Python warnings for the lifetime of the app process.
warnings.filterwarnings('ignore')

# SECURITY: the Replicate API token must be supplied through the environment
# (for example `export REPLICATE_API_TOKEN=...` before `streamlit run`, or a
# notebook secret). A token written into this file is readable by everyone who
# can see the source; any token that was ever committed here should be revoked.
if not os.environ.get("REPLICATE_API_TOKEN"):
    print("REPLICATE_API_TOKEN is not set; calls to Replicate will fail until it is provided.")

# Function to convert pdf bytes to images
def pdf_to_img(pdf_bytes):
    """Render every page of a PDF to ``page_<n>.jpg`` in the working directory.

    Page images left behind by a previously processed document are deleted
    first, so the pages of a shorter PDF are never mixed with stale pages of an
    earlier, longer one.

    Args:
        pdf_bytes: Raw bytes of the PDF document.

    Returns:
        The image file names that were written, in page order.
    """
    # Convert before touching the disk so a failed conversion leaves the
    # previously written pages untouched.
    images = convert_from_bytes(pdf_bytes)

    # Drop this function's own outputs from earlier runs.
    for entry in os.listdir("."):
        if re.fullmatch(r"page_\d+\.jpg", entry):
            os.remove(entry)

    saved_files = []
    for page_number, image in enumerate(images, start=1):
        filename = f"page_{page_number}.jpg"
        image.save(filename, "JPEG")
        saved_files.append(filename)
    return saved_files

# Function to extract text from images using Tesseract OCR
def extract_text_from_images(max_pages=2):
    """Run Tesseract OCR over the saved page images and return their text.

    Reads ``page_1.jpg``, ``page_2.jpg``, ... from the working directory and
    stops at the first page file that does not exist, so a document with fewer
    than ``max_pages`` pages no longer fails on a missing image.

    Args:
        max_pages: Upper bound on the number of page images to read. Defaults
            to 2, the number of pages that was previously hard-coded.

    Returns:
        The OCR text of the pages read, concatenated in page order; an empty
        string when no page image is present.
    """
    page_texts = []
    for page_number in range(1, max_pages + 1):
        filename = f"page_{page_number}.jpg"
        if not os.path.exists(filename):
            # The document has fewer pages than max_pages.
            break
        page_texts.append(pytesseract.image_to_string(filename))
    return "".join(page_texts)

# Function to extract data from text
def _slice_section(text, start_marker, end_marker, start_error, end_error):
    """Return the stripped text from start_marker up to (not including) end_marker."""
    start = text.find(start_marker)
    if start == -1:
        raise ValueError(start_error)

    # The end marker is only accepted after the start marker.
    end = text.find(end_marker, start)
    if end == -1:
        raise ValueError(end_error)

    return text[start:end].strip()


def extract_data_from_text(text):
    """Pull the resume fields used by the input form out of OCR text.

    Args:
        text: Plain text of the document.

    Returns:
        Dict with the keys ``name``, ``college``, ``cgpa``, ``internships`` and
        ``projects``. ``college`` and ``cgpa`` hold a "not found" message when
        the corresponding pattern is absent.

    Raises:
        ValueError: If the "LEADERSHIP & EXPERIENCE", "PROJECTS" or
            "TECH SOCIETIES" heading needed to delimit a section is missing.
    """
    categoriser_data = {}

    # Name: first non-blank line, so a leading empty line in the OCR output
    # does not produce an empty name.
    categoriser_data['name'] = next(
        (line.strip() for line in text.split("\n") if line.strip()), "")

    # College: case-insensitive match of the SRM university/institute name.
    pattern = r"(?i)\b(SRM\s+(?:University|Institute)\s+of\s+Science\s+and\s+Technology)"
    college = re.search(pattern, text)
    categoriser_data['college'] = college.group(0) if college else "University name not found"

    # GPA: everything after "GPA:" up to the end of that line.
    gpa_label = "GPA:"
    label_index = text.find(gpa_label)
    if label_index != -1:
        value_start = label_index + len(gpa_label)
        line_end = text.find("\n", value_start)
        if line_end == -1:
            # "GPA:" is on the last line; slicing to -1 would drop the final
            # character of the value.
            line_end = len(text)
        categoriser_data['cgpa'] = text[value_start:line_end].strip()
    else:
        categoriser_data['cgpa'] = "GPA not found"

    # Internships: from the leadership heading up to the projects heading.
    categoriser_data['internships'] = _slice_section(
        text,
        "LEADERSHIP & EXPERIENCE",
        "PROJECTS",
        "Leadership & Experience section not found",
        "Projects section not found",
    )

    # Projects: from the projects heading up to the tech societies heading.
    categoriser_data['projects'] = _slice_section(
        text,
        "PROJECTS",
        "TECH SOCIETIES",
        "projects section not found",
        "tech societies section not found",
    )

    return categoriser_data

# Load the model
# NOTE(security): pickle.load can execute arbitrary code embedded in the file,
# so model.pkl must come from a trusted source.
# The file is opened relative to the current working directory.
with open('model.pkl', 'rb') as file:
    loaded_model = pickle.load(file)


# Module-level flag that main() checks to decide whether to render
# recommendation() in addition to the selected page.
show_recommendation = False

def _build_prompts(user_inputs, presence_indicators):
    """Build the list of LLM prompts for the values entered in the form.

    Args:
        user_inputs: Mapping of form field name to the text the user entered.
        presence_indicators: Mapping of indicator name to 1 (the matching field
            was filled in) or 0 (it was left empty).

    Returns:
        List of prompt strings: one per empty field, one per filled field,
        then the university, GRE, SOP, profile-improvement (only when at least
        one field is filled), interview and communication prompts.
    """
    # Mapping presence indicators to the form field they describe.
    indicator_to_param = {
        "internships_done": "internships",
        "projects_done": "projects",
        "research_done": "research_papers",
        "social_groups_joined": "social_groups",
        "volunteering_exp": "volunteering",
        "pof_done": "position_of_resp"
    }

    # Both lists hold field names, so the prompts read "my internships"
    # rather than the indicator key ("my internships done").
    missing_parameters = [indicator_to_param[indicator]
                          for indicator, value in presence_indicators.items() if value == 0]
    present_parameters = [indicator_to_param[indicator]
                          for indicator, value in presence_indicators.items() if value == 1]

    prompts = []

    for param in missing_parameters:
        prompts.append(f"My name is {user_inputs['name']}, can you please highlight some things which I should take care, while writing about my {param.replace('_', ' ')}, please be specific and just write everything in points. Also, give responses like you are talking to a real person and be polite while calling names and everything")

    for param in present_parameters:
        prompts.append(f"My name is {user_inputs['name']}, list out the things which I can improve, while telling about my {param.replace('_', ' ')}, please be specific and just write everything in points. Also, give responses like you are talking to a real person and be polite while calling names and everything")

    # University preference
    prompts.append(f"My name is {user_inputs['name']}, I am currently in my {user_inputs['year_of_study']} at {user_inputs['college']}. I want to go in these {user_inputs['preferred_univ']} universities. Can you list a tailored structure of how to get into these universities, please be specific and just write everything in points. Also, give responses like you are talking to a real person and be polite while calling names and everything")

    # GRE preparation
    prompts.append(f"My name is {user_inputs['name']}, I am currently in my {user_inputs['year_of_study']}, do you think this is the right time for me to prepare for GRE, and tell me is GRE necessary for {user_inputs['preferred_univ']}. If you think this is the right time to start, how should I prepare for it?, please be specific and just write everything in points. Also, give responses like you are talking to a real person and be polite while calling names and everything")

    # Statement of Purpose for the desired university
    prompts.append(f"My name is {user_inputs['name']}, I am applying for admission to {user_inputs['preferred_univ']}. Can you guide me on how to write a compelling Statement of Purpose (SOP) specifically tailored for this university? Please include key points to cover and any specific requirements or tips from your experience. Also, give responses like you are talking to a real person and be polite while calling names and everything.")

    # Profile improvement, quoting the details the user supplied. Skipped when
    # nothing was supplied, because the prompt would then list no details.
    if present_parameters:
        details = "".join(f"{param.replace('_', ' ')}: {user_inputs[param]}\n"
                          for param in present_parameters)
        improvement_prompt = f"My name is {user_inputs['name']}, I have provided information about my {', '.join(present_parameters)}. Here are the details:\n\n"
        improvement_prompt += details
        improvement_prompt += "\nCan you suggest any improvements or additional details I should include to make my profile more compelling for university applications or job opportunities? Please provide specific advice and suggestions. Also, give responses like you are talking to a real person and be polite while calling names and everything."
        prompts.append(improvement_prompt)

    # Interview preparation
    prompts.append(f"My name is {user_inputs['name']}, I am preparing for interviews for internships or job opportunities. Can you provide tips and strategies for preparing effectively for technical and behavioral interviews? Please include common questions, how to approach technical assessments, and advice for presenting my experience and skills confidently. Also, give responses like you are talking to a real person and be polite while calling names and everything.")

    # Communication skills
    prompts.append(f"My name is {user_inputs['name']}, I want to improve my communication skills, both written and verbal, to enhance my professional profile. Can you suggest resources, courses, or activities that can help me develop effective communication skills? Please include practical tips and exercises for improving clarity, coherence, and persuasiveness in communication. Also, give responses like you are talking to a real person and be polite while calling names and everything.")

    return prompts


def start_page():
    """Render the upload page: OCR the PDF, show the input form, handle Submit.

    After a PDF is uploaded it is converted to images and OCR'd, and the
    extracted fields pre-fill a text input per variable. Pressing Submit runs
    ``loaded_model`` on the presence indicators and stores the predicted
    category and the generated prompts in ``st.session_state`` under
    ``category_person`` and ``question_arr``.

    Returns:
        None.
    """
    # Without this declaration the assignment on Submit would only create a
    # local variable and the module-level flag would never change.
    global show_recommendation

    # Display file uploader for document file
    document_file = st.file_uploader("Upload document file", type=['pdf'])
    if not document_file:
        return None

    # Convert pdf bytes to images
    pdf_to_img(document_file.getvalue())
    st.write("Document file processed and converted to images")

    # Extract text from images
    text = extract_text_from_images()

    # Extract data from text. A document that lacks the expected headings
    # raises ValueError; fall back to an empty form instead of crashing.
    try:
        categoriser_data = extract_data_from_text(text)
    except ValueError as exc:
        st.warning(f"Could not extract every field automatically ({exc}). Please fill in the form manually.")
        categoriser_data = {}

    # Display the extracted data
    st.write("Extracted data:", categoriser_data)

    user_inputs = {}

    # 1 when the matching form field is filled in, 0 otherwise. The key order
    # is the column order of the DataFrame passed to the model.
    presence_indicators = {
        'internships_done': 0,
        'projects_done': 0,
        'research_done': 0,
        'social_groups_joined': 0,
        'volunteering_exp': 0,
        'pof_done': 0
    }

    # Form field -> presence indicator it controls.
    param_to_indicator = {
        'internships': 'internships_done',
        'projects': 'projects_done',
        'research_papers': 'research_done',
        'social_groups': 'social_groups_joined',
        'volunteering': 'volunteering_exp',
        'position_of_resp': 'pof_done'
    }

    # Display the form for each variable
    st.title("Variable Input Form")
    input_variables = ['name', 'age', 'college', 'year_of_study', 'cgpa', 'preferred_univ', 'internships', 'projects',
                       'research_papers', 'social_groups', 'volunteering', 'position_of_resp']
    for variable in input_variables:
        # Pre-fill with the extracted value when there is one.
        default_value = categoriser_data.get(variable, "")
        user_input = st.text_input(f"Enter value for {variable}:", value=default_value)

        # An empty input is stored as the empty string.
        user_inputs[variable] = user_input or ""

        if variable in param_to_indicator and user_input != "":
            presence_indicators[param_to_indicator[variable]] = 1

    # Add a submit button
    if st.button("Submit"):
        # Create DataFrame from presence_indicators dictionary
        df = pd.DataFrame([presence_indicators])

        show_recommendation = True

        # Store the prediction and the prompts in session state
        st.session_state['category_person'] = loaded_model.predict(df)[0]
        st.session_state['question_arr'] = _build_prompts(user_inputs, presence_indicators)
        # A new submission starts again from the first question.
        st.session_state['current_question_index'] = 0

    return None

# model="meta-llama/Llama-2-7b-chat-hf"
# tokenizer=AutoTokenizer.from_pretrained(model)
# pipeline=transformers.pipeline(
#     "text-generation",
#     model=model,
#     tokenizer=tokenizer,
#     torch_dtype=torch.bfloat16,
#     trust_remote_code=True,
#     device_map="auto",
#     min_length=200,
#     max_length=1000,
#     do_sample=True,
#     top_k=10,
#     num_return_sequences=1,
#     eos_token_id=tokenizer.eos_token_id
#     )

# llm=HuggingFacePipeline(pipeline=pipeline, model_kwargs={'temperature':0.7})


# deaken_data = ["""Deaken University English Language Requirements
# English Language Requirements:
# TOEFL iBT: Minimum score of 79-93 depending on the program.
# IELTS: Minimum overall score of 6.0-7.0 with no band lower than 6.0 depending on the program.
# PTE Academic: Minimum score of 58-64 depending on the program.
# GRE Requirements:

# GRE Score Requirements:
# Graduate Programs: Many graduate programs, particularly those in business, education, and humanities, require the GRE.
# The minimum score requirement varies depending on the program, but it's typically in the range of 300-330 for the verbal and quantitative sections.
# Some programs may also require the writing section with a minimum score of 3.5.

# Additional Requirements:

# Work experience: Some programs may require relevant work experience.
# Portfolio: Creative programs may require a portfolio of your work.
# Statement of purpose: A well-written statement of purpose is essential for all applications.

# Research requirements:
# Instead, the focus is on demonstrating your research potential and suitability for the specific program you're applying to. To achieve this, they require several key documents:
# 1. Academic Transcripts: These showcase your academic performance in coursework relevant to your research interests.
# 2. Statement of Purpose: This document allows you to express your research interests, motivations, and relevant skills and experiences. It's your chance to highlight your specific contributions to the field and why you're a good fit for the program.
# 3. Research Proposal: While not always mandatory, some programs may request a research proposal outlining your proposed research topic, methodology, and expected outcomes. This helps assess your research skills and alignment with the program's focus.
# 4. References: Academic referees familiar with your academic achievements and research potential can provide valuable insights to the admissions committee.
# 5. Additional documents: Depending on the program, you might need to submit work samples, publications (if any), evidence of research experience, or other relevant materials.

# Statement of Purpose:
# Template:

# Introduction:

# Briefly introduce yourself and state your intention to apply to a specific program at Deakin University.
# Mention the semester and year you are applying for.
# Academic Background and Achievements:

# Highlight your relevant academic background, including your degree(s), relevant coursework, and your overall academic performance.
# Mention any academic awards, scholarships, or distinctions you have received.
# Research Experience and Interests:

# If applicable, describe your research experience (e.g., internships, lab work, independent projects).
# Clearly articulate your research interests and align them with the specific program you are applying to.
# Mention specific faculty members whose research aligns with your interests and how you see yourself contributing to their work.
# Motivations and Goals:

# Explain your motivations for pursuing this program at Deakin University. What attracts you to the program and the university?
# Clearly state your short-term and long-term career goals and how this program will help you achieve them.
# Conclusion:

# Briefly summarize your key strengths and qualifications.
# Reiterate your enthusiasm for the program and express your gratitude for the committee's time and consideration.
# Tips:

# Be specific and relevant: Tailor your statement to the specific program you are applying to. Highlight skills and experiences directly related to the program's requirements and research focus.
# Demonstrate your passion: Show your genuine interest in the program and the field of research. Use specific examples and evidence to support your claims.
# Be concise and clear: Aim for a clear and concise statement within the suggested word limit. Use strong verbs and avoid unnecessary jargon.
# Proofread carefully: Ensure your statement is free of grammatical errors and typos.
# """]

# vectorstore = FAISS.from_texts(
#   deaken_data, embedding=HuggingFaceEmbeddings()
# )

# # Create a retriever from the vectorstore
# retriever = vectorstore.as_retriever()

# # Create a new prompt template using the HuggingFace embeddings
# template = """Answer the question based only on the following context: {context}

# Question: {question}
# """

# min_tokens = 256

# prompt = ChatPromptTemplate.from_template(template, min_tokens=min_tokens)

# chain = (
#     {"context": retriever, "question": RunnablePassthrough()}
#     | prompt
#     | llm
#     | StrOutputParser()
# )

# # Create a new FAISS vectorstore using the HuggingFace embeddings
# vectorstore = FAISS.from_texts(
#   deaken_data, embedding=HuggingFaceEmbeddings()
# )

# # Create a retriever from the vectorstore
# retriever = vectorstore.as_retriever()

# # Create a new prompt template using the HuggingFace embeddings
# template = """Answer the question based only on the following context: {context}

# Question: {question}
# """

# prompt = ChatPromptTemplate.from_template(template)

# chain = (
#     {"context": retriever, "question": RunnablePassthrough()}
#     | prompt
#     | llm
#     | StrOutputParser()
# )

# chain.invoke("What all things are required to get into Deaken University")


def question_and_answer(question, answer):
    """Render a collapsible card labelled with the question.

    Args:
        question: Text used as the label of the expander.
        answer: Content written inside the expander.
    """
    card = st.expander(question)
    card.write(answer)


def _shift_question_index(step, question_count):
    """Button callback: move the stored question index by step, kept in range."""
    target = st.session_state.get('current_question_index', 0) + step
    st.session_state['current_question_index'] = max(0, min(target, question_count - 1))


def recommendation():
    """Render the recommendation page: one generated answer at a time.

    Reads ``category_person`` and ``question_arr`` from ``st.session_state``.
    The question at ``current_question_index`` is sent to the Replicate-hosted
    Llama 2 chat model and shown with its answer; Previous/Next buttons move
    through the list. Answers are kept in session state so a rerun or a step
    back does not call the model again for the same question.
    """
    st.title("Recommendation")
    st.write("## Question and Answer")

    if 'category_person' not in st.session_state or 'question_arr' not in st.session_state:
        st.write("Please fill out the form on the previous page.")
        return

    category_person = st.session_state['category_person']
    questions = st.session_state['question_arr']
    st.write("Category: ", category_person)
    st.write("Question list: ", questions)

    if not questions:
        return

    # Keep the stored index inside the list, e.g. after a new submission
    # produced fewer prompts than the previous one.
    current_question_index = st.session_state.get('current_question_index', 0)
    current_question_index = max(0, min(current_question_index, len(questions) - 1))
    st.session_state['current_question_index'] = current_question_index

    question_prompt = questions[current_question_index]

    # Answers already generated in this session, keyed by prompt text.
    if 'answers_by_question' not in st.session_state:
        st.session_state['answers_by_question'] = {}
    answers = st.session_state['answers_by_question']

    if question_prompt not in answers:
        # Generate answer using large language model
        output = replicate.run(
            "meta/llama-2-13b-chat:f4e2de70d66816a838a89eeeb621910adffb0dd0baba3976c96980970978018d",
            input={"prompt": question_prompt,
                   "temperature": 0.75,
                   "max_new_tokens": 2000,
                   "min_new_tokens": 256}
        )
        # The output is iterated and its pieces concatenated into one string.
        answers[question_prompt] = "".join(output)
    response_text = answers[question_prompt]

    # Display the question and answer
    question_and_answer(question_prompt, response_text)

    # on_click callbacks run before the script reruns, so the rerun triggered
    # by a click already renders the newly selected question. Updating the
    # index after rendering would leave the display one click behind.
    col1, col2 = st.columns(2)
    if current_question_index > 0:
        col1.button("Previous", on_click=_shift_question_index, args=(-1, len(questions)))
    if current_question_index < len(questions) - 1:
        col2.button("Next", on_click=_shift_question_index, args=(1, len(questions)))


def personal_ques():
    """Render the help page: a free-form question answered by the LLM."""
    st.title("Help Page")

    # Free-text question typed by the user
    asked = st.text_area("Ask your question here:")

    # Nothing typed yet: show the hint and stop here.
    if not asked:
        st.write("Please enter your question.")
        return

    # Send the question to the Replicate-hosted model
    model_input = {"prompt": asked,
                   "temperature": 0.75,
                   "max_new_tokens": 2000,
                   "min_new_tokens": 256}
    chunks = replicate.run(
        "meta/llama-2-13b-chat:f4e2de70d66816a838a89eeeb621910adffb0dd0baba3976c96980970978018d",
        input=model_input
    )

    # Concatenate the pieces of the output into the full answer
    reply = "".join(chunk for chunk in chunks)
    question_and_answer("Your Question:", asked)
    question_and_answer("Answer:", reply)

def main():
    """Render the sidebar navigation and show the selected page."""
    st.sidebar.title('Navigation')
    page = st.sidebar.radio("Go to", ['Data Extraction and Variable Input Form', 'Recommendation', 'Help'])

    if page == 'Data Extraction and Variable Input Form':
        start_page()

    # show_recommendation is the module-level flag; when it is truthy the
    # recommendations are rendered whichever page is selected.
    if show_recommendation or page == 'Recommendation':
        recommendation()
    elif page == 'Help':
        personal_ques()


if __name__ == "__main__":
    main()


Writing app.py


In [None]:
!npm install localtunnel

In [None]:
!streamlit run /content/app.py &>/content/logs.txt & npx localtunnel --port 8501 & curl ipv4.icanhazip.com