In [2]:
#Libraries
from llama_index.core import Document, VectorStoreIndex, Settings
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from PyPDF2 import PdfReader
import os
from dotenv import load_dotenv


#Load Environment variables
load_dotenv()
api_key = os.getenv("api_key")

#Configure LlamaIndex
Settings.llm = OpenAI(model="gpt-3.5-turbo", temperature=0, api_key=api_key)
Settings.embed_model = OpenAIEmbedding(api_key=api_key)

#Obtain Document
reader = PdfReader("Ridwanullah Osho Resume..pdf")

#Function to retrieve documents
def read_docs(paths: list) -> str:
    """
    Function to retrieve documents in pdf and text formats.
    Input: paths = list[str]
    output: retrieved_document = str
    """

    documents = {}
    for index, path in enumerate(paths):
        if path.endswith(".txt"):
            try:
                with open(path, "r") as f:
                    documents[f"doc_{index}"] = []
                    documents[f"doc_{index}"].append(f.read())
            except FileNotFoundError:
                print(f"File not found for {path}")
            except Exception as e:
                print(f"Error reading file {path}: {e}")
        elif path.endswith(".pdf"):
            try:
                reader = PdfReader(path)
                for i in range(len(reader.pages)):
                    if i == 0:
                        documents[f"doc_{index}"] = []
                    text = reader.pages[i].extract_text().strip()
                    if text:
                        documents[f"doc_{index}"].append(text)
            except FileNotFoundError:
                print(f"File not found for {path}")
            except Exception as e:
                print(f"Error reading file {path}: {e}")
    return ["\n".join(text_list) for text_list in documents.values()]


read_docs(["Ridwanullah Osho Resume..pdf"])

['OSHO RIDW ANULLAH\nEmail: oshoridwanullah@gmail.com | Github: RIdwanullah-Osho | linkedIn: Ridwan | No: +234 703 598 8776\nPROFESSIONAL SUMMARY\nI’m a motivated Mathematics graduate with growing expertise in Data Analytics and AI Engineering. Skilled in Python,\nSQL, Power BI, and machine learning, I apply analytical thinking to solve real-world problems and support data-driven\ndecisions. Certified in Jobberman Soft Skills and Cisco Cybersecurity, I’m passionate about continuous learning,\ncollaboration, and creating impactful, technology-driven solutions.\nWORK EXPERIENCE\nGenerative AI Fellowship, Ogun,Nigeria\nAI Engineering InternAugust 2025 - Present\nCreating and managing  data pipelines for predictive analysis using SQL , integrating it with Python through SQLAlchemy to extract,\nprocess, and analyze datasets.\nApplying machine learning techniques to identify patterns and generate insights that supported data-driven decision-making within the\nproject.\nDeveloped and presente

In [3]:
#Create documents and index
documents = read_docs(["Ridwanullah Osho Resume..pdf", "info.txt"])
llama_docs = [Document(text = doc) for doc in documents]
index = VectorStoreIndex.from_documents(llama_docs)

#Query
query_engine = index.as_query_engine()
response = query_engine.query("Provide a summary on Ridwanullah Osho Resume, with that summary give possible job roles or positions he can occupy")

In [None]:
response

'Ridwanullah Osho is a Mathematics graduate with expertise in Data Analytics and AI Engineering. He is skilled in Python, SQL, Power BI, and machine learning. Ridwanullah has experience in creating data pipelines for predictive analysis, applying machine learning techniques, and developing sales reporting systems. He has also worked on projects in data collection, cleaning, and visualization. \n\nPossible job roles or positions Ridwanullah Osho can occupy include:\n1. Data Analyst\n2. AI Engineer\n3. Machine Learning Engineer\n4. Data Scientist\n5. Business Intelligence Analyst'

In [4]:
# Query
response = query_engine.query("Outside his resume and info file, what other things would he probably be interested in?")

In [5]:
response

Response(response='Table Tennis, Video Games', source_nodes=[NodeWithScore(node=TextNode(id_='b9ed3087-01fb-45c5-9518-65de74936cb7', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='ec572df8-1a0e-435c-b517-6a4800f20e16', node_type='4', metadata={}, hash='c16e2bfe6ba4db45da13024f729879fb807394f82f806d34d439fcfa6fe7a894')}, metadata_template='{key}: {value}', metadata_separator='\n', text='OSHO RIDW ANULLAH\nEmail: oshoridwanullah@gmail.com | Github: RIdwanullah-Osho | linkedIn: Ridwan | No: +234 703 598 8776\nPROFESSIONAL SUMMARY\nI’m a motivated Mathematics graduate with growing expertise in Data Analytics and AI Engineering. Skilled in Python,\nSQL, Power BI, and machine learning, I apply analytical thinking to solve real-world problems and support data-driven\ndecisions. Certified in Jobberman Soft Skills and Cisco Cybersecurity, I’m passionate about continuous learning