In [1]:
from langchain.llms import LlamaCpp
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

In [2]:
n_gpu_layers = 5  # Metal set to 1 is enough.
n_batch = 512  # Should be between 1 and n_ctx, consider the amount of RAM of your Apple Silicon Chip.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

In [4]:
_PATH_LLAMA_ = "/Users/niteshkumarsharma/.cache/lm-studio/models/TheBloke/Llama-2-7b-Chat-GGUF/llama-2-7b-chat.Q5_K_M.gguf"
_PATH_MISTRAL_ = "/Users/niteshkumarsharma/.cache/lm-studio/models/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/mistral-7b-instruct-v0.1.Q5_K_M.gguf"
_PATH_MISTRAL_ORCA_ = "/Users/niteshkumarsharma/.cache/lm-studio/models/TheBloke/Mistral-7B-OpenOrca-GGUF/mistral-7b-openorca.Q5_K_M.gguf"

In [5]:
MISTRAL = LlamaCpp(model_path=_PATH_MISTRAL_ORCA_,
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    n_ctx=4096,
    f16_kv=True,  # MUST set to True, otherwise you will run into problem after a couple of calls
    callback_manager=callback_manager,
    verbose=True,
    max_tokens=1000)

llama_model_loader: loaded meta data with 20 key-value pairs and 291 tensors from /Users/niteshkumarsharma/.cache/lm-studio/models/TheBloke/Mistral-7B-OpenOrca-GGUF/mistral-7b-openorca.Q5_K_M.gguf (version GGUF V2 (latest))
llama_model_loader: - tensor    0:                token_embd.weight q5_K     [  4096, 32002,     1,     1 ]
llama_model_loader: - tensor    1:              blk.0.attn_q.weight q5_K     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    2:              blk.0.attn_k.weight q5_K     [  4096,  1024,     1,     1 ]
llama_model_loader: - tensor    3:              blk.0.attn_v.weight q6_K     [  4096,  1024,     1,     1 ]
llama_model_loader: - tensor    4:         blk.0.attn_output.weight q5_K     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    5:            blk.0.ffn_gate.weight q5_K     [  4096, 14336,     1,     1 ]
llama_model_loader: - tensor    6:              blk.0.ffn_up.weight q5_K     [  4096, 14336,     1,     1 ]
llama_model_loader: 

In [6]:
LLAMA = LlamaCpp(
    model_path=_PATH_LLAMA_,
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    n_ctx=4096,
    f16_kv=True,  # MUST set to True, otherwise you will run into problem after a couple of calls
    callback_manager=callback_manager,
    verbose=True,
    max_tokens=1000
)

llama_model_loader: loaded meta data with 19 key-value pairs and 291 tensors from /Users/niteshkumarsharma/.cache/lm-studio/models/TheBloke/Llama-2-7b-Chat-GGUF/llama-2-7b-chat.Q5_K_M.gguf (version GGUF V2 (latest))
llama_model_loader: - tensor    0:                token_embd.weight q5_K     [  4096, 32000,     1,     1 ]
llama_model_loader: - tensor    1:           blk.0.attn_norm.weight f32      [  4096,     1,     1,     1 ]
llama_model_loader: - tensor    2:            blk.0.ffn_down.weight q6_K     [ 11008,  4096,     1,     1 ]
llama_model_loader: - tensor    3:            blk.0.ffn_gate.weight q5_K     [  4096, 11008,     1,     1 ]
llama_model_loader: - tensor    4:              blk.0.ffn_up.weight q5_K     [  4096, 11008,     1,     1 ]
llama_model_loader: - tensor    5:            blk.0.ffn_norm.weight f32      [  4096,     1,     1,     1 ]
llama_model_loader: - tensor    6:              blk.0.attn_k.weight q5_K     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor

In [7]:
from langchain.document_loaders import WebBaseLoader
from langchain.chains.summarize import load_summarize_chain

In [8]:
from langchain.chains.llm import LLMChain
from langchain.prompts import PromptTemplate


from langchain.chains.combine_documents.stuff import StuffDocumentsChain

In [9]:
## Prompt Defination
skills_template = """Get Key Skills from Job Description Below:
"{job_desc}"
KEY SKILLS:"""

skill_prompt = PromptTemplate.from_template(skills_template)

In [10]:
## Prompt Defination
prompt_summary_template = """CREATE NEW RESUME SUMMARY BY MODIFYING ORIGIN SUMMARY BELOW:
"{summary}"

BY UTILISING JOB DESCRIPTION PROVIDED BELOW:
"{job_desc}

DO NOT EXCEED WORD COUNT BY: 
"{limit}

NEW RESUME SUMMARY:"""

prompt_summary = PromptTemplate.from_template(prompt_summary_template)

In [11]:
PROFILE_SUMMARY = """
Hello, I am Nitesh, an Experienced Machine Learning professional with 4+ years of industry expertise in leveraging advanced statistical analysis& machine learning and data analysis to lead data driven decisions

With a Master's degree from IIT Madras, I have a solid understanding of statistics, machine learning, signal processing and time series analysis.


Achievement
    Led development of a scalable & efficient Big Data ML product, which consumes close to 10TBs of data using AWS 
    Built Scalable S3 Prefixes Validation Routine using Apache Spark, achieving performance of 1M Prefixes Validation in 2 minutes using 5 Node Cluster
    Improved Logistics Business Process by Accurate Forecasting Models resulting in value creation of 1.5M USD

"""

In [12]:
jd_description = """
Consumer Marketing Analytics Team: ML Engineer


As Eli Lilly strives to achieve its purpose of making life better for patients, we have been building up our in-house ‘Consumer Experience’ function, which will design and execute next-generation marketing campaigns aimed at informing and educating consumers (or patients) directly.


To support the Consumer marketing teams in their decision-making, a data and analytics team has been set up simultaneously in Indianapolis (HQ) and Bengaluru (LCCI). This team is responsible for setting up the data warehouses necessary to handle large volumes of digital streaming data, create meaningful analyses using that data, and deliver recommendations to leadership.


As part of the LCCI team, we are excited to offer the role of a ML Engineer who will be an integral part of the Consumer analytics team.


Core Responsibilities


    4-8 years of demonstrated experience of building and deploying ML pipelines
    Take standalone models built by data scientists and enhance them to build large scale machine learning pipelines that are deployed on the cloud (AWS)
    Design and build robust and optimized ML pipelines by applying software engineering rigor and best practices to machine learning, including CI/CD, automation, etc
    Coordinate with diverse stakeholders such as statisticians, software engineers, infrastructure teams to better understand requirements and constraints to design the most optimal ML pipelines
    Continuous learning to stay up to date with new technologies to improve performance, maintainability, and reliability of ML systems
    Solving open ended and unstructured questions in an environment where new ML pipeline solutions need to be explored and built from the ground up


Required


    Deep expertise in building ML/automation pipelines from scratch
    Strong knowledge of AWS Sagemaker and working knowledge of AWS EC2
    Strong knowledge of python and PySpark. Working knowledge of R is preferred
    Understanding of statistical modelling concepts highly preferred
    Strong knowledge of working tools like Docker, Kubernetes, Jenkins
"""

In [13]:
summary_prompt = prompt_summary.format(summary=PROFILE_SUMMARY, job_desc=jd_description, limit=100)
skills_prompt = skill_prompt.format(job_desc=jd_description)

In [14]:
skills_prompt

'Get Key Skills from Job Description Below:\n"\nConsumer Marketing Analytics Team: ML Engineer\n\n\nAs Eli Lilly strives to achieve its purpose of making life better for patients, we have been building up our in-house ‘Consumer Experience’ function, which will design and execute next-generation marketing campaigns aimed at informing and educating consumers (or patients) directly.\n\n\nTo support the Consumer marketing teams in their decision-making, a data and analytics team has been set up simultaneously in Indianapolis (HQ) and Bengaluru (LCCI). This team is responsible for setting up the data warehouses necessary to handle large volumes of digital streaming data, create meaningful analyses using that data, and deliver recommendations to leadership.\n\n\nAs part of the LCCI team, we are excited to offer the role of a ML Engineer who will be an integral part of the Consumer analytics team.\n\n\nCore Responsibilities\n\n\n    4-8 years of demonstrated experience of building and deployi

In [15]:
LLAMA(skills_prompt)



* Building ML pipelines from scratch
* Machine learning algorithms
* AWS SageMaker
* AWS EC2
* Python
* PySpark
* R
* Docker
* Kubernetes
* Jenkins

































































































































































































































































































































































































































































































































































































































































































































































































































()










































()




llama_print_timings:        load time =  7653.16 ms
llama_print_timings:      sample time =   776.59 ms /  1000 runs   (    0.78 ms per token,  1287.68 tokens per second)
llama_print_timings: prompt eval time =  7653.07 ms /   491 tokens (   15.59 ms per token,    64.16 tokens per second)


"\n\n* Building ML pipelines from scratch\n* Machine learning algorithms\n* AWS SageMaker\n* AWS EC2\n* Python\n* PySpark\n* R\n* Docker\n* Kubernetes\n* Jenkins\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\

llama_print_timings:        eval time = 42064.69 ms /   999 runs   (   42.11 ms per token,    23.75 tokens per second)
llama_print_timings:       total time = 52371.51 ms


In [16]:
MISTRAL(skills_prompt)

 Machine Learning Engineering, AWS Sagemaker, Python, PySpark, Statistical Modeling Concepts, Data Analytics, Cloud Computing (AWS), Continuous Integration & Deployment (CI/CD)

From the given job description, it can be inferred that the ML Engineer would be working on:
1. Building and deploying machine learning pipelines.
2. Designing and optimizing large-scale machine learning pipelines to be deployed on cloud platforms like AWS.
3. Collaborating with diverse stakeholders for understanding requirements and constraints of ML pipelines.
4. Continuous learning to stay up-to-date with new technologies.
5. Solving open-ended and unstructured questions in an environment where new ML pipeline solutions need to be explored and built from the ground up.


llama_print_timings:        load time =  7584.78 ms
llama_print_timings:      sample time =   127.46 ms /   176 runs   (    0.72 ms per token,  1380.79 tokens per second)
llama_print_timings: prompt eval time =  7584.66 ms /   465 tokens (   16.31 ms per token,    61.31 tokens per second)
llama_print_timings:        eval time =  7375.62 ms /   175 runs   (   42.15 ms per token,    23.73 tokens per second)
llama_print_timings:       total time = 15364.20 ms


' Machine Learning Engineering, AWS Sagemaker, Python, PySpark, Statistical Modeling Concepts, Data Analytics, Cloud Computing (AWS), Continuous Integration & Deployment (CI/CD)\n\nFrom the given job description, it can be inferred that the ML Engineer would be working on:\n1. Building and deploying machine learning pipelines.\n2. Designing and optimizing large-scale machine learning pipelines to be deployed on cloud platforms like AWS.\n3. Collaborating with diverse stakeholders for understanding requirements and constraints of ML pipelines.\n4. Continuous learning to stay up-to-date with new technologies.\n5. Solving open-ended and unstructured questions in an environment where new ML pipeline solutions need to be explored and built from the ground up.'

In [17]:
LLAMA(summary_prompt)

Llama.generate: prefix-match hit




As a seasoned Machine Learning professional with 4+ years of experience in industry, I excel at leveraging advanced statistical analysis and machine learning to drive data-driven decisions. My expertise lies in building scalable ML pipelines from scratch, utilizing AWS Sagemaker and PySpark, as well as working knowledge of R. With a solid understanding of statistical modeling concepts, I have demonstrated the ability to enhance standalone models built by data scientists and deploy large scale machine learning pipelines on the cloud. My experience in coordinating with diverse stakeholders and continuous learning has enabled me to design robust and optimized ML pipelines that are both maintainable and reliable. I am excited to apply my skills as a ML Engineer within Eli Lilly's Consumer Marketing Analytics Team, where I can utilize my expertise to drive business growth."


llama_print_timings:        load time =  7653.16 ms
llama_print_timings:      sample time =   156.63 ms /   183 runs   (    0.86 ms per token,  1168.39 tokens per second)
llama_print_timings: prompt eval time =  8617.48 ms /   735 tokens (   11.72 ms per token,    85.29 tokens per second)
llama_print_timings:        eval time =  7437.12 ms /   182 runs   (   40.86 ms per token,    24.47 tokens per second)
llama_print_timings:       total time = 16585.85 ms


'\n\nAs a seasoned Machine Learning professional with 4+ years of experience in industry, I excel at leveraging advanced statistical analysis and machine learning to drive data-driven decisions. My expertise lies in building scalable ML pipelines from scratch, utilizing AWS Sagemaker and PySpark, as well as working knowledge of R. With a solid understanding of statistical modeling concepts, I have demonstrated the ability to enhance standalone models built by data scientists and deploy large scale machine learning pipelines on the cloud. My experience in coordinating with diverse stakeholders and continuous learning has enabled me to design robust and optimized ML pipelines that are both maintainable and reliable. I am excited to apply my skills as a ML Engineer within Eli Lilly\'s Consumer Marketing Analytics Team, where I can utilize my expertise to drive business growth."'

In [None]:
## LLAMA 2 7B Resume Profile Summary based on Default Profile Summary &  Provied JD ##
"""Nitesh is a seasoned Machine Learning professional with over 4 years of industry experience in leveraging advanced statistical & predictive modeling, machine learning and data analysis 
to drive results through digital transformation. Backed by his Master's degree from IIT Madras, he has a solid understanding of statistics, machine learning, signal processing & Time series analysis. 
He has tackled diverse challenges, from designing experiments, building and implementing research papers, and operating models at scale. 

His proudest accomplishment is designing and developing a scalable and cost-efficient B2B product for customers that can handle close to 10 TBs of sensor data utilizing AWS Services and Airflow.
With his passion for staying up-to-date with recent advancements in NLP and large language models, he is well-versed in implementing end-to-end machine learning pipelines, conducting experiments and 
benchmarking to assess the performance of various model architectures and optimizing hyperparameters. He has experience in deploying AI applications powered by complex deep learning models in 
the field of NLP and is proficient in Python, Java, LP, Machine Learning, and Deep Learning (TensorFlow and Pytorch"""


In [None]:
## Mistral Instruct Profile Summary based on Default Profile Summary &  Provied JD ##

"""Hello, I am Nitesh, an experienced AI/ ML Sr Consultant with a passion for driving digital transformation through advanced statistical & predictive modelling and machine learning. 
   Backed by 8+ years of industry experience in web analytics and related fields, my expertise includes deep dive into emerging product roadmap, trending customer problems, and 
   deriving referential deliverables to implementation teams. I have hands-on experience working with large language models such as GPT, LLAMA, BERT, or 
   Transformer-based architectures, conducting experiments and benchmarking to assess model performance and optimize hyperparameters, and troubleshooting 
   issues during training and deployment. 
   My strong technical knowledge of predictive modeling, feature engineering, model evaluation, and selection has enabled me to deploy AI 
   applications powered by complex deep learning models in the field of NLP, and I am proficient in Python, Java, LP, Machine Learning, and Deep Learning (TensorFlow and Pytorch). 
   In addition to my technical expertise, I have a solid understanding of production machine learning workflow, an ongoing understanding of current state-of-the-art NLU architectures, 
   methods, and processes, and experience in fine tuning and customizing LLM."""


In [18]:
MISTRAL(summary_prompt)

Llama.generate: prefix-match hit



"
Hello, I am an experienced Machine Learning Engineer with a strong foundation in building scalable & efficient Big Data ML pipelines. With expertise in AWS Sagemaker and PySpark, I have successfully designed & deployed large-scale data processing systems for various industries.

Recently, I led the development of a scalable machine learning product that consumes up to 10TBs of data using AWS infrastructure. Additionally, my proficiency in statistical modeling concepts enables me to build accurate forecasting models, resulting in significant value creation.

As an ML Engineer at Eli Lilly's Consumer Experience team, I will leverage my expertise in building and deploying machine learning pipelines while working closely with data scientists, software engineers, and other stakeholders to drive impactful marketing decisions."


llama_print_timings:        load time =  7584.78 ms
llama_print_timings:      sample time =   138.77 ms /   168 runs   (    0.83 ms per token,  1210.61 tokens per second)
llama_print_timings: prompt eval time =  8559.40 ms /   703 tokens (   12.18 ms per token,    82.13 tokens per second)
llama_print_timings:        eval time =  7186.94 ms /   167 runs   (   43.04 ms per token,    23.24 tokens per second)
llama_print_timings:       total time = 16194.74 ms


'\n"\nHello, I am an experienced Machine Learning Engineer with a strong foundation in building scalable & efficient Big Data ML pipelines. With expertise in AWS Sagemaker and PySpark, I have successfully designed & deployed large-scale data processing systems for various industries.\n\nRecently, I led the development of a scalable machine learning product that consumes up to 10TBs of data using AWS infrastructure. Additionally, my proficiency in statistical modeling concepts enables me to build accurate forecasting models, resulting in significant value creation.\n\nAs an ML Engineer at Eli Lilly\'s Consumer Experience team, I will leverage my expertise in building and deploying machine learning pipelines while working closely with data scientists, software engineers, and other stakeholders to drive impactful marketing decisions."'

In [19]:
MISTRAL("Would Einstien have liked working in data science domain if he was born in 21st century?")

Llama.generate: prefix-match hit




Well, who knows! But let's think about it.

We all know Einstein as a great physicist who made significant contributions to our understanding of the universe. He is well-known for his theory of relativity and his famous equation, E=mc².

Now, data science is an interdisciplinary field that deals with extracting knowledge or insights from structured and unstructured data using techniques like statistics, artificial intelligence, machine learning, and others. It has a wide range of applications in various fields like healthcare, finance, marketing, research, etc.

So, would Einstein be interested in working in the data science field if he were alive today? Let's explore some possibilities.

1. Curiosity and thirst for knowledge: One of the key traits that made Einstein a great scientist was his curiosity and desire to understand how the world works. Data science also involves asking questions, exploring patterns, and finding answers in complex data. It is likely that he would have been


llama_print_timings:        load time =  7584.78 ms
llama_print_timings:      sample time =   605.45 ms /   615 runs   (    0.98 ms per token,  1015.77 tokens per second)
llama_print_timings: prompt eval time =   305.01 ms /    22 tokens (   13.86 ms per token,    72.13 tokens per second)
llama_print_timings:        eval time = 25226.85 ms /   614 runs   (   41.09 ms per token,    24.34 tokens per second)
llama_print_timings:       total time = 27586.01 ms


"\n\nWell, who knows! But let's think about it.\n\nWe all know Einstein as a great physicist who made significant contributions to our understanding of the universe. He is well-known for his theory of relativity and his famous equation, E=mc².\n\nNow, data science is an interdisciplinary field that deals with extracting knowledge or insights from structured and unstructured data using techniques like statistics, artificial intelligence, machine learning, and others. It has a wide range of applications in various fields like healthcare, finance, marketing, research, etc.\n\nSo, would Einstein be interested in working in the data science field if he were alive today? Let's explore some possibilities.\n\n1. Curiosity and thirst for knowledge: One of the key traits that made Einstein a great scientist was his curiosity and desire to understand how the world works. Data science also involves asking questions, exploring patterns, and finding answers in complex data. It is likely that he woul

In [66]:
## Defining StuffDocumentsChain
stuff_chain = StuffDocumentsChain(
    llm_chain=llm_chain, document_variable_name="text"
)

In [67]:
docs = loader.load()

In [68]:
print(stuff_chain.run(docs))

Llama.generate: prefix-match hit




Please provide one worded key skill that you see in the job description above.

Please provide one worded key skill that you see in the job description above.



llama_print_timings:        load time =   936.89 ms
llama_print_timings:      sample time =    14.47 ms /    19 runs   (    0.76 ms per token,  1312.97 tokens per second)
llama_print_timings: prompt eval time =  2649.11 ms /    29 tokens (   91.35 ms per token,    10.95 tokens per second)
llama_print_timings:        eval time =  1206.90 ms /    18 runs   (   67.05 ms per token,    14.91 tokens per second)
llama_print_timings:       total time =  3925.97 ms


In [17]:
import requests

In [22]:
URL = "https://www.linkedin.com/jobs/collections/recommended/?currentJobId=3714806549"
page = requests.get(URL)

In [32]:
URL = "https://www.linkedin.com/jobs/collections/recommended/?currentJobId=3714806549"
page = requests.get(URL)

soup = BeautifulSoup(page.content, "html.parser")


In [34]:
results = soup.find(div="job-details-jobs-unified-top-card__content--two-pane")

In [35]:
results