In [1]:
import os
from pathlib import Path

from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.llms import LlamaCpp

In [2]:
# Settings shared by every LlamaCpp model created in this notebook.
# NOTE(review): the original note says a value of 1 is enough on Metal, yet 5 is used here -- confirm which is intended.
n_gpu_layers = 5  # Metal set to 1 is enough.
n_batch = 512  # Should be between 1 and n_ctx; consider the amount of RAM of your Apple Silicon chip.
# Streams generated tokens to stdout through StreamingStdOutCallbackHandler.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

In [3]:
# Root of the LM Studio model cache. Defaults to ~/.cache/lm-studio/models so the
# notebook no longer depends on one user's home directory; set the
# LM_STUDIO_MODELS_DIR environment variable to point somewhere else.
LM_STUDIO_MODELS_DIR = Path(
    os.environ.get("LM_STUDIO_MODELS_DIR")
    or Path.home() / ".cache" / "lm-studio" / "models"
)


def gguf_path(repo: str, filename: str, publisher: str = "TheBloke") -> str:
    """Return the path of a GGUF model file inside the LM Studio cache.

    Args:
        repo: Model repository directory, e.g. "Llama-2-7b-Chat-GGUF".
        filename: GGUF file name inside that directory.
        publisher: Publisher directory that contains the repository.

    Returns:
        The path as a plain string, which is how the model constructors
        in this notebook receive it.
    """
    return str(LM_STUDIO_MODELS_DIR / publisher / repo / filename)


_PATH_LLAMA_ = gguf_path("Llama-2-7b-Chat-GGUF", "llama-2-7b-chat.Q5_K_M.gguf")
_PATH_MISTRAL_ = gguf_path("Mistral-7B-Instruct-v0.1-GGUF", "mistral-7b-instruct-v0.1.Q5_K_M.gguf")
_PATH_MISTRAL_ORCA_ = gguf_path("Mistral-7B-OpenOrca-GGUF", "mistral-7b-openorca.Q5_K_M.gguf")

In [6]:
# Constructor arguments for the model loaded from the Mistral-7B-OpenOrca GGUF file.
mistral_kwargs = {
    "model_path": _PATH_MISTRAL_ORCA_,
    "n_gpu_layers": n_gpu_layers,
    "n_batch": n_batch,
    "n_ctx": 4096,
    # Keep f16_kv enabled: per the original author, leaving it off causes
    # problems after a couple of calls.
    "f16_kv": True,
    "callback_manager": callback_manager,
    "verbose": False,
    "max_tokens": 1000,
}
MISTRAL = LlamaCpp(**mistral_kwargs)

llama_model_loader: loaded meta data with 20 key-value pairs and 291 tensors from /Users/niteshkumarsharma/.cache/lm-studio/models/TheBloke/Mistral-7B-OpenOrca-GGUF/mistral-7b-openorca.Q5_K_M.gguf (version GGUF V2 (latest))
llama_model_loader: - tensor    0:                token_embd.weight q5_K     [  4096, 32002,     1,     1 ]
llama_model_loader: - tensor    1:              blk.0.attn_q.weight q5_K     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    2:              blk.0.attn_k.weight q5_K     [  4096,  1024,     1,     1 ]
llama_model_loader: - tensor    3:              blk.0.attn_v.weight q6_K     [  4096,  1024,     1,     1 ]
llama_model_loader: - tensor    4:         blk.0.attn_output.weight q5_K     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    5:            blk.0.ffn_gate.weight q5_K     [  4096, 14336,     1,     1 ]
llama_model_loader: - tensor    6:              blk.0.ffn_up.weight q5_K     [  4096, 14336,     1,     1 ]
llama_model_loader: 

In [5]:
# Constructor arguments for the model loaded from the Llama-2-7b-Chat GGUF file.
llama_kwargs = {
    "model_path": _PATH_LLAMA_,
    "n_gpu_layers": n_gpu_layers,
    "n_batch": n_batch,
    "n_ctx": 4096,
    # Keep f16_kv enabled: per the original author, leaving it off causes
    # problems after a couple of calls.
    "f16_kv": True,
    "callback_manager": callback_manager,
    "verbose": False,
    "max_tokens": 1000,
}
LLAMA = LlamaCpp(**llama_kwargs)

llama_model_loader: loaded meta data with 19 key-value pairs and 291 tensors from /Users/niteshkumarsharma/.cache/lm-studio/models/TheBloke/Llama-2-7b-Chat-GGUF/llama-2-7b-chat.Q5_K_M.gguf (version GGUF V2 (latest))
llama_model_loader: - tensor    0:                token_embd.weight q5_K     [  4096, 32000,     1,     1 ]
llama_model_loader: - tensor    1:           blk.0.attn_norm.weight f32      [  4096,     1,     1,     1 ]
llama_model_loader: - tensor    2:            blk.0.ffn_down.weight q6_K     [ 11008,  4096,     1,     1 ]
llama_model_loader: - tensor    3:            blk.0.ffn_gate.weight q5_K     [  4096, 11008,     1,     1 ]
llama_model_loader: - tensor    4:              blk.0.ffn_up.weight q5_K     [  4096, 11008,     1,     1 ]
llama_model_loader: - tensor    5:            blk.0.ffn_norm.weight f32      [  4096,     1,     1,     1 ]
llama_model_loader: - tensor    6:              blk.0.attn_k.weight q5_K     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor

In [7]:
from langchain.document_loaders import WebBaseLoader
from langchain.chains.summarize import load_summarize_chain

In [8]:
from langchain.chains.llm import LLMChain
from langchain.prompts import PromptTemplate


from langchain.chains.combine_documents.stuff import StuffDocumentsChain

In [9]:
## Prompt definition: key-skill extraction
# The template has a single variable, {job_desc}, wrapped in double quotes.
skills_template = (
    "Get Key Skills from Job Description Below:\n"
    '"{job_desc}"\n'
    "KEY SKILLS:"
)

skill_prompt = PromptTemplate.from_template(skills_template)

In [10]:
## Prompt definition: tailor a resume summary to a job description
# Template variables:
#   summary  - the existing profile summary to rewrite
#   job_desc - the job description the new summary should target
#   limit    - the word-count limit to respect
# Each quoted section is opened AND closed. Previously the {job_desc} and
# {limit} sections were missing their closing quote, leaving dangling quotes
# in the text sent to the model.
prompt_summary_template = """CREATE NEW RESUME SUMMARY BY MODIFYING ORIGIN SUMMARY BELOW:
"{summary}"

BY UTILISING JOB DESCRIPTION PROVIDED BELOW:
"{job_desc}"

DO NOT EXCEED WORD COUNT BY:
"{limit}"

NEW RESUME SUMMARY:"""

prompt_summary = PromptTemplate.from_template(prompt_summary_template)

In [11]:
# Baseline profile summary; it is substituted for {summary} when the summary prompt is formatted.
# NOTE(review): the text contains "analysis& machine" (missing space) -- left untouched because it is part of the prompt input.
PROFILE_SUMMARY = """
Hello, I am Nitesh, an Experienced Machine Learning professional with 4+ years of industry expertise in leveraging advanced statistical analysis& machine learning and data analysis to lead data driven decisions

With a Master's degree from IIT Madras, I have a solid understanding of statistics, machine learning, signal processing and time series analysis.

Achievement
    Led development of a scalable & efficient Big Data ML product, which consumes close to 10TBs of data using AWS 
    Built Scalable S3 Prefixes Validation Routine using Apache Spark, achieving performance of 1M Prefixes Validation in 2 minutes using 5 Node Cluster
    Improved Logistics Business Process by Accurate Forecasting Models resulting in value creation of 1.5M USD

"""

In [12]:
# Job description text; it is substituted for {job_desc} in both the skills prompt and the summary prompt.
jd_description = """
How You Will Make a Difference


As a Senior Machine Learning Engineer specializing in entity resolution, you will be responsible for designing, developing, and implementing advanced machine learning algorithms and models to tackle the intricacies of data matching, entity linkage, and deduplication. You will work closely with cross-functional teams, including Machine Learning engineers, data analysts, and domain experts, to understand requirements and translate them into scalable and efficient solutions. Your work will have a direct impact on developing new and improving existing machine learning solutions, increasing operational efficiency & data quality, and driving meaningful business outcomes for Reltio through application of machine learning and data science practices.


Responsibilities


    Develop state-of-the-art algorithms and machine learning models for entity resolution, leveraging both supervised and unsupervised techniques.
    Conduct thorough data analysis, identify data quality issues, and implement data preprocessing and feature engineering techniques to improve model performance.
    Explore and experiment with new data sources, features, and modeling techniques to enhance entity resolution accuracy and efficiency.
    Work closely with domain experts to understand specific requirements, domain-specific challenges, and incorporate expert knowledge into the entity resolution process.
    Evaluate and benchmark existing entity resolution models and algorithms, and propose innovative approaches to address limitations and improve performance.
    Collaborate with cross-functional teams to integrate entity resolution solutions into existing data management systems and processes.
    Conduct regular performance monitoring and analysis to ensure the accuracy, reliability, and scalability of entity resolution solutions.
    Stay up-to-date with the latest advancements in data science, machine learning, and entity resolution techniques, and proactively apply new methodologies and technologies to enhance existing solutions.
    Document research findings, methodologies, and model evaluations, and effectively communicate complex technical concepts to both technical and non-technical stakeholders.


Qualification


    Master's or equivalent experience in Computer Science, Data Science, Statistics, or a related quantitative field.
    Proven experience (5+ years) working as a Machine Learning engineer, Data Scientist or similar roles, specializing in applied data science and entity resolution.
    Strong knowledge of and experience in machine learning, data mining, and statistical analysis for model development, validation, implementation and in product integration
    Proficiency in programming languages such as Python or Scala, and experience working with data manipulation and analysis libraries (e.g., Pandas, NumPy, scikit-learn).
    Experience with large-scale data processing frameworks and technologies (i.e. Spark) and proficiency in SQL and database concepts.
    Solid understanding of feature engineering, dimensionality reduction, and data preprocessing techniques.
    Excellent problem-solving skills and the ability to develop creative and innovative solutions to complex data challenges.
    Strong communication skills, with the ability to effectively collaborate with cross-functional teams and explain technical concepts to non-technical stakeholders.
    Attention to detail, ability to work independently, and a passion for staying updated with the latest advancements in the field of data science


Preference


    Masters or PhD in Computer Science, Data Science, Statistics, or a related quantitative field would be preferred
    Preference to candidates with familiarity and prior experience with entity resolution techniques, such as record linkage, deduplication, and data matching, along with an understanding of associated challenges and trade-offs
    Prior experience with frameworks such as PyTorch, TensorFlow, Keras or MLFlow is a plus
"""

In [13]:
# Render both prompts to plain strings that are passed to the models in later cells.
skills_prompt = skill_prompt.format(job_desc=jd_description)
summary_prompt = prompt_summary.format(
    summary=PROFILE_SUMMARY,
    job_desc=jd_description,
    limit=100,
)

In [14]:
# Run the skills-extraction prompt through the model loaded from the Llama-2-7b-Chat file.
LLAMA(skills_prompt)


1. Machine Learning (50%) - Developing state-of-the-art algorithms and machine learning models for entity resolution, leveraging both supervised and unsupervised techniques. Conducting thorough data analysis to identify data quality issues and implementing data preprocessing and feature engineering techniques to improve model performance. Exploring new data sources, features, and modeling techniques to enhance entity resolution accuracy and efficiency.
2. Data Science (30%) - Applying advanced statistical analysis for model development, validation, implementation, and integration into existing systems. Developing creative and innovative solutions to complex data challenges. Communicating technical concepts effectively to both technical and non-technical stakeholders.
3. Programming Languages (20%) - Proficiency in programming languages such as Python or Scala, working with data manipulation and analysis libraries (e.g., Pandas, NumPy, scikit-learn). Experience with large-scale data pr

'\n1. Machine Learning (50%) - Developing state-of-the-art algorithms and machine learning models for entity resolution, leveraging both supervised and unsupervised techniques. Conducting thorough data analysis to identify data quality issues and implementing data preprocessing and feature engineering techniques to improve model performance. Exploring new data sources, features, and modeling techniques to enhance entity resolution accuracy and efficiency.\n2. Data Science (30%) - Applying advanced statistical analysis for model development, validation, implementation, and integration into existing systems. Developing creative and innovative solutions to complex data challenges. Communicating technical concepts effectively to both technical and non-technical stakeholders.\n3. Programming Languages (20%) - Proficiency in programming languages such as Python or Scala, working with data manipulation and analysis libraries (e.g., Pandas, NumPy, scikit-learn). Experience with large-scale dat

In [15]:
# Run the same skills-extraction prompt through the model loaded from the Mistral-7B-OpenOrca file.
MISTRAL(skills_prompt)

 Machine Learning, Entity Resolution, Data Science, Statistical Analysis, Programming (Python/Scala), Large-scale Data Processing Frameworks, Feature Engineering, Dimensionality Reduction, Data Preprocessing Techniques, Problem Solving, Cross-functional Collaboration, Communication, Attention to Detail

' Machine Learning, Entity Resolution, Data Science, Statistical Analysis, Programming (Python/Scala), Large-scale Data Processing Frameworks, Feature Engineering, Dimensionality Reduction, Data Preprocessing Techniques, Problem Solving, Cross-functional Collaboration, Communication, Attention to Detail'

In [16]:
# Run the resume-summary prompt (limit=100) through the model loaded from the Llama-2-7b-Chat file.
LLAMA(summary_prompt)


As a seasoned Machine Learning professional with over 4 years of experience in leveraging advanced statistical analysis and machine learning to drive data-driven decisions, I excel in developing scalable & efficient solutions for entity resolution. With a Master's degree from IIT Madras and a proven track record of leading development of big data ML products, improving business processes, and creating value worth 1.5M USD, I am confident in my ability to design, develop, and implement cutting-edge algorithms and models for entity resolution. My proficiency in programming languages such as Python/Scala, experience working with large-scale data processing frameworks (e.g., Spark), solid understanding of feature engineering, dimensionality reduction, and data preprocessing techniques make me a strong candidate for this role at Reltio. I am passionate about staying updated on the latest advancements in the field of data science and enjoy working independently, collaborating with cross-fun

"\nAs a seasoned Machine Learning professional with over 4 years of experience in leveraging advanced statistical analysis and machine learning to drive data-driven decisions, I excel in developing scalable & efficient solutions for entity resolution. With a Master's degree from IIT Madras and a proven track record of leading development of big data ML products, improving business processes, and creating value worth 1.5M USD, I am confident in my ability to design, develop, and implement cutting-edge algorithms and models for entity resolution. My proficiency in programming languages such as Python/Scala, experience working with large-scale data processing frameworks (e.g., Spark), solid understanding of feature engineering, dimensionality reduction, and data preprocessing techniques make me a strong candidate for this role at Reltio. I am passionate about staying updated on the latest advancements in the field of data science and enjoy working independently, collaborating with cross-f

In [None]:
## Llama 2 7B resume profile summary, based on the default profile summary and the provided JD ##
# NOTE(review): this cell was never executed (In [None]) and its text differs from the
# LLAMA(summary_prompt) output shown in this notebook -- presumably pasted from another run; confirm.
"""Nitesh is a seasoned Machine Learning professional with over 4 years of industry experience in leveraging advanced statistical & predictive modeling, machine learning and data analysis 
to drive results through digital transformation. Backed by his Master's degree from IIT Madras, he has a solid understanding of statistics, machine learning, signal processing & Time series analysis. 
He has tackled diverse challenges, from designing experiments, building and implementing research papers, and operating models at scale. 

His proudest accomplishment is designing and developing a scalable and cost-efficient B2B product for customers that can handle close to 10 TBs of sensor data utilizing AWS Services and Airflow.
With his passion for staying up-to-date with recent advancements in NLP and large language models, he is well-versed in implementing end-to-end machine learning pipelines, conducting experiments and 
benchmarking to assess the performance of various model architectures and optimizing hyperparameters. He has experience in deploying AI applications powered by complex deep learning models in 
the field of NLP and is proficient in Python, Java, LP, Machine Learning, and Deep Learning (TensorFlow and Pytorch"""


In [None]:
## Mistral Instruct profile summary, based on the default profile summary and the provided JD ##
# NOTE(review): this cell was never executed (In [None]); the text mentions "8+ years" and web analytics,
# which do not appear in PROFILE_SUMMARY or jd_description -- presumably from a different input; confirm.

"""Hello, I am Nitesh, an experienced AI/ ML Sr Consultant with a passion for driving digital transformation through advanced statistical & predictive modelling and machine learning. 
   Backed by 8+ years of industry experience in web analytics and related fields, my expertise includes deep dive into emerging product roadmap, trending customer problems, and 
   deriving referential deliverables to implementation teams. I have hands-on experience working with large language models such as GPT, LLAMA, BERT, or 
   Transformer-based architectures, conducting experiments and benchmarking to assess model performance and optimize hyperparameters, and troubleshooting 
   issues during training and deployment. 
   My strong technical knowledge of predictive modeling, feature engineering, model evaluation, and selection has enabled me to deploy AI 
   applications powered by complex deep learning models in the field of NLP, and I am proficient in Python, Java, LP, Machine Learning, and Deep Learning (TensorFlow and Pytorch). 
   In addition to my technical expertise, I have a solid understanding of production machine learning workflow, an ongoing understanding of current state-of-the-art NLU architectures, 
   methods, and processes, and experience in fine tuning and customizing LLM."""


In [17]:
# Run the resume-summary prompt (limit=100) through the model loaded from the Mistral-7B-OpenOrca file.
MISTRAL(summary_prompt)



Experienced Machine Learning Engineer specializing in entity resolution with a proven track record of designing and developing advanced machine learning algorithms and models. Skilled in data analysis, feature engineering and model performance evaluation for addressing complex challenges such as matching, deduplication, and linkage. Able to work closely with cross-functional teams, incorporate expert knowledge, and maintain accuracy, reliability and scalability of entity resolution solutions. Proficient in programming languages like Python or Scala, and experienced with large-scale data processing frameworks such as Spark. Strong communication skills enable effective collaboration with both technical and non-technical stakeholders."

Remember to keep the summary concise yet informative, highlighting key strengths and experiences relevant to the job description.

'\n\nExperienced Machine Learning Engineer specializing in entity resolution with a proven track record of designing and developing advanced machine learning algorithms and models. Skilled in data analysis, feature engineering and model performance evaluation for addressing complex challenges such as matching, deduplication, and linkage. Able to work closely with cross-functional teams, incorporate expert knowledge, and maintain accuracy, reliability and scalability of entity resolution solutions. Proficient in programming languages like Python or Scala, and experienced with large-scale data processing frameworks such as Spark. Strong communication skills enable effective collaboration with both technical and non-technical stakeholders."\n\nRemember to keep the summary concise yet informative, highlighting key strengths and experiences relevant to the job description.'

In [19]:
# Free-form question sent straight to the model, without any prompt template.
MISTRAL("Would Einstien have liked working in data science domain if he was born in 21st century?")

Llama.generate: prefix-match hit




Well, who knows! But let's think about it.

We all know Einstein as a great physicist who made significant contributions to our understanding of the universe. He is well-known for his theory of relativity and his famous equation, E=mc².

Now, data science is an interdisciplinary field that deals with extracting knowledge or insights from structured and unstructured data using techniques like statistics, artificial intelligence, machine learning, and others. It has a wide range of applications in various fields like healthcare, finance, marketing, research, etc.

So, would Einstein be interested in working in the data science field if he were alive today? Let's explore some possibilities.

1. Curiosity and thirst for knowledge: One of the key traits that made Einstein a great scientist was his curiosity and desire to understand how the world works. Data science also involves asking questions, exploring patterns, and finding answers in complex data. It is likely that he would have been


llama_print_timings:        load time =  7584.78 ms
llama_print_timings:      sample time =   605.45 ms /   615 runs   (    0.98 ms per token,  1015.77 tokens per second)
llama_print_timings: prompt eval time =   305.01 ms /    22 tokens (   13.86 ms per token,    72.13 tokens per second)
llama_print_timings:        eval time = 25226.85 ms /   614 runs   (   41.09 ms per token,    24.34 tokens per second)
llama_print_timings:       total time = 27586.01 ms


"\n\nWell, who knows! But let's think about it.\n\nWe all know Einstein as a great physicist who made significant contributions to our understanding of the universe. He is well-known for his theory of relativity and his famous equation, E=mc².\n\nNow, data science is an interdisciplinary field that deals with extracting knowledge or insights from structured and unstructured data using techniques like statistics, artificial intelligence, machine learning, and others. It has a wide range of applications in various fields like healthcare, finance, marketing, research, etc.\n\nSo, would Einstein be interested in working in the data science field if he were alive today? Let's explore some possibilities.\n\n1. Curiosity and thirst for knowledge: One of the key traits that made Einstein a great scientist was his curiosity and desire to understand how the world works. Data science also involves asking questions, exploring patterns, and finding answers in complex data. It is likely that he woul

In [66]:
## Define the StuffDocumentsChain; document_variable_name is set to "text".
# NOTE(review): `llm_chain` is not defined in any cell visible here, so this cell fails on a
# fresh kernel unless it is defined elsewhere -- TODO restore or add the cell that builds it.
stuff_chain = StuffDocumentsChain(
    llm_chain=llm_chain, document_variable_name="text"
)

In [67]:
# NOTE(review): `loader` is not defined in any cell visible here (WebBaseLoader is imported
# but never instantiated in view) -- TODO confirm where it is created.
docs = loader.load()

In [68]:
# Run the chain over the loaded documents and print what it returns.
print(stuff_chain.run(docs))

Llama.generate: prefix-match hit




Please provide one worded key skill that you see in the job description above.

Please provide one worded key skill that you see in the job description above.



llama_print_timings:        load time =   936.89 ms
llama_print_timings:      sample time =    14.47 ms /    19 runs   (    0.76 ms per token,  1312.97 tokens per second)
llama_print_timings: prompt eval time =  2649.11 ms /    29 tokens (   91.35 ms per token,    10.95 tokens per second)
llama_print_timings:        eval time =  1206.90 ms /    18 runs   (   67.05 ms per token,    14.91 tokens per second)
llama_print_timings:       total time =  3925.97 ms


In [17]:
import requests

In [22]:
# Fetch the LinkedIn job page. requests applies no timeout by default, so an
# explicit one keeps this cell from blocking forever if the server never answers.
# The cell that follows repeats this request before parsing the response.
URL = "https://www.linkedin.com/jobs/collections/recommended/?currentJobId=3714806549"
page = requests.get(URL, timeout=30)

In [32]:
# BeautifulSoup is used below but is not imported in any cell visible here,
# which raises NameError on a fresh kernel.
from bs4 import BeautifulSoup

# Fetch the LinkedIn job page; the explicit timeout prevents an indefinite hang
# (requests applies no timeout by default).
URL = "https://www.linkedin.com/jobs/collections/recommended/?currentJobId=3714806549"
page = requests.get(URL, timeout=30)

# Parse the raw response bytes with Python's built-in HTML parser.
soup = BeautifulSoup(page.content, "html.parser")


In [34]:
# Look up the first <div> carrying the job-details top-card CSS class.
# The previous call, soup.find(div="..."), passed `div` as a keyword argument,
# which BeautifulSoup treats as a filter on an HTML *attribute* named "div"
# rather than on the tag name, so it could not match this element.
results = soup.find("div", class_="job-details-jobs-unified-top-card__content--two-pane")

In [35]:
# Show what the lookup above returned.
results