In [3]:
!pip install transformers==4.31.0 tokenizers==0.13.3
!pip install einops==0.6.1
!pip install xformers==0.0.22.post7
!pip install langchain==0.1.4
!pip install faiss-gpu==1.7.1.post3
!pip install sentence_transformers
!pip install accelerate
!pip install --upgrade torch torchvision torchaudio
!pip install bitsandbytes

Collecting transformers<5.0.0,>=4.41.0 (from sentence_transformers)
  Downloading transformers-4.47.1-py3-none-any.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.1/44.1 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers<0.22,>=0.21 (from transformers<5.0.0,>=4.41.0->sentence_transformers)
  Downloading tokenizers-0.21.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Downloading transformers-4.47.1-py3-none-any.whl (10.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.1/10.1 MB[0m [31m88.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading tokenizers-0.21.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.0/3.0 MB[0m [31m69.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tokenizers, transformers
  Attempting uninstall: tokenizers
    Found existing installation: tokenizers 0.13.3


Collecting bitsandbytes
  Downloading bitsandbytes-0.45.0-py3-none-manylinux_2_24_x86_64.whl.metadata (2.9 kB)
Downloading bitsandbytes-0.45.0-py3-none-manylinux_2_24_x86_64.whl (69.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.1/69.1 MB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.45.0


In [4]:
from torch import cuda, bfloat16
import transformers

model_id = 'meta-llama/Llama-2-7b-chat-hf'

device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

# set quantization configuration to load large model with less GPU memory
# this requires the `bitsandbytes` library
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16
)

# begin initializing HF items, you need an access token
hf_auth = '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'
model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)

model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,
    use_auth_token=hf_auth,
    low_cpu_mem_usage=True

)

# enable evaluation mode to allow model inference
model.eval()

print(f"Model loaded on {device}")



config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]



model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

Model loaded on cuda:0


In [5]:
tokenizer = transformers.AutoTokenizer.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)



tokenizer_config.json:   0%|          | 0.00/1.62k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

In [6]:
stop_list = ['\nHuman:', '\n```\n']

stop_token_ids = [tokenizer(x)['input_ids'] for x in stop_list]
stop_token_ids

[[1, 29871, 13, 29950, 7889, 29901], [1, 29871, 13, 28956, 13]]

In [7]:
import torch

stop_token_ids = [torch.LongTensor(x).to(device) for x in stop_token_ids]
stop_token_ids

[tensor([    1, 29871,    13, 29950,  7889, 29901], device='cuda:0'),
 tensor([    1, 29871,    13, 28956,    13], device='cuda:0')]

In [8]:
from transformers import StoppingCriteria, StoppingCriteriaList

# define custom stopping criteria object
class StopOnTokens(StoppingCriteria):
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        for stop_ids in stop_token_ids:
            if torch.eq(input_ids[0][-len(stop_ids):], stop_ids).all():
                return True
        return False

stopping_criteria = StoppingCriteriaList([StopOnTokens()])

In [9]:
generate_text = transformers.pipeline(
    model=model,
    tokenizer=tokenizer,
    return_full_text=True,  # langchain expects the full text
    task='text-generation',
    # we pass model parameters here too
    stopping_criteria=stopping_criteria,  # without this model rambles during chat
    temperature=0.1,  # 'randomness' of outputs, 0.0 is the min and 1.0 the max
    max_new_tokens=512,  # max number of tokens to generate in the output
    repetition_penalty=1.1  # without this output begins repeating
)

Device set to use cuda:0


In [10]:
res = generate_text("Explain me the difference between Data Lakehouse and Data Warehouse.")
print(res[0]["generated_text"])

Explain me the difference between Data Lakehouse and Data Warehouse. Unterscheidung between Data Lakehouse and Data Warehouse?
A data lakehouse is a centralized repository that stores all of an organization's data, both structured and unstructured, in its raw form. It is designed to handle large volumes of data from various sources and provide a single source of truth for data-driven insights. On the other hand, a data warehouse is a repository that stores structured data in a specific format, typically optimized for querying and analysis.
Here are some key differences between a data lakehouse and a data warehouse:
1. Structure: A data lakehouse stores data in its raw, unprocessed form, while a data warehouse stores data in a structured format, typically optimized for querying and analysis.
2. Sources: A data lakehouse can store data from various sources, including internal systems, external databases, and IoT devices, while a data warehouse typically only stores data from within the o

In [11]:
from langchain.llms import HuggingFacePipeline

llm = HuggingFacePipeline(pipeline=generate_text)

# checking again that everything is working fine
llm(prompt="Explain me the difference between Data Lakehouse and Data Warehouse.")

  warn_deprecated(


' Unterscheidung between data lakehouse and data warehouse? A data warehouse is a centralized repository that stores data in a structured format, making it easy to access and analyze. A data lakehouse, on the other hand, is a repository that stores raw, unprocessed data in its original form, allowing for more flexibility in how the data can be analyzed and processed.\n\nA data warehouse is designed to support fast query performance by storing data in a highly optimized structure, typically using relational database management systems (RDBMS). In contrast, a data lakehouse stores data in a flexible, schema-on-read format, which allows for more flexibility in how the data can be analyzed and processed.\n\nHere are some key differences between data warehouses and data lakehouses:\n\n1. Structure: A data warehouse stores data in a highly optimized structure, while a data lakehouse stores data in a flexible, schema-on-read format.\n2. Data processing: A data warehouse is designed for fast q

In [34]:
def train_model_resume_based(data):
    from langchain.schema import Document

# Convert text to a list of Document objects
    print(data)
    ans=data['data']['resume_text']
    #print(ans)
    documents = [Document(page_content=ans, metadata={'source': 'example_text'})]

    from langchain.text_splitter import RecursiveCharacterTextSplitter

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
    all_splits = text_splitter.split_documents(documents)


    from langchain.embeddings import HuggingFaceEmbeddings
    from langchain.vectorstores import FAISS

    model_name = "sentence-transformers/all-mpnet-base-v2"
    model_kwargs = {"device": "cuda"}

    embeddings = HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs)

# storing embeddings in the vector store
    vectorstore = FAISS.from_documents(all_splits, embeddings)


    from langchain.chains import ConversationalRetrievalChain

    chain = ConversationalRetrievalChain.from_llm(llm, vectorstore.as_retriever(), return_source_documents=True)


    chat_history = []

    query = data['data']['question']
    result = chain({"question": query, "chat_history": chat_history})
    #print(result['answer'])

    return result['answer']


In [27]:
#data = {'data': {'question': 'what is the experience', 'model_used': 'resume_based', 'resume_text': "Mohit Anand 6280959850  anand2003mohit@gmail.com  https://www.linkedin.com/in/mohit-anand-a4b07033a/ https://github.com/MohitAnand01 Education Thapar University, Bachelor of Engineering in Computer Science | CGPA: 7.97/10, Guru Gobind Singh Public School, Class 12th AISSCE | 92.00%, D.A.V Public School, Class 10th AISSCE | 92.60%, Projects: StudyNotion (React, Express, MongoDB) – January 2023–December 2023: Comprehensive web application to enhance student study experience; MoodCare (Python, React, Django, NLP, Machine Learning) – July 2024–Present: Mental health evaluation and prediction using ML techniques; Breast Cancer Detection (Python, Machine Learning) – January 2023–July 2023: Preprocessing dataset and implementing ML models; Human Face Expression Detection (Python, Deep Learning, CV2) – August 2023–December 2023: Face detection system using deep learning. Technical Skills: Python, C++, SQL, JavaScript, HTML/CSS, React, Express, React, Node.js, Flask, FastAPI, Scikit Learn, TensorFlow, Git, Docker, Google Cloud Platform, VS Code, Visual Studio, EC2; Libraries: Pandas, NumPy, Matplotlib. Achievements: Second Position in Chess - Silver medal in Thapar Olympics."}}



In [33]:
#train_model_resume_based(data)

 Mohit Anand has experience working on several projects, including StudyNotion, MoodCare, Breast Cancer Detection, and Human Face Expression Detection. He has also worked on various technical skills such as Python, C++, SQL, JavaScript, HTML/CSS, React, Express, Node.js, Flask, FastAPI, Scikit Learn, TensorFlow, Git, Docker, Google Cloud Platform, VS Code, Visual Studio, EC2, and libraries like Pandas, NumPy, and Matplotlib.


In [24]:
def train_model_general_based(data):

    from langchain.llms import HuggingFacePipeline

    llm = HuggingFacePipeline(pipeline=generate_text)

# checking again that everything is working fine


    question = str(data['data']['question'])
    result=llm(prompt=question)
    #res = generate_text(question)
    #result=res[0]["generated_text"]


    return result

In [14]:
!pip install flask flask-ngrok
!pip install pyngrok

Collecting flask-ngrok
  Downloading flask_ngrok-0.0.25-py3-none-any.whl.metadata (1.8 kB)
Downloading flask_ngrok-0.0.25-py3-none-any.whl (3.1 kB)
Installing collected packages: flask-ngrok
Successfully installed flask-ngrok-0.0.25
Collecting pyngrok
  Downloading pyngrok-7.2.2-py3-none-any.whl.metadata (8.4 kB)
Downloading pyngrok-7.2.2-py3-none-any.whl (22 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.2.2


In [None]:
from flask import Flask,request,jsonify
from pyngrok import ngrok,conf

app = Flask(__name__)


@app.route('/', methods=['POST'])
def home():
    # Retrieve data sent as form data or JSON

    data = request.json  # This will be a dictionary
    print(data)
    #data['data']['model_used']


    if data['data']['model_used']=="resume_based":
        response=train_model_resume_based(data)
        return jsonify({"Answer": response}), 200
    else:
        response=train_model_general_based(data)
   # if data and 'data' in data:
       # received_data = data['data']
        #print(f"Received data: {received_data}")
        # You can process the dictionary data here
        # Example: print each key-value pair
        #for key, value in received_data.items():
            #if key==model_used:
#             if model_used==resume_based:
                #response=train_model_resume_based(data)
             # else:
                #response=train_model_general_based(data)
            #print(f"{key}: {value}")

        return jsonify({"Answer": response}), 200
        #return response

if __name__ == "__main__":
    # Start ngrok and specify the port (e.g., 8000)
    conf.get_default().auth_token = "2qsx2EAOhLYue7VtfKFAN95DCaR_99iTg1Xkfxbyvgp4HZmX"
    public_url = ngrok.connect(5000)
    print(f"Public URL: {public_url}")

    # Run Flask on the same port
    app.run(port=5000)

Public URL: NgrokTunnel: "https://a640-35-240-158-176.ngrok-free.app" -> "http://localhost:5000"
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m


{'data': {'question': 'what is the experience in java', 'model_used': 'resume_based', 'resume_text': 'Mohit Anand\n6280959850 |anand2003mohit@gmail.com |https://www.linkedin.com/in/mohit-anand-a4b07033a/ |\nhttps://github.com/MohitAnand01\nEducation\nThapar University Patiala, India\nBachelor of Engineering in Computer Science |CGPA: 7.97/10 September. 2021 – July 2025\nGuru Gobind Singh Public School Ludhiana, India\nClass 12th AISSCE |92.00% March. 2021\nD.A.V Public School Ludhiana, India\nClass 10th AISSCE |92.60% March. 2019\nProjects\nStudyNotion |React,Express,MongoDB January 2023 – December 2023\n•Create a comprehensive web application to enhance the study experience for students.\n•Integrate various educational tools and resources on a single platform.\n•Technologies Used: Frontend: React Backend: Express Database: MongoDB\n•Provide a seamless user experience with robust functionalities.\nMoodCare |Python, React, Django,NLP,Machine Learning July 2024 – Present\n•Develop an app

INFO:werkzeug:127.0.0.1 - - [30/Dec/2024 13:42:41] "POST / HTTP/1.1" 200 -


{'data': {'question': 'what is the skills', 'model_used': 'resume_based', 'resume_text': 'Mohit Anand\n6280959850 |anand2003mohit@gmail.com |https://www.linkedin.com/in/mohit-anand-a4b07033a/ |\nhttps://github.com/MohitAnand01\nEducation\nThapar University Patiala, India\nBachelor of Engineering in Computer Science |CGPA: 7.97/10 September. 2021 – July 2025\nGuru Gobind Singh Public School Ludhiana, India\nClass 12th AISSCE |92.00% March. 2021\nD.A.V Public School Ludhiana, India\nClass 10th AISSCE |92.60% March. 2019\nProjects\nStudyNotion |React,Express,MongoDB January 2023 – December 2023\n•Create a comprehensive web application to enhance the study experience for students.\n•Integrate various educational tools and resources on a single platform.\n•Technologies Used: Frontend: React Backend: Express Database: MongoDB\n•Provide a seamless user experience with robust functionalities.\nMoodCare |Python, React, Django,NLP,Machine Learning July 2024 – Present\n•Develop an application to 

INFO:werkzeug:127.0.0.1 - - [30/Dec/2024 13:43:44] "POST / HTTP/1.1" 200 -


{'data': {'question': 'what is the capital of india', 'model_used': 'general_model', 'resume_text': None}}


INFO:werkzeug:127.0.0.1 - - [30/Dec/2024 13:44:13] "POST / HTTP/1.1" 200 -
