### Creating vector space


In [1]:
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader, PyPDFDirectoryLoader
from langchain_core.prompts import ChatPromptTemplate
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.output_parsers import PydanticOutputParser
from langchain_core.prompts import PromptTemplate
from pydantic import BaseModel, Field
# from langchain import Aggregator

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [2]:
import os
from dotenv import load_dotenv
load_dotenv()
from langchain_groq import ChatGroq

groq_api_key=os.getenv("GROQ_API_KEY")

llm=ChatGroq(groq_api_key=groq_api_key,model_name="Llama3-8b-8192")

llm

ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x000002755A7B5DF0>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x000002755A7B6A50>, model_name='Llama3-8b-8192', model_kwargs={}, groq_api_key=SecretStr('**********'))

In [3]:
os.environ['HUGGINGFACE_API_KEY']=os.getenv("HUGGINGFACE_API_KEY")
from langchain_huggingface import HuggingFaceEmbeddings
embeddings=HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

  from tqdm.autonotebook import tqdm, trange


In [134]:
# 1. Load, chunk and index the contents of the blog to create a retriever.
import bs4
loader = WebBaseLoader(
    web_paths=("https://careers.ey.com/ey/job/Dublin-2-Data-Analytics-&-AI-Manager-Data-Analytics/1088849901/?feedId=337401",)
    # ,
    # bs_kwargs=dict(
    #     parse_only=bs4.SoupStrainer(
    #         class_=("post-content", "post-title", "post-header")
    #     )
    # ),
)

web_docs=loader.load()
web_docs

[Document(metadata={'source': 'https://careers.ey.com/ey/job/Dublin-2-Data-Analytics-&-AI-Manager-Data-Analytics/1088849901/?feedId=337401', 'title': 'Data Analytics & AI - Manager - Data Analytics Job Details | EY', 'description': 'Dublin 2 Data Analytics & AI - Manager - Data Analytics', 'language': 'en-US'}, page_content='\n\n\n\n\n\n\n\n\n\n\n\n\nData Analytics & AI - Manager - Data Analytics Job Details | EY\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nCookie information\nWelcome to the EY careers job search site. This website is based on the SuccessFactors software provided by SAP. On this page, functional and optional cookies are used to improve your experience and design our careers site more user-friendly and in line with your needs. In this context, cookies from providers in third countries may also be used and data may be transmitted to providers such as social media services outside the EU. For this we require your consent. By clicking "Accept All Cookies", 

In [None]:
text_splitter=RecursiveCharacterTextSplitter(chunk_size=3000,chunk_overlap=500)
web_splits=text_splitter.split_documents(web_docs)
vectorstore=Chroma.from_documents(documents=web_splits,embedding=embeddings)
retriever=vectorstore.as_retriever()
retriever

VectorStoreRetriever(tags=['Chroma', 'HuggingFaceEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x0000016DBBCE3680>, search_kwargs={})

In [137]:
web_splits

[Document(metadata={'source': 'https://careers.ey.com/ey/job/Dublin-2-Data-Analytics-&-AI-Manager-Data-Analytics/1088849901/?feedId=337401', 'title': 'Data Analytics & AI - Manager - Data Analytics Job Details | EY', 'description': 'Dublin 2 Data Analytics & AI - Manager - Data Analytics', 'language': 'en-US'}, page_content='Data Analytics & AI - Manager - Data Analytics Job Details | EY'),
 Document(metadata={'source': 'https://careers.ey.com/ey/job/Dublin-2-Data-Analytics-&-AI-Manager-Data-Analytics/1088849901/?feedId=337401', 'title': 'Data Analytics & AI - Manager - Data Analytics Job Details | EY', 'description': 'Dublin 2 Data Analytics & AI - Manager - Data Analytics', 'language': 'en-US'}, page_content='Cookie information\nWelcome to the EY careers job search site. This website is based on the SuccessFactors software provided by SAP. On this page, functional and optional cookies are used to improve your experience and design our careers site more user-friendly and in line with 

In [None]:
## Prompt Template
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"

    # "{resume}"
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

In [21]:
question_answer_chain=create_stuff_documents_chain(llm,prompt)
rag_chain=create_retrieval_chain(retriever,question_answer_chain)

In [22]:
response=rag_chain.invoke({"input":"tell me about this job"})
response

{'input': 'tell me about this job',
 'context': [Document(metadata={'description': 'Dublin 2 Data Analytics & AI - Manager - Data Analytics', 'language': 'en-US', 'source': 'https://careers.ey.com/ey/job/Dublin-2-Data-Analytics-&-AI-Manager-Data-Analytics/1088849901/?feedId=337401', 'title': 'Data Analytics & AI - Manager - Data Analytics Job Details | EY'}, page_content='When you join EY, you will be supported to ensure that you are enhancing your skills from day one.\nContinuous learning, where you can develop the mindset and skills to navigate whatever comes next.\nAs you grow and develop here, you’ll discover opportunities to help customise your career journey, so that it’s as unique as you are - success is defined by you, we will provide the tools and flexibility, so you can make a meaningful impact, your way.\nTransformative leadership, we will give you the insights, coaching and confidence to be the leader the world needs.\nDiverse and inclusive culture, you will be embraced for

## Read Resume

In [None]:
loader = PyPDFDirectoryLoader("../resume")
resume_docs = loader.load()
text_splitter=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=200)
resume_splits=text_splitter.split_documents(resume_docs)
vectorstore=Chroma.from_documents(documents=resume_splits,embedding=embeddings)
resume_retriever=vectorstore.as_retriever()
resume_retriever

VectorStoreRetriever(tags=['Chroma', 'HuggingFaceEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x0000016DAF42BBF0>, search_kwargs={})

In [40]:
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"

    # "{resume}"
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

In [41]:
question_answer_chain=create_stuff_documents_chain(llm,prompt)
rag_chain=create_retrieval_chain(retriever,question_answer_chain)

In [42]:
response=rag_chain.invoke({"input":"can you write cover letter for this job for given resume?"})
response

{'input': 'can you write cover letter for this job for given resume?',
 'context': [Document(metadata={'description': 'Dublin 2 Data Analytics & AI - Manager - Data Analytics', 'language': 'en-US', 'source': 'https://careers.ey.com/ey/job/Dublin-2-Data-Analytics-&-AI-Manager-Data-Analytics/1088849901/?feedId=337401', 'title': 'Data Analytics & AI - Manager - Data Analytics Job Details | EY'}, page_content='We recognise the strength that comes from having a diverse workforce and building a culture where we support all our people to achieve their potential. You’ll be embraced for who you are and empowered to use your voice to help others find theirs. As an equal opportunities’ employer, we welcome applications from people of all backgrounds. Reasonable accommodations are offered at every stage of our recruitment process.\n\xa0\nIf you can confidently demonstrate that you meet the criteria above, please contact us as soon as possible.\n\xa0\nJoin us in building a better working world.\xa0

In [43]:
splits

[Document(metadata={'source': '..\\resume\\Resume.pdf', 'page': 0}, page_content='Tanmay Suhas Jagtap \nGalway, Ireland | Stamp 1G | +353874852825 | tanmayjagtap78@gmail.com | LinkedIn | GitHub | Portfolio  \n \nSUMMARY  \n \nData Scientist with over 5 years of experience in machine learning  and data science . Proven track record of building \nscalable data pipelines, deploying machine learning models, and leading high -impact projects in industries ranging from \nfinancial services to AI -driven innovation. Skilled in collaborating with cross -functional teams to dr ive business growth \nthrough data-driven insights. Holding a Stamp 1G work permit and eligible to take up full-time employment in Ireland. \n \nTECHNICAL SKILLS \n \nLanguages:  Python, SQL, HTML, CSS, JS, R, C# \nAI/ML:  TensorFlow–Keras, SK-learn, Langchain , FastAPI, Firebase, Streamlit, Git, PowerBI, Spacy, GCP, OpenAI \nAnalytics:  PostgreSQL, PowerBI, SQLite3, Pandas, Apache Spark, Excel \n   \nPROFESSIONAL EXPERIE

In [38]:
retriever

VectorStoreRetriever(tags=['Chroma', 'HuggingFaceEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x0000016DAF785D60>, search_kwargs={})

## Combine both vectorspace

In [1]:
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader, PyPDFDirectoryLoader
from langchain_core.prompts import ChatPromptTemplate
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.output_parsers import PydanticOutputParser
from langchain_core.prompts import PromptTemplate
from pydantic import BaseModel, Field
# from langchain import Aggregator

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [3]:
import os
from dotenv import load_dotenv
load_dotenv()
from langchain_groq import ChatGroq

groq_api_key=os.getenv("GROQ_API_KEY")

llm=ChatGroq(groq_api_key=groq_api_key,model_name="Llama3-8b-8192")

llm

ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x000001D357503680>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x000001D357504500>, model_name='Llama3-8b-8192', model_kwargs={}, groq_api_key=SecretStr('**********'))

In [4]:
os.environ['HUGGINGFACE_API_KEY']=os.getenv("HUGGINGFACE_API_KEY")
from langchain_huggingface import HuggingFaceEmbeddings
embeddings=HuggingFaceEmbeddings(model_name="impira/layoutlm-document-qa")

  from tqdm.autonotebook import tqdm, trange
No sentence-transformers model found with name impira/layoutlm-document-qa. Creating a new one with mean pooling.
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [5]:


class QuestionParser(BaseModel):
    resume_q: str = Field(description="Question to retrieve relevant resume infomation")
    job_q: str = Field(description="Question to retrieve relevant job information ")
    response: str = Field(description="General response on quesry if questions are not created")

    
parser = PydanticOutputParser(pydantic_object=QuestionParser)


In [6]:
primary_system_prompt = (
    # "You are carier coach who helps applicants to apply for the job. "
    # "You have resume in your vector space with job description."
    "There are two more bots in pipeline one is with user resume information and another one with job information"
    "Your job is to create questions for them which will help to answer user question and retrieve relevant information"
    "Dont generate questions just give response if you feel you can answer given question by yourself."
    "Answer the user query strictly using this.\n{que_parser}"
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", primary_system_prompt),
        ("human", "{input}")
    ]
)

In [7]:
input_dict = {"input":"can you write a cover letter for me?", "que_parser":parser.get_format_instructions()}


In [None]:
chain = prompt|llm|parser
response = chain.invoke(input_dict)

In [9]:
res = response.dict()

In [10]:
res

{'resume_q': "Could you provide more information about the position you're applying for?",
 'job_q': 'What specific skills or qualifications do you think you would bring to the role?',
 'response': "I'd be happy to help you with that! However, to provide a more tailored cover letter, could you please provide me with more details about the position and your qualifications? That way, I can help you craft a unique and effective cover letter."}

In [None]:
loader = WebBaseLoader(
    web_paths=("https://careers.ey.com/ey/job/Dublin-2-Data-Analytics-&-AI-Manager-Data-Analytics/1088849901/?feedId=337401",)
    # ,
    # bs_kwargs=dict(
    #     parse_only=bs4.SoupStrainer(
    #         class_=("post-content", "post-title", "post-header")
    #     )
    # ),
)



web_docs=loader.load()
text_splitter=RecursiveCharacterTextSplitter(chunk_size=3000,chunk_overlap=400)
web_splits=text_splitter.split_documents(web_docs) 

web_vectorstore=Chroma.from_documents(documents=web_splits,embedding=embeddings, collection_name="job_collection")
web_retriever=web_vectorstore.as_retriever()



In [12]:
web_splits

[Document(metadata={'source': 'https://careers.ey.com/ey/job/Dublin-2-Data-Analytics-&-AI-Manager-Data-Analytics/1088849901/?feedId=337401', 'title': 'Data Analytics & AI - Manager - Data Analytics Job Details | EY', 'description': 'Dublin 2 Data Analytics & AI - Manager - Data Analytics', 'language': 'en-US'}, page_content='Data Analytics & AI - Manager - Data Analytics Job Details | EY\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nCookie information\nWelcome to the EY careers job search site. This website is based on the SuccessFactors software provided by SAP. On this page, functional and optional cookies are used to improve your experience and design our careers site more user-friendly and in line with your needs. In this context, cookies from providers in third countries may also be used and data may be transmitted to providers such as social media services outside the EU. For this we require your consent. By clicking "Accept All Cookies", you agree to these. This a

In [13]:
web_system_prompt = ("You have job information with you help answer given question in consise but effective manner. mention basic job description in response"
                     "With following information:"
                     "<context>"
                     "{context}"
                     "<context>")

web_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", web_system_prompt),
        ("human", "{input}"),
    ]
)

In [14]:
question_answer_chain=create_stuff_documents_chain(llm,web_prompt)
web_rag_chain=create_retrieval_chain(web_retriever,question_answer_chain)
web_res = web_rag_chain.invoke({"input":res['job_q']})
# web_res = web_rag_chain.invoke({"input":"what are requerements for this job?"})

In [15]:
res

{'resume_q': "Could you provide more information about the position you're applying for?",
 'job_q': 'What specific skills or qualifications do you think you would bring to the role?',
 'response': "I'd be happy to help you with that! However, to provide a more tailored cover letter, could you please provide me with more details about the position and your qualifications? That way, I can help you craft a unique and effective cover letter."}

In [16]:
web_res

{'input': 'What specific skills or qualifications do you think you would bring to the role?',
 'context': [Document(metadata={'description': 'Dublin 2 Data Analytics & AI - Manager - Data Analytics', 'language': 'en-US', 'source': 'https://careers.ey.com/ey/job/Dublin-2-Data-Analytics-&-AI-Manager-Data-Analytics/1088849901/?feedId=337401', 'title': 'Data Analytics & AI - Manager - Data Analytics Job Details | EY'}, page_content='Your key responsibilities\xa0\u202f\xa0\n\xa0\nThe nature of this role means no two projects will be the same. That means you’ll need to think on your feet and challenge existing practices to develop solutions for complex problems. You’ll also be collaborating with colleagues across multiple service lines, so you will be expected to build relationships and identify opportunities for our clients to benefit from our expertise in other areas. Occasional travel may be necessary as you will be meeting with key clients, some of whom are the most respected in their fi

In [17]:
web_res['answer']

'Based on the job description, I believe I would bring the following skills and qualifications to the role:\n\n1. Proven experience: With 5+ years of experience as a Data Analytics professional, I have a strong foundation in data analysis, visualization, and reporting, as well as experience working in a consulting environment.\n2. Analytical skills: I possess strong analytical skills, including the ability to assimilate and apply new techniques and knowledge to deliver insights and solve problems.\n3. Communication skills: I am able to effectively communicate technical information to non-technical colleagues and clients, which is critical for this role.\n4. Leadership skills: I have experience supervising and developing others, and I am confident in my ability to lead a team and inspire others to achieve their best.\n5. Technical skills: I have a working knowledge of key analytics tools such as PowerBI, Tableau, Qlik, SQL, Python, R, and cloud computing platforms like Azure, AWS, and G

In [22]:
resume_splits

[Document(metadata={'source': '..\\resume\\Resume.pdf', 'page': 0}, page_content='Tanmay Suhas Jagtap \nGalway, Ireland | Stamp 1G | +353874852825 | tanmayjagtap78@gmail.com | LinkedIn | GitHub | Portfolio  \n \nSUMMARY  \n \nData Scientist with over 5 years of experience in machine learning  and data science . Proven track record of building \nscalable data pipelines, deploying machine learning models, and leading high -impact projects in industries ranging from \nfinancial services to AI -driven innovation. Skilled in collaborating with cross -functional teams to dr ive business growth \nthrough data-driven insights. Holding a Stamp 1G work permit and eligible to take up full-time employment in Ireland. \n \nTECHNICAL SKILLS \n \nLanguages:  Python, SQL, HTML, CSS, JS, R, C# \nAI/ML:  TensorFlow–Keras, SK-learn, Langchain , FastAPI, Firebase, Streamlit, Git, PowerBI, Spacy, GCP, OpenAI \nAnalytics:  PostgreSQL, PowerBI, SQLite3, Pandas, Apache Spark, Excel \n   \nPROFESSIONAL EXPERIE

In [None]:
loader = PyPDFDirectoryLoader("../resume")
resume_docs = loader.load()

resume_splits=text_splitter.split_documents(resume_docs)

resume_vectorstore=Chroma.from_documents(documents=resume_splits, embedding=embeddings, collection_name="resume_collection")
resume_retriever=resume_vectorstore.as_retriever()

In [19]:

resume_system_prompt = ("You are resume expert with user resume in your vectorspace use this job information:"
                     "{job_info}"
                     "and help user with there query"
                     "With following information:"
                     "<context>"
                     "{context}"
                     "<context>")

resume_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", web_system_prompt),
        ("human", "{input}"),
    ]
)


In [20]:
question_answer_chain=create_stuff_documents_chain(llm,resume_prompt)
resume_rag_chain=create_retrieval_chain(resume_retriever,question_answer_chain)
resume_res = resume_rag_chain.invoke({'input':input_dict['input'],'job_info':web_res['answer']})
resume_res

{'input': 'can you write a cover letter for me?',
 'job_info': 'Based on the job description, I believe I would bring the following skills and qualifications to the role:\n\n1. Proven experience: With 5+ years of experience as a Data Analytics professional, I have a strong foundation in data analysis, visualization, and reporting, as well as experience working in a consulting environment.\n2. Analytical skills: I possess strong analytical skills, including the ability to assimilate and apply new techniques and knowledge to deliver insights and solve problems.\n3. Communication skills: I am able to effectively communicate technical information to non-technical colleagues and clients, which is critical for this role.\n4. Leadership skills: I have experience supervising and developing others, and I am confident in my ability to lead a team and inspire others to achieve their best.\n5. Technical skills: I have a working knowledge of key analytics tools such as PowerBI, Tableau, Qlik, SQL, 

In [21]:
resume_res['answer']

"Based on the job information provided, here's a cover letter that highlights your skills, experience, and accomplishments:\n\nDear Hiring Manager,\n\nI am excited to apply for the [Position] role at EY, as advertised. With a strong background in data analysis, machine learning, and innovation, I believe I would be a valuable addition to your team.\n\nAs a data analyst with a passion for solving complex problems, I have developed a unique ability to think creatively and challenge existing practices to develop innovative solutions. My experience in working with various data tools and technologies, including Langchain, JS, GPT 4o, SQLite3, FastAPI, and more, has equipped me with the skills to analyze and visualize data in a way that drives business decisions.\n\nMy portfolio showcases my ability to work on diverse projects, from developing a chatbot powered by ChatGPT to implementing predictive computer vision models. I am proud of my achievements, including winning the 1st Prize at the 

"Based on the job information provided, here's a cover letter that highlights your skills, experience, and accomplishments:
\n\nDear Hiring Manager,

\n\nI am excited to apply for the [Position] role at EY, as advertised. With a strong background in data analysis, machine learning, and innovation, I believe I would be a valuable addition to your team.\n\nAs a data analyst with a passion for solving complex problems, I have developed a unique ability to think creatively and challenge existing practices to develop innovative solutions. My experience in working with various data tools and technologies, including Langchain, JS, GPT 4o, SQLite3, FastAPI, and more, has equipped me with the skills to analyze and visualize data in a way that drives business decisions.
\n\nMy portfolio showcases my ability to work on diverse projects, from developing a chatbot powered by ChatGPT to implementing predictive computer vision models. I am proud of my achievements, including winning the 1st Prize at the Datathon 2024, University of Galway, and securing the IP Creation Award for Excellence in Patent Filing.\n\n
In addition to my technical skills, I possess excellent communication and collaboration skills, which have been demonstrated through my experience working with colleagues across multiple service lines. I am confident in my ability to build strong relationships and identify opportunities for our clients to benefit from our expertise in other areas.\n\n
I am particularly drawn to EY's commitment to innovation, inclusivity, and employee development. As someone who is passionate about contributing to the growth of the team and bringing energy, enthusiasm, and leadership to develop others, I believe I would thrive in your dynamic and supportive environment.\n\n
Thank you for considering my application. I would welcome the opportunity to discuss my qualifications further and explore how I can contribute to the success of EY.\n\n
Sincerely,\n
[Your Name]\n\n

Feel free to customize it as per your needs and preferences!"

In [None]:
s = 'cbbd'

ans = ''
subsrt = []
p2 = len(s)-1

for i in range(len(s)):
    ans = ''
    p2 = len(s)-1
    p1 =i
    print('startwith:',s[p1])
    while p1<=p2:
        if s[p1]==s[p2]:
            
            ans+=s[p1]
            p1+=1
            p2-=1
        else:
            p2-=1
            p1=i

    subsrt.append(ans)

max(subsrt, key=len)

c
b
b
d


'c'

In [15]:
subsrt

['c', 'b', 'b', 'd']

In [5]:
import pandas as pd
data = {
    'id': [1, 2, 3, 4, 5, 6, 7, 8],
    'position_name': ['Data Analyst', 'Data Scientist', 'Data Engineer', 'Data Analyst', 'Data Scientist', 'Data Analyst', 'Data Scientist', 'Data Analyst'],
    'start_date': ['2023-01-01', '2023-01-31', '2023-01-01', '2023-03-01', '2023-03-31', '2023-05-01', '2023-06-01', '2023-07-01'],
    'end_date': ['2023-01-31', '2023-03-31', '2023-01-31', '2023-03-31', '2023-05-31', '2023-06-30', '2023-07-31', '2023-08-31'],
    'user_id': [1, 1, 2, 3, 3, 4, 4, 5],
}

# Create DataFrame
user_experiences = pd.DataFrame(data)

df_analyst = user_experiences[user_experiences['position_name']=='Data Analyst']
df_scientist = user_experiences[user_experiences['position_name']=='Data Scientist']
df = pd.merge(df_analyst,df_scientist,how='inner',left_on='end_date',right_on='start_date')
df[df['user_id_x']==df['user_id_y']]

Unnamed: 0,id_x,position_name_x,start_date_x,end_date_x,user_id_x,id_y,position_name_y,start_date_y,end_date_y,user_id_y
0,1,Data Analyst,2023-01-01,2023-01-31,1,2,Data Scientist,2023-01-31,2023-03-31,1
1,4,Data Analyst,2023-03-01,2023-03-31,3,5,Data Scientist,2023-03-31,2023-05-31,3


In [3]:
user_experiences

Unnamed: 0,id,position_name,start_date,end_date,user_id
0,1,Data Analyst,2023-01-01,2023-01-31,1
1,2,Data Scientist,2023-02-01,2023-03-31,1
2,3,Data Engineer,2023-01-01,2023-01-31,2
3,4,Data Analyst,2023-03-01,2023-03-31,3
4,5,Data Scientist,2023-04-01,2023-05-31,3
5,6,Data Analyst,2023-05-01,2023-06-30,4
6,7,Data Scientist,2023-06-01,2023-07-31,4
7,8,Data Analyst,2023-07-01,2023-08-31,5


In [8]:
def intersection(a, b):
  a1 = set(a)
  b1 = set(b)
  ans = a1.intersection(b1)
  return list(ans)

In [9]:
intersection([1,2,3,4,5,6],[3,4,8,9])

[3, 4]

In [11]:
products={}

In [29]:
import requests
from bs4 import BeautifulSoup

url = "https://directsolarpower.com/collections/solar-kits"
response = requests.get(url)
soup = BeautifulSoup(response.text, "html.parser")
product_links = [a['href'] for a in soup.find_all('a', class_='productitem--image-link', href=True)]

# Print the extracted product URLs
print(product_links)

['/collections/solar-kits/products/eg4-12kpv-hybrid-solar-inverter-eg4-wallmount-indoor-battery-bundle-14-3kwh-85-8kwh-capacity', '/collections/solar-kits/products/eg4-18kpv-hybrid-inverter-eg4-ll-s-48v-100ah-lithium-battery-bundle-30-72kwh', '/collections/solar-kits/products/eg4-6000xp-eg4-powerpro-wallmount-battery-bundle-14-3-85-8kwh-capacity', '/collections/solar-kits/products/eg4-ll-s-lithium-battery-bundle-30-72kwh-6-server-rack-batteries-with-pre-assembled-enclosed-battery-rack', '/collections/solar-kits/products/eg4-powerpro-ess-system-bundle-14-3-85-8kwh-capacity-eg4-18kpv-eg4-powerpro-wallmount-battery', '/collections/solar-kits/products/eg4-48v-indoor-280ah-wall-mount-battery-eg4-18kpv-18-000w-pv-input-inverter-system-bundle', '/collections/solar-kits/products/eg4-48v-indoor-280ah-wall-mount-battery-eg4-6000xp-off-grid-inverter-system-bundle']


In [30]:
products['solar-kits'] = product_links

In [None]:
products

{'solar-panels': ['/collections/solar-panels/products/hyperion-400w-bifacial-solar-panel-black-up-to-500w-with-bifacial-gain',
  '/collections/solar-panels/products/hyperion-395w-bifacial-solar-panel-black-up-to-495w-with-bifacial-gain',
  '/collections/solar-panels/products/aptos-400w-bifacial-solar-panels-black-up-to-500w-with-bifacial-gain-dna-108-bf10',
  '/collections/solar-panels/products/aptos-440-watt-bifacial-solar-panels-black-dna-120-bf10-440w',
  '/collections/solar-panels/products/boviet-450w-bifacial-solar-panel-silver-up-to-540w-with-bifacial-gain-bvm6612m-450s-h-hc-bf-dg',
  '/collections/solar-panels/products/adani-solar-530w-half-cut-mono-crystalline-bifacial-solar-panels-silver-asb-m10-144-530-up-to-645w-with-bifacial-gain',
  '/collections/solar-panels/products/canadian-solar-400w-mono-crystalline-solar-panel-black-cs6r-400ms-hl',
  '/collections/solar-panels/products/hyperion-395w-bifacial-solar-panel-black-up-to-495w-with-bifacial-gain-copy',
  '/collections/solar

In [40]:
url = "https://directsolarpower.com"

new_prod_dict = {}
for key in products.keys():
    new_prod_dict[key] = {}
    products_list = products[key]
    for product in products_list:
        response = requests.get(url+product)
        soup = BeautifulSoup(response.text, 'html.parser')
        pdf_links = [a['href'] for a in soup.find_all('a', href=True) if '.pdf' in a['href']]
        new_prod_dict[key][product.split('/')[-1]] = pdf_links
        # print(f'{key}:{product}')

# print(new_prod_dict)

In [None]:
import json
with open('products.json', 'r') as fp:
    json.dump(new_prod_dict, fp)

In [None]:
import pandas as pd

df = pd.DataFrame(product_links, columns=["Product URL"])
df.to_csv("product_links.csv", index=False)

In [2]:
import json
with open('products.json') as json_file:
    data = json.load(json_file)

In [8]:
i=0
for cat in data.keys():
    for prod in data[cat].keys():
        l = data[cat][prod]
        i+=len(l)

i

149

In [2]:
ver_list = [{'timestamp': 1000, 'content': 'Hello'},
 {'timestamp': 2100, 'content': 'How are you'},
  {'timestamp': 1100, 'content': 'Hi there'}, 
  {'timestamp': 3000, 'content': 'Goodbye'}]

def myFunc(e):
  return e['timestamp']

def sort_versions(ver_list):
    ver_list.sort(key=myFunc)
    return ver_list

print(sort_versions(ver_list))

[{'timestamp': 1000, 'content': 'Hello'}, {'timestamp': 1100, 'content': 'Hi there'}, {'timestamp': 2100, 'content': 'How are you'}, {'timestamp': 3000, 'content': 'Goodbye'}]
