In [52]:
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
import base64
import email
import os

from langchain_community.agent_toolkits import GmailToolkit
from langchain_community.tools.gmail.utils import (
    build_resource_service,
)

# OAuth scope requested for the Gmail API: read-only access.
# NOTE(review): the toolkit created in this cell also exposes draft/send tools;
# those presumably cannot succeed under a read-only scope — confirm before using them.
SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']

def create_credentials():
    """Load cached Gmail OAuth credentials, refreshing or re-authorizing as needed.

    Reads ``token.json`` when it exists. If the cached credentials are missing or
    not valid, they are refreshed when they are expired and carry a refresh token;
    otherwise the installed-app flow is run from ``credentials.json`` on a local
    server. Whenever refreshed or newly authorized credentials are obtained they
    are written back to ``token.json``.

    Returns:
        The credentials object to hand to the Gmail API clients.
    """
    token_path = 'token.json'

    cached = None
    if os.path.exists(token_path):
        cached = Credentials.from_authorized_user_file(token_path, SCOPES)

    # Usable cached credentials are returned as-is, without rewriting the token file.
    if cached and cached.valid:
        return cached

    if cached and cached.expired and cached.refresh_token:
        cached.refresh(Request())
        fresh = cached
    else:
        # No refreshable token available: the user has to authorize interactively.
        auth_flow = InstalledAppFlow.from_client_secrets_file(
            'credentials.json', SCOPES)
        fresh = auth_flow.run_local_server(port=0)

    # Persist the authorized user info for the next run.
    with open(token_path, 'w') as token_file:
        token_file.write(fresh.to_json())
    return fresh

# Authenticate once and build the Gmail clients used by the rest of the notebook.
creds = create_credentials()
# Resource object handed to the LangChain Gmail toolkit.
api_resource = build_resource_service(credentials=creds)
# Direct Gmail v1 client; passed to get_message() further down.
service = build('gmail', 'v1', credentials=creds)
toolkit = GmailToolkit(api_resource=api_resource)

In [53]:
# List the tools the toolkit exposes; the bare last expression displays them.
tools = toolkit.get_tools()
tools

[GmailCreateDraft(api_resource=<googleapiclient.discovery.Resource object at 0x000002399A2BFAF0>),
 GmailSendMessage(api_resource=<googleapiclient.discovery.Resource object at 0x000002399A2BFAF0>),
 GmailSearch(api_resource=<googleapiclient.discovery.Resource object at 0x000002399A2BFAF0>),
 GmailGetMessage(api_resource=<googleapiclient.discovery.Resource object at 0x000002399A2BFAF0>),
 GmailGetThread(api_resource=<googleapiclient.discovery.Resource object at 0x000002399A2BFAF0>)]

In [54]:
from langchain import hub
# Instruction text substituted into the template's `instructions` variable.
instructions = """You are an assistant. return output only."""
# Fetch the prompt template by its hub identifier, then bind the instructions into it.
base_prompt = hub.pull("langchain-ai/openai-functions-template")
prompt = base_prompt.partial(instructions=instructions)

In [55]:
from langchain_openai import ChatOpenAI, OpenAI
from langchain.agents import AgentExecutor, create_openai_functions_agent

# Chat model that drives the agent (temperature=0 — presumably to keep its answers stable).
llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0, streaming=True, max_tokens=2048)
# Build an OpenAI-functions agent from the model, the Gmail tools and the prompt.
agent = create_openai_functions_agent(llm, toolkit.get_tools(), prompt)

In [56]:
# Executor wrapping the agent together with the Gmail tools it may call.
agent_executor = AgentExecutor(
    agent=agent,
    tools=toolkit.get_tools(),
)

In [69]:
# Ask the agent to run the Gmail search tool and answer with only the newest
# matching message id.
search_request = {
    "input": "`search_gmail` with `{'query': 'from':'Medium Daily Digest', 'max_results': 1}`. Return Most recent matching mail with id ONLY."
}
search_result = agent_executor.invoke(search_request)
print(search_result["output"])

Most recent matching email ID: 18d85ba63be5cf84


In [70]:
import re

# The agent answers in free text (e.g. "Most recent matching email ID: <id>"),
# so extract the id by pattern instead of relying on the exact wording.
# The ids seen in this notebook are long hexadecimal tokens; when no such token
# is present, fall back to the text after the last colon, trimmed on both sides
# so trailing newlines or spaces do not end up in the id.
agent_output = search_result["output"]
id_match = re.search(r"\b[0-9a-fA-F]{12,}\b", agent_output)
if id_match:
    search_result_output = id_match.group(0)
else:
    search_result_output = agent_output.split(":")[-1].strip()
print(search_result_output)

18d85ba63be5cf84


In [71]:
def _decode_part(part):
    """Return the decoded text of one MIME part, or None when it has no payload.

    The bytes are decoded with the charset declared on the part (UTF-8 when none
    is declared). An unknown charset or undecodable bytes fall back to UTF-8 with
    replacement characters, so one bad byte does not discard the whole body.
    """
    payload = part.get_payload(decode=True)
    if payload is None:
        return None
    charset = part.get_content_charset() or 'utf-8'
    try:
        return payload.decode(charset)
    except (LookupError, UnicodeDecodeError):
        return payload.decode('utf-8', errors='replace')


def get_message(service, user_id, message_id):
    """Fetch one Gmail message in raw form and return its HTML body as text.

    Args:
        service: Gmail API client exposing ``users().messages().get(...).execute()``.
        user_id: Value passed as ``userId`` to the API call.
        message_id: Value passed as ``id`` to the API call.

    Returns:
        For a multipart message, the decoded content of the first ``text/html``
        part; for a single-part message, the decoded body whatever its content
        type. ``None`` when the request or decoding fails, or when a multipart
        message has no ``text/html`` part. Failures are reported with ``print``
        rather than raised.
    """
    try:
        message = service.users().messages().get(userId=user_id, id=message_id, format='raw').execute()
        print('Message snippet: %s' % message['snippet'])

        # The raw message is URL-safe base64 of the full RFC 822 text.
        msg_bytes = base64.urlsafe_b64decode(message['raw'].encode('ASCII'))
        mime_msg = email.message_from_bytes(msg_bytes)

        if not mime_msg.is_multipart():
            return _decode_part(mime_msg)

        # Find the message body: the first text/html part wins.
        for part in mime_msg.walk():
            if part.get_content_type() == 'text/html':
                return _decode_part(part)

        print('An error occurred: %s' % 'no text/html part found in message')
        return None
    except Exception as error:
        # Best-effort by design: report the failure and let the caller see None.
        print('An error occurred: %s' % error)
        return None

# Example: fetch the message found above and extract its body
user_id = 'me'  # the currently authenticated user
message_id = search_result_output  # ID of the message to fetch
html_content = get_message(service, user_id, message_id)


Message snippet: Rhcp Stories for Rhcp @rhcp1134·Become a member Medium daily digest Today&#39;s highlights Cobus Greyling Cobus Greyling· 5 min read Corrective RAG (CRAG) By now, RAG is an accepted and well


In [72]:
from langchain.schema.document import Document
from langchain_community.document_transformers import BeautifulSoupTransformer

# Wrap the HTML body in a Document and run the BeautifulSoup transformer,
# asking it to extract only <a> tags.
doc = Document(page_content=html_content)
bs = BeautifulSoupTransformer()
bs_content = bs.transform_documents(documents=[doc], tags_to_extract=["a"])

In [73]:
# Print the extracted text of each transformed document.
for page in bs_content:
    print(page.page_content)

Stories for (https://medium.com/@rhcp1134?source=email-2483a20590b9-1707327775011-digest.reader-------------------------b4ee6f77_9df0_4218_a348_b5c40ff9910f) Rhcp @rhcp1134 (https://medium.com/@rhcp1134?source=email-2483a20590b9-1707327775011-digest.reader-------------------------b4ee6f77_9df0_4218_a348_b5c40ff9910f) Become a member (https://medium.com/plans?source=email-2483a20590b9-1707327775011-digest.reader-------------------------b4ee6f77_9df0_4218_a348_b5c40ff9910f) Cobus Greyling (https://medium.com/@cobusgreyling?source=email-2483a20590b9-1707327775011-digest.reader--5e40467099f8----0-98------------------b4ee6f77_9df0_4218_a348_b5c40ff9910f-1) Corrective RAG (CRAG) By now, RAG is an accepted and well established standard for addressing data relevance for in-context… James Presbitero Jr. (https://medium.com/@jamespresbiterojr?source=email-2483a20590b9-1707327775011-digest.reader-b9c709a27d5e-9b04f399d88c----1-98------------------b4ee6f77_9df0_4218_a348_b5c40ff9910f-1) Practice i

In [75]:
from bs4 import BeautifulSoup
from urllib.parse import urlparse

# Parse the HTML body directly and collect every <a> element.
soup = BeautifulSoup(html_content, 'html.parser')
links = soup.find_all('a')

def validation(url):
    """Return True when ``url`` looks like a Medium article under a user profile.

    A URL is accepted only if its scheme is ``https``, its host is exactly
    ``medium.com``, its first path segment starts with ``@`` and that segment is
    followed by a non-empty second segment. A bare profile URL, with or without
    a trailing slash, is rejected.

    Args:
        url: The URL string to check.

    Returns:
        bool: True for ``https://medium.com/@user/<something>``, False otherwise.
    """
    parsed_url = urlparse(url)
    if parsed_url.scheme != "https" or parsed_url.netloc != "medium.com":
        return False

    # A path like "/@user/slug" splits into ['', '@user', 'slug'].
    path_parts = parsed_url.path.split('/')
    return (
        len(path_parts) >= 3
        and path_parts[1].startswith('@')
        # "/@user/" yields an empty third element: a profile page, not an article
        and bool(path_parts[2])
    )

# Map each accepted article URL (query string removed) to its anchor text.
url_dict = {}

for link in links:
    href = link.get('href')
    if not href:
        # <a> elements without an href have no URL to validate; skip them
        # instead of failing on None.split(...).
        continue

    text = link.get_text(strip=True)
    url = href.split("?")[0]

    if validation(url):
        url_dict[url] = text

In [81]:
from typing import List
from langchain.output_parsers import PydanticOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_openai import ChatOpenAI

# Parsing with a custom Pydantic object.
# One entry of the parsed answer: a URL and a description of it.
class URL_TABLE(BaseModel):
    url:str = Field(description="url")
    description:str = Field(description="description that describe url")

# Top-level shape of the parsed answer: a list of URL_TABLE entries.
class URLTextList(BaseModel):
    url_text_pairs: List[URL_TABLE]

# Parser that turns the model's text answer into a URLTextList instance.
parser = PydanticOutputParser(pydantic_object=URLTextList)
# NOTE(review): this rebinds `llm`, replacing the model object created for the agent earlier.
llm = ChatOpenAI(model="gpt-3.5-turbo-0125")
# Pipe the model output into the PydanticOutputParser.
# The prompt embeds url_dict and an example of the expected output layout.
chain = llm | parser
result = chain.invoke(f"""get dict {url_dict}. Show value and url if it is related to LLM or Python or Programming.
    Output should be dictionary like this .
    <Example output>                       
                     {{
    "url_text_pairs": [
        {{
            "url": "https://example.com/1",
            "description": "Example description 1"
        }},
        {{
            "url": "https://example.com/2",
            "description": "Example description 2"
        }}
    ]}}""")

In [77]:
# Show the parsed result object.
print(result)

url_text_pairs=[URL_TABLE(url='https://medium.com/@cobusgreyling/corrective-rag-crag-5e40467099f8', description='Corrective RAG (CRAG)By now, RAG is an accepted and well established standard for addressing data relevance for in-context…'), URL_TABLE(url='https://medium.com/@marcllopart/from-text-and-images-to-chat-transforming-data-into-conversations-with-pinecone-and-langchain-a57e196aff20', description='From text and images to Chat: Transforming data into conversations with…Transform text & images into chat: unleash AI with Pinecone & LangChain for dynamic data conversations.'), URL_TABLE(url='https://medium.com/@florian_algo/advanced-rag-03-using-ragas-llamaindex-for-rag-evaluation-84756b82dca7', description='Advanced RAG 03: Using RAGAs + LlamaIndex for RAG evaluationIn this article, we first introduce evaluation metrics for RAG proposed by RAGAs(Retrieval Augmented…'), URL_TABLE(url='https://medium.com/@zainbaq/querygpt-harnessing-generative-ai-to-query-your-data-with-natural-lang

In [78]:
# Print only the URL of each selected entry.
for content in result.url_text_pairs:
    print(content.url)

https://medium.com/@cobusgreyling/corrective-rag-crag-5e40467099f8
https://medium.com/@marcllopart/from-text-and-images-to-chat-transforming-data-into-conversations-with-pinecone-and-langchain-a57e196aff20
https://medium.com/@florian_algo/advanced-rag-03-using-ragas-llamaindex-for-rag-evaluation-84756b82dca7
https://medium.com/@zainbaq/querygpt-harnessing-generative-ai-to-query-your-data-with-natural-language-63fdfefaa888
https://medium.com/@dassum/fine-tune-large-language-model-llm-on-a-custom-dataset-with-qlora-fb60abdeba07
https://medium.com/@venkat.ramrao/training-an-llm-re-ranker-using-direct-preference-optimization-981c732e92b0


In [79]:
from datetime import date

# Current local date; used in the digest header below.
today = date.today()

In [80]:
# Build the digest text: a dated header followed by one numbered entry
# (quoted description plus URL) per selected article.
digest_parts = [f"Here is your Today Daily LLM Digest!: {today}\n\n"]
for idx, content in enumerate(result.url_text_pairs, 1):
    digest_parts.append(f"{idx}. \"{content.description}\" \n\t-url: {content.url}\n")
prompt_format = "".join(digest_parts)
print(prompt_format)

Here is your Today Daily LLM Digest!: 2024-02-13

1. "Corrective RAG (CRAG)By now, RAG is an accepted and well established standard for addressing data relevance for in-context…" 
	-url: https://medium.com/@cobusgreyling/corrective-rag-crag-5e40467099f8
2. "From text and images to Chat: Transforming data into conversations with…Transform text & images into chat: unleash AI with Pinecone & LangChain for dynamic data conversations." 
	-url: https://medium.com/@marcllopart/from-text-and-images-to-chat-transforming-data-into-conversations-with-pinecone-and-langchain-a57e196aff20
3. "Advanced RAG 03: Using RAGAs + LlamaIndex for RAG evaluationIn this article, we first introduce evaluation metrics for RAG proposed by RAGAs(Retrieval Augmented…" 
	-url: https://medium.com/@florian_algo/advanced-rag-03-using-ragas-llamaindex-for-rag-evaluation-84756b82dca7
4. "QueryGPT — Harnessing Generative AI To Query Your Data With Natural Language.A prototype tool powered by Large Language Models to make 