<a href="https://colab.research.google.com/github/SampathK/MyExperimentalNotebooks/blob/main/Large_Text_Summarization_Using_LangChain_AWS_BedRock_Mistral_Large.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%%sh
pip install -Uq boto3 langchain langchain_community tiktoken

     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 139.2/139.2 kB 940.0 kB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 974.6/974.6 kB 9.4 MB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.2/2.2 MB 32.2 MB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.1/1.1 MB 40.1 MB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 12.3/12.3 MB 30.2 MB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 82.7/82.7 kB 6.7 MB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 321.8/321.8 kB 21.9 MB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 127.4/127.4 kB 8.3 MB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 49.2/49.2 kB 3.3 MB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 145.0/145.0 kB 5.0 MB/s eta 0:00:00


In [None]:
file_path = "./input/meeting_3.txt"

# Read the whole transcript into one string. The encoding is passed explicitly
# so decoding does not depend on the platform's default locale encoding.
# NOTE(review): assumes the transcript file is UTF-8 — confirm for your inputs.
with open(file_path, "r", encoding="utf-8") as file:
    call_transcripts = file.read()

In [None]:
import re

def preprocess_transcript(transcript):
    """Clean a raw meeting transcript before summarization.

    The cleaning steps run in this order:

    1. remove ``[hh:mm:ss]`` timestamps;
    2. remove ``<...>`` markup tags and ``[...]`` bracketed annotations;
    3. remove every character that is not a word character, whitespace, or
       one of ``. , ? ! :``;
    4. collapse each run of whitespace (newlines included) into one space and
       trim both ends.

    Step 2 has to come before step 3: stripping special characters first
    would delete the ``<``, ``>``, ``[`` and ``]`` delimiters and leave the
    tag/annotation text behind in the output.

    Args:
        transcript: Raw transcript text.

    Returns:
        The cleaned, single-line transcript string.
    """
    # Remove timestamps (assuming the format is [hh:mm:ss])
    transcript = re.sub(r'\[\d{2}:\d{2}:\d{2}\]', '', transcript)

    # Remove markup tags and bracketed annotations. The negated character
    # classes keep each match inside one pair of delimiters, so text between
    # two separate tags is preserved.
    transcript = re.sub(r'<[^<>]*>', '', transcript)
    transcript = re.sub(r'\[[^\[\]]*\]', '', transcript)

    # Remove any remaining special characters
    transcript = re.sub(r'[^\w\s.,?!:]', '', transcript)

    # Collapse whitespace last so gaps left by the removals above are merged
    transcript = re.sub(r'\s+', ' ', transcript).strip()

    return transcript

In [None]:
# Clean the raw transcript text that was read from disk.
# NOTE(review): `processed_transcript` is not referenced by any later cell
# visible here — the summarization chain loads the raw file again through
# `get_document(file_path)`. Confirm whether the cleaned text should be used.
processed_transcript = preprocess_transcript(call_transcripts)

In [None]:
from langchain_community.document_loaders import TextLoader
def get_document(file_path):
    """Load the file at ``file_path`` with ``TextLoader`` and return the result of its ``load()`` call."""
    return TextLoader(file_path).load()

In [None]:
# Load the transcript file through the loader helper; `doc` is what the
# splitter chunks further down.
doc = get_document(file_path)

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

def split_doc(doc, chunk_size, chunk_overlap):
    """Split loaded documents into overlapping chunks.

    Args:
        doc: Documents to split, as returned by a loader's ``load()``.
        chunk_size: Maximum chunk size handed to the splitter.
        chunk_overlap: Overlap between consecutive chunks handed to the splitter.

    Returns:
        The chunks produced by ``RecursiveCharacterTextSplitter.split_documents``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        # The recursive splitter tries separators in list order, so they are
        # listed coarsest first: paragraph break, line break, sentence end.
        # With '\n' ahead of '\n\n' the paragraph separator was never reached.
        separators=['\n\n', '\n', '.'],
        keep_separator=False,  # Do not keep the separators
    )

    return splitter.split_documents(doc)

In [None]:
import boto3
from langchain_community.chat_models import BedrockChat
from google.colab import userdata
def get_aws_bedrock_summarization_llm():
    """Build the Bedrock chat model used for summarization.

    Creates a ``bedrock-runtime`` boto3 client in ``us-east-1`` using the
    access keys stored in Colab ``userdata`` and wraps it in ``BedrockChat``
    for the Mistral Large model id with fixed generation parameters.

    Returns:
        The configured ``BedrockChat`` instance.
    """
    bedrock_client = boto3.client(
        service_name="bedrock-runtime",
        region_name="us-east-1",
        aws_access_key_id=userdata.get("AWS_SERVER_PUBLIC_KEY"),
        aws_secret_access_key=userdata.get("AWS_SERVER_SECRET_KEY"),
    )
    return BedrockChat(
        client=bedrock_client,
        model_id="mistral.mistral-large-2402-v1:0",
        model_kwargs={
            "temperature": 0.2,
            "max_tokens": 8192,
            "top_p": 1,
            "top_k": 200,
            "stop": ["</s>"],
        },
    )


In [None]:
from langchain.chains import MapReduceDocumentsChain, ReduceDocumentsChain
from langchain_text_splitters import CharacterTextSplitter
from langchain_core.prompts import PromptTemplate
from langchain.chains.llm import LLMChain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain



# One model instance is shared by the map and reduce chains.
llm = get_aws_bedrock_summarization_llm()

# Map prompt: applied to every chunk on its own. `{docs}` is the only template
# variable; the doubled braces are escaped literal braces of the JSON example.
# The field list carries a single "14. Project Name" item (the prompt used to
# contain two differently worded copies of it).
map_template = """<s>[INST]<text>{docs}</text>
Please summarize the above meeting transcript within the text XML tag into a comprehensive summary, including all key details. The summary should include the following fields:

1. Participants: List all unique participants or human or owner names mentioned in the meeting.
2. Summary: Provide a brief overview of the main topics discussed during the meeting.
3. Key Points: Extract the most significant points discussed.
4. Decisions Made: List all decisions made during the meeting.
5. Action Items: Include detailed action items with descriptions, owners, and deadlines (if accurately determined from the transcript, otherwise do not include the deadline field).
6. Strengths: Capture any positive aspects or strengths highlighted during the meeting.
7. Weaknesses: Capture any negative aspects or weaknesses highlighted during the meeting.
8. Progress Updates: Provide updates on previous action items and overall project progress.
9. Meeting Date: Record the date when the meeting took place (YYYY-MM-DD).
10. Meeting Duration: Accurately calculate the duration of the meeting in minutes, else do not include this field.
11. Meeting Type: Categorize the type of meeting (e.g., planning, review, status update).
12. Sentiment Scores: Provide numeric scores for positive and negative sentiments.
13. Tags: Include keywords or tags relevant to the meeting content.
14. Project Name: Associate the meeting with a only one relevant specific project.
15. Location: Specify the location (physical or virtual) of the meeting.
Exclude any PCI data from the summary.
Ignore and do not include in the JSON any field for which accurate information is not identified in the summary.
The final summary should be in JSON format only. Avoid any additional texts, structured as follows:
{{
  "participants": ["Participant 1", "Participant 2", "Participant 3",...],
  "summary": "Brief overview of the meeting and main topics discussed.",
  "key_points": [
    "Key point 1",
    "Key point 2",
    "Key point 3",
    ...
  ],
  "decisions_made": [
    "Decision 1",
    "Decision 2",
    "Decision 3",
    ...
  ],
  "action_items": [
    {{"description": "Action item 1", "owner": "Person 1", "deadline": "YYYY-MM-DD"}},
    {{"description": "Action item 2", "owner": "Person 2", "deadline": "YYYY-MM-DD"}},
    ...
  ],
  "strengths": [
    "Positive aspect 1",
    "Positive aspect 2",
    ...
  ],
  "weaknesses": [
    "Negative aspect 1",
    "Negative aspect 2",
    ...
  ],
  "progress_updates": [
    "Update on previous action item 1",
    "Update on previous action item 2",
    ...
  ],
  "meeting_date": "YYYY-MM-DD",
  "meeting_duration": "60 Min",
  "meeting_type": "Planning",
  "positive_sentiment_score": 0.8,
  "negative_sentiment_score": 0.1,
  "tags": ["tag1", "tag2", "tag3",...],
  "project_name": "Project Name",
  "location": "Location"
  }}
  [/INST]
"""
map_prompt = PromptTemplate.from_template(map_template)
# Chain that runs the map prompt against the shared model.
map_chain = LLMChain(llm=llm, prompt=map_prompt)
# Reduce prompt: receives the per-chunk summaries in `{docs}` and asks for one
# consolidated summary in the same JSON layout as the map prompt. The doubled
# braces are escaped literal braces of the JSON example.
reduce_template = """<s>[INST]<summaries>{docs}</summaries>
Please summarize the above client meeting transcript summaries within the summaries XML tag of a document into a concise, comprehensive consolidated summary. Make sure to include the key points, decisions made, action items, positive and negative sentiments, progress updates, and names of participants mentioned in the summaries.
If it is not possible to create a single summary without losing important details due to length constraints, provide the summaries as they are.
The summary should include:
1. Participants: List all unique participants mentioned in the meeting.
2. Summary: Provide a brief overview of the unique main topics discussed during the meeting.
3. Key Points: Extract the most significant unique points discussed.
4. Decisions Made: List all unique decisions made during the meeting.
5. Action Items: Include detailed action items with descriptions, owners, and deadlines (if accurately determined from the transcript, otherwise do not include the deadline field).
6. Strengths: Capture any unique positive aspects or strengths highlighted during the meeting.
7. Weaknesses: Capture any unique negative aspects or weaknesses highlighted during the meeting.
8. Progress Updates: Provide unique updates on previous action items and overall project progress.
9. Meeting Date: Record the date when the meeting took place (YYYY-MM-DD).
10. Meeting Duration: Accurately calculate the duration of the meeting in minutes, else do not include this field.
11. Meeting Type: Categorize the type of meeting (e.g., planning, review, status update).
12. Sentiment Scores: Provide overall numeric scores for positive and negative sentiments.
13. Tags: Include unique keywords or tags relevant to the meeting content.
14. Project Name: Associate the meeting with a only one relevant specific project.
15. Location: Specify the location (physical or virtual) of the meeting.
Exclude any PCI data from the summary.
Ignore and do not include in the JSON any field for which accurate information is not identified in the summary.
The final summary should be in JSON format only. Avoid any additional texts, structured as follows:
{{
  "participants": ["Participant 1", "Participant 2", "Participant 3"],
  "summary": "Brief overview of the meeting and main topics discussed.",
  "key_points": [
    "Key point 1",
    "Key point 2",
    "Key point 3",
    ...
  ],
  "decisions_made": [
    "Decision 1",
    "Decision 2",
    "Decision 3",
    ...
  ],
  "action_items": [
    {{"description": "Action item 1", "owner": "Person 1", "deadline": "YYYY-MM-DD"}},
    {{"description": "Action item 2", "owner": "Person 2", "deadline": "YYYY-MM-DD"}},
    ...
  ],
  "strengths": [
    "Positive aspect 1",
    "Positive aspect 2",
    ...
  ],
  "weaknesses": [
    "Negative aspect 1",
    "Negative aspect 2",
    ...
  ],
  "progress_updates": [
    "Update on previous action item 1",
    "Update on previous action item 2",
    ...
  ],
  "meeting_date": "YYYY-MM-DD",
  "meeting_duration": "60 Min",
  "meeting_type": "Planning",
  "positive_sentiment_score": 0.8,
  "negative_sentiment_score": 0.1,
  "tags": ["tag1", "tag2", "tag3",...],
  "project_name": "Project Name",
  "location": "Location"
  }}
  [/INST]
"""
reduce_prompt = PromptTemplate.from_template(reduce_template)
# Chain that runs the reduce prompt against the same model as the map step.
reduce_chain = LLMChain(llm=llm, prompt=reduce_prompt)

# Takes a list of documents, combines them into a single string, and passes this to an LLMChain
# (here the reduce chain; the combined text fills its `docs` variable).
combine_documents_chain = StuffDocumentsChain(
    llm_chain=reduce_chain, document_variable_name="docs"
)

# Combines and iteratively reduces the mapped documents
reduce_documents_chain = ReduceDocumentsChain(
    # This is final chain that is called.
    combine_documents_chain=combine_documents_chain,
    # If documents exceed context for `StuffDocumentsChain`
    # (the same chain is reused for the intermediate collapse step)
    collapse_documents_chain=combine_documents_chain,
    # The maximum number of tokens to group documents into.
    token_max=8192,
)
# Chunking parameters passed to `split_doc`.
# NOTE(review): the splitter presumably measures chunk_size in characters while
# token_max above is in tokens — confirm the two limits are meant to match.
chunk_size = 8192
chunk_overlap = 2048
split_docs = split_doc(doc,chunk_size,chunk_overlap)
map_reduce_chain = MapReduceDocumentsChain(
    # Map chain
    llm_chain=map_chain,
    # Reduce chain
    reduce_documents_chain=reduce_documents_chain,
    # The variable name in the llm_chain to put the documents in
    document_variable_name="docs",
    # Return the results of the map steps in the output
    return_intermediate_steps=False,
)
# Run map-reduce summarization over the chunks; the final text is read from
# result["output_text"] in the next cell.
result = map_reduce_chain.invoke(split_docs)

In [None]:
# Show only the final consolidated summary text from the chain result.
print(result["output_text"])

 {
  "participants": ["Emily", "Jane", "Sarah", "Michael"],
  "summary": "The meetings focused on updates and progress of the marketing campaign, including influencer collaborations, email campaigns, SEO improvements, and addressing any concerns or issues.",
  "key_points": [
    "Two influencers have agreed to collaborate with the team, with the first set of posts scheduled for next week.",
    "Email campaigns have seen an increase in open rates, click-through rates, and conversions, with positive feedback on the new templates.",
    "SEO improvements have led to an increase in organic search traffic, with further optimizations planned.",
    "Influencer collaborations are progressing well with positive engagement and upcoming live events.",
    "Concerns about website user experience and customer support workload were raised."
  ],
  "decisions_made": [
    "The team will move forward with the two interested influencers.",
    "The social media content strategy will be adjusted to m

In [None]:
import boto3
import json
from datetime import datetime
from langchain.document_loaders import S3FileLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chat_models import BedrockChat
from langchain.chains import MapReduceDocumentsChain, ReduceDocumentsChain
from langchain_core.prompts import PromptTemplate
from langchain.chains.llm import LLMChain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from requests.auth import HTTPBasicAuth
import requests

def update_status(file_id, status, s3_key_summary=None, error_message=None):
    """Write a processing-status item for ``file_id`` to the DynamoDB table.

    The item always carries ``file_id``, ``status`` and a ``status_timestamp``
    (naive UTC time in ISO format). ``s3_key_summary`` and ``error_message``
    are added only when a truthy value is passed.

    Args:
        file_id: Identifier of the file being processed.
        status: Status label to record.
        s3_key_summary: Optional S3 key of the stored summary.
        error_message: Optional error text.
    """
    status_table = boto3.resource('dynamodb').Table('your_dynamodb_table_name')

    record = {
        'file_id': file_id,
        'status_timestamp': datetime.utcnow().isoformat(),
        'status': status,
    }
    # Optional attributes are attached only when they carry a value.
    optional_fields = {
        's3_key_summary': s3_key_summary,
        'error_message': error_message,
    }
    record.update({key: value for key, value in optional_fields.items() if value})

    status_table.put_item(Item=record)

def lambda_handler(event, context):
    """Summarize a transcript uploaded to S3 and publish the summary.

    For the first record of the incoming S3 event the handler reads the
    object's ``file_id`` metadata, records a 'Processing' status, loads and
    chunks the object, runs the map-reduce summarization chain on Bedrock,
    stores the summary in S3 under ``output/<file_id>_result.json``, indexes
    it in OpenSearch and records a 'Completed' status. On any error a 'Failed'
    status is recorded (when a ``file_id`` is known) and the error is re-raised.

    Args:
        event: S3 event notification; only ``event['Records'][0]`` is used.
        context: Lambda context object (unused).

    Returns:
        dict with ``statusCode`` 200 and a JSON ``body`` of the form
        ``{"output_text": <summary text>}``.

    Raises:
        ValueError: If the uploaded object has no ``file_id`` metadata.
        Exception: If OpenSearch does not answer with HTTP 201. Errors from
            the AWS, LangChain and HTTP calls propagate unchanged.
    """
    # Local import keeps this block independent of the file's import section.
    from urllib.parse import unquote_plus

    # Bound before the try block so the except handler can tell whether there
    # is a status row to update.
    file_id = None
    try:
        # Extract bucket name and object key from the event
        s3_record = event['Records'][0]['s3']
        bucket_name = s3_record['bucket']['name']
        # Object keys arrive URL-encoded in S3 event notifications.
        file_key = unquote_plus(s3_record['object']['key'])

        # The client has to exist before the metadata lookup; it is reused
        # further down to store the result.
        s3_client = boto3.client('s3')
        response = s3_client.head_object(Bucket=bucket_name, Key=file_key)
        file_id = response['Metadata'].get('file_id')
        if not file_id:
            raise ValueError("file_id metadata not found in S3 object")
        open_search_endpoint = 'your-open-search-endpoint'
        open_search_index = 'your-open-search-index'
        open_search_username = 'your-open-search-username'
        open_search_password = 'your-open-search-password'

        # Update status to 'Processing'
        update_status(file_id, 'Processing')

        # Load the document from S3 using S3FileLoader
        def get_document(bucket_name, file_key):
            loader = S3FileLoader(bucket=bucket_name, key=file_key)
            return loader.load()

        doc = get_document(bucket_name, file_key)

        # Split the document
        def split_doc(doc, chunk_size, chunk_overlap):
            splitter = RecursiveCharacterTextSplitter(
                chunk_size=chunk_size,
                chunk_overlap=chunk_overlap,
                # Separators are tried in list order, so list them coarsest
                # first; with '\n' ahead of '\n\n' the paragraph separator
                # was never reached.
                separators=['\n\n', '\n', '.'],
                keep_separator=False
            )
            chunks = splitter.split_documents(doc)
            return chunks

        # Set up AWS Bedrock LLM
        def get_aws_bedrock_summarization_llm():
            model_id = "mistral.mistral-large-2402-v1:0"
            # No access keys are passed: boto3 resolves credentials from its
            # default chain, which inside Lambda is the execution role.
            bedrock_client = boto3.client(
                'bedrock-runtime',
                region_name='us-east-1'
            )
            model_kwargs = {
                "temperature": 0.2,
                "max_tokens": 8192,
                "top_p": 1,
                "top_k": 200,
                "stop": ["</s>"]
            }
            model = BedrockChat(
                client=bedrock_client,
                model_id=model_id,
                model_kwargs=model_kwargs,
            )
            return model

        # One model instance is shared by the map and reduce chains.
        llm = get_aws_bedrock_summarization_llm()

        # Define the prompt templates
        map_template = """<s>[INST]<text>{docs}</text>
        Please summarize the above meeting transcript within the text XML tag into a comprehensive summary, including all key details. The summary should include the following fields:

        1. Participants: List all unique participants or human or owner names mentioned in the meeting.
        2. Summary: Provide a brief overview of the main topics discussed during the meeting.
        3. Key Points: Extract the most significant points discussed.
        4. Decisions Made: List all decisions made during the meeting.
        5. Action Items: Include detailed action items with descriptions, owners, and deadlines (if accurately determined from the transcript, otherwise do not include the deadline field).
        6. Strengths: Capture any positive aspects or strengths highlighted during the meeting.
        7. Weaknesses: Capture any negative aspects or weaknesses highlighted during the meeting.
        8. Progress Updates: Provide updates on previous action items and overall project progress.
        9. Meeting Date: Record the date when the meeting took place (YYYY-MM-DD).
        10. Meeting Duration: Accurately calculate the duration of the meeting in minutes, else do not include this field.
        11. Meeting Type: Categorize the type of meeting (e.g., planning, review, status update).
        12. Sentiment Scores: Provide numeric scores for positive and negative sentiments.
        13. Tags: Include keywords or tags relevant to the meeting content.
        14. Project Name: Associate the meeting with a only one relevant specific project.
        15. Location: Specify the location (physical or virtual) of the meeting.
        Exclude any PCI data from the summary.
        Ignore and do not include in the JSON any field for which accurate information is not identified in the summary.
        The final summary should be in JSON format only. Avoid any additional texts, structured as follows:
        {{
          "participants": ["Participant 1", "Participant 2", "Participant 3",...],
          "summary": "Brief overview of the meeting and main topics discussed.",
          "key_points": [
            "Key point 1",
            "Key point 2",
            "Key point 3",
            ...
          ],
          "decisions_made": [
            "Decision 1",
            "Decision 2",
            "Decision 3",
            ...
          ],
          "action_items": [
            {{"description": "Action item 1", "owner": "Person 1", "deadline": "YYYY-MM-DD"}},
            {{"description": "Action item 2", "owner": "Person 2", "deadline": "YYYY-MM-DD"}},
            ...
          ],
          "strengths": [
            "Positive aspect 1",
            "Positive aspect 2",
            ...
          ],
          "weaknesses": [
            "Negative aspect 1",
            "Negative aspect 2",
            ...
          ],
          "progress_updates": [
            "Update on previous action item 1",
            "Update on previous action item 2",
            ...
          ],
          "meeting_date": "YYYY-MM-DD",
          "meeting_duration": "60 Min",
          "meeting_type": "Planning",
          "positive_sentiment_score": 0.8,
          "negative_sentiment_score": 0.1,
          "tags": ["tag1", "tag2", "tag3",...],
          "project_name": "Project Name",
          "location": "Location"
        }}
        [/INST]
        """
        map_prompt = PromptTemplate.from_template(map_template)
        map_chain = LLMChain(llm=llm, prompt=map_prompt)

        reduce_template = """<s>[INST]<summaries>{docs}</summaries>
        Please summarize the above client meeting transcript summaries within the summaries XML tag of a document into a concise, comprehensive consolidated summary. Make sure to include the key points, decisions made, action items, positive and negative sentiments, progress updates, and names of participants mentioned in the summaries.
        If it is not possible to create a single summary without losing important details due to length constraints, provide the summaries as they are.
        The summary should include:
        1. Participants: List all unique participants mentioned in the meeting.
        2. Summary: Provide a brief overview of the unique main topics discussed during the meeting.
        3. Key Points: Extract the most significant unique points discussed.
        4. Decisions Made: List all unique decisions made during the meeting.
        5. Action Items: Include detailed action items with descriptions, owners, and deadlines (if accurately determined from the transcript, otherwise do not include the deadline field).
        6. Strengths: Capture any unique positive aspects or strengths highlighted during the meeting.
        7. Weaknesses: Capture any unique negative aspects or weaknesses highlighted during the meeting.
        8. Progress Updates: Provide unique updates on previous action items and overall project progress.
        9. Meeting Date: Record the date when the meeting took place (YYYY-MM-DD).
        10. Meeting Duration: Accurately calculate the duration of the meeting in minutes, else do not include this field.
        11. Meeting Type: Categorize the type of meeting (e.g., planning, review, status update).
        12. Sentiment Scores: Provide overall numeric scores for positive and negative sentiments.
        13. Tags: Include unique keywords or tags relevant to the meeting content.
        14. Project Name: Associate the meeting with a only one relevant specific project.
        15. Location: Specify the location (physical or virtual) of the meeting.
        Exclude any PCI data from the summary.
        Ignore and do not include in the JSON any field for which accurate information is not identified in the summary.
        The final summary should be in JSON format only. Avoid any additional texts, structured as follows:
        {{
          "participants": ["Participant 1", "Participant 2", "Participant 3"],
          "summary": "Brief overview of the meeting and main topics discussed.",
          "key_points": [
            "Key point 1",
            "Key point 2",
            "Key point 3",
            ...
          ],
          "decisions_made": [
            "Decision 1",
            "Decision 2",
            "Decision 3",
            ...
          ],
          "action_items": [
            {{"description": "Action item 1", "owner": "Person 1", "deadline": "YYYY-MM-DD"}},
            {{"description": "Action item 2", "owner": "Person 2", "deadline": "YYYY-MM-DD"}},
            ...
          ],
          "strengths": [
            "Positive aspect 1",
            "Positive aspect 2",
            ...
          ],
          "weaknesses": [
            "Negative aspect 1",
            "Negative aspect 2",
            ...
          ],
          "progress_updates": [
            "Update on previous action item 1",
            "Update on previous action item 2",
            ...
          ],
          "meeting_date": "YYYY-MM-DD",
          "meeting_duration": "60 Min",
          "meeting_type": "Planning",
          "positive_sentiment_score": 0.8,
          "negative_sentiment_score": 0.1,
          "tags": ["tag1", "tag2", "tag3",...],
          "project_name": "Project Name",
          "location": "Location"
        }}
        [/INST]
        """
        reduce_prompt = PromptTemplate.from_template(reduce_template)
        reduce_chain = LLMChain(llm=llm, prompt=reduce_prompt)

        # Set up document chains
        combine_documents_chain = StuffDocumentsChain(
            llm_chain=reduce_chain, document_variable_name="docs"
        )

        reduce_documents_chain = ReduceDocumentsChain(
            combine_documents_chain=combine_documents_chain,
            collapse_documents_chain=combine_documents_chain,
            token_max=8192,
        )

        # Split documents
        chunk_size = 8192
        chunk_overlap = 2048
        split_docs = split_doc(doc, chunk_size, chunk_overlap)

        map_reduce_chain = MapReduceDocumentsChain(
            llm_chain=map_chain,
            reduce_documents_chain=reduce_documents_chain,
            document_variable_name="docs",
            return_intermediate_steps=False,
        )

        # Process documents
        result = map_reduce_chain.invoke(split_docs)

        # Serialize only the summary text: the chain result also echoes the
        # input Document objects, which json.dumps cannot encode.
        summary_json = json.dumps({'output_text': result['output_text']})

        # Write result to S3
        result_s3_key = f'output/{file_id}_result.json'
        s3_client.put_object(Bucket=bucket_name, Key=result_s3_key, Body=summary_json)

        # Write result to OpenSearch
        headers = {"Content-Type": "application/json"}
        index_response = requests.post(
            f"{open_search_endpoint}/{open_search_index}/_doc",
            auth=HTTPBasicAuth(open_search_username, open_search_password),
            headers=headers,
            data=summary_json,
            # Without a timeout a stalled connection would block until the
            # Lambda itself times out, leaving the status at 'Processing'.
            timeout=30,
        )

        if index_response.status_code != 201:
            raise Exception(f"Failed to index document to OpenSearch: {index_response.text}")

        # Update status to 'Completed'
        update_status(file_id, 'Completed', s3_key_summary=result_s3_key)

    except Exception as e:
        # Update status to 'Failed' with error message. When the failure
        # happened before a file_id was read there is no status row to update.
        if file_id:
            update_status(file_id, 'Failed', error_message=str(e))
        raise

    # Return result
    return {
        'statusCode': 200,
        'body': summary_json
    }


### Notes:
1. **OpenSearch Credentials and Endpoint**:
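   - Replace the placeholder OpenSearch endpoint, index, username, and password at the top of `lambda_handler` with your own values. The handler does not read them from the event payload; prefer loading them from Lambda environment variables or AWS Secrets Manager rather than hardcoding them.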

2. **Environment Variables**:
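   - Do not hardcode AWS credentials in the function. Inside Lambda, boto3 automatically picks up the temporary credentials of the function's execution role (Lambda itself sets `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` from that role), so no keys need to be passed to `boto3.client`.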

3. **Dependencies**:
   - Ensure the `requests`, `langchain`, and `langchain_community` libraries are available in the Lambda environment. You need to package these dependencies with your Lambda function or provide them through a Lambda layer.

4. **IAM Permissions**:
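   - Ensure the Lambda function's IAM role has permissions to read from and write to S3 (`s3:GetObject`, `s3:PutObject`), write status items to DynamoDB (`dynamodb:PutItem`), invoke AWS Bedrock models, and log to CloudWatch. The example policy below does not include `s3:PutObject` or `dynamodb:PutItem`; add them before using it. Additionally, the role needs permission to access the OpenSearch endpoint if you are using VPC Endpoints for OpenSearch.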

By following these steps and ensuring the necessary configurations, your Lambda function will be able to process the meeting transcripts, generate summaries, and store the results in both S3 and OpenSearch.


{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Action": [
                "s3:GetObject",
                "s3:ListBucket"
            ],
            "Resource": [
                "arn:aws:s3:::your-bucket-name",
                "arn:aws:s3:::your-bucket-name/*"
            ]
        },
        {
            "Effect": "Allow",
            "Action": [
                "bedrock:InvokeModel"
            ],
            "Resource": "*"
        },
        {
            "Effect": "Allow",
            "Action": [
                "logs:CreateLogGroup",
                "logs:CreateLogStream",
                "logs:PutLogEvents"
            ],
            "Resource": "*"
        },
        {
            "Effect": "Allow",
            "Action": [
                "es:ESHttpPost",
                "es:ESHttpPut"
            ],
            "Resource": "arn:aws:es:your-region:your-account-id:domain/your-domain-name/*"
        }
    ]
}
{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Principal": {
                "Service": "lambda.amazonaws.com"
            },
            "Action": "sts:AssumeRole"
        }
    ]
}

In [None]:
# Build the Lambda deployment package and upload it.
# NOTE(review): assumes lambda_function.py has been placed inside
# my_lambda_function/ before the copy step — confirm.
mkdir my_lambda_function
cd my_lambda_function
python3 -m venv venv
source venv/bin/activate
pip install requests boto3 langchain langchain_community
# Install the dependencies into the root of the package directory: for a
# function deployment package Lambda imports modules from the zip root
# (a python/ sub-directory is the layout used by layers, not function zips).
mkdir -p package
pip install requests boto3 langchain langchain_community -t package
cp lambda_function.py package/
cd package
zip -r ../my_lambda_function.zip .
cd ..

# Upload the archive and point the function at it.
aws s3 cp my_lambda_function.zip s3://your-bucket-name/path/to/my_lambda_function.zip
aws lambda update-function-code --function-name your_lambda_function_name --s3-bucket your-bucket-name --s3-key path/to/my_lambda_function.zip





In [None]:
{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Action": "lambda:InvokeFunction",
            "Resource": "arn:aws:lambda:your-region:your-account-id:function:your-lambda-function-name"
        }
    ]
}

Click on the "Properties" tab.
Add Notification:

Scroll down to the "Event notifications" section.
Click "Create event notification".
Configure Event Notification:

Name your event (e.g., NewObjectUpload).
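Select "All object create events" and specify a prefix/suffix filter for the uploaded transcripts: the function writes its result under `output/` in the same bucket, so without a filter it would be triggered again by its own output.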
Choose "Lambda Function" as the destination.
Select your Lambda function from the dropdown.
Click "Save".
