In [None]:
import boto3
import json

def fetch_uuid_file(bucket_name, file_name):
    s3 = boto3.client('s3')
    try:
        obj = s3.get_object(Bucket=bucket_name, Key=file_name)
        file_content = obj['Body'].read().decode('utf-8')
        return json.loads(file_content)
    except Exception as e:
        print(f"Error fetching file from S3: {e}")
        return None


In [113]:
import requests
import os

def process_reviews_with_google_ai(source_code):
    google_api_key = "AIzaSyCDSeoTj9yTAQTm-SDdj3XIyu3ir7lcj_k"
    
    response = requests.post(
        url=f"https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent?key={google_api_key}",
        headers={"Content-Type": "application/json"},
        json={
            "contents": [
                {
                    "parts": [
                        {
                            "text": (
                                """Extract all the reviews from the source code. 
Represent the extracted data as a JSON, keys: 
'title', 'body', 'rating', 'reviewer'. 
If a field is empty, assign the value 'null'. 

**Output the JSON data directly, without any formatting, 
code blocks, or surrounding text. Just a simple text with not tabs and new line**"""
                            )
                        }
                    ]
                }
            ]
        }
    )
    
    if response.status_code == 200:
        data = response.json()
        message_content = data['candidates'][0]['content']['parts'][0]['text'].strip()
        return message_content.split(",")
    else:
        print(f"Error: {response.text}")
        return None


In [73]:
def lambda_handler(event, context):
    # Fetch UUID JSON file from S3
    bucket_name = 'your-s3-bucket'
    uuid = event.get('uuid')
    file_name = f'{uuid}.json'

    uuid_data = fetch_uuid_file(bucket_name, file_name)
    if not uuid_data:
        return {"statusCode": 500, "body": "Failed to fetch UUID file from S3"}

    source_code = uuid_data.get('sourceCode')
    if not source_code:
        return {"statusCode": 400, "body": "No source code found in UUID JSON"}

    # Process source code with Google AI Studio
    class_names = process_reviews_with_google_ai(source_code)
    if not class_names:
        return {"statusCode": 500, "body": "Failed to process source code with Google AI Studio"}
    
    review_paginate_next, review_author, review_text, review_rating = class_names

    # Parse source code with BeautifulSoup
    soup = BeautifulSoup(source_code, 'html.parser')

    reviews = []
    titles = soup.find_all(class_=review_text)
    authors = soup.find_all(class_=review_author)
    ratings = soup.find_all(class_=review_rating)

    for i in range(min(len(titles), len(authors), len(ratings))):
        review = {
            "title": titles[i].get_text(strip=True),
            "author": authors[i].get_text(strip=True),
            "rating": ratings[i].get_text(strip=True)
        }
        reviews.append(review)

    # Return the extracted reviews
    return {
        'statusCode': 200,
        'body': json.dumps(reviews)
    }


In [114]:
import json

# Step 1: Load the JSON file
with open('source-codes.json', 'r') as file:
    data = json.load(file)

# Step 2: Create a new list to hold appended elements
new_list = []

# Step 3: Append each element from the loaded data to the new list
for item in data:
    new_list.append(item)

new_list2 = new_list[:2]
out = []
print(len(new_list2))
for src in new_list2:
    txt = str(process_reviews_with_google_ai(src))
    # txt = txt.strip()
    out.append(txt)
print(len(out[0]))


2
1199


In [115]:
print(out[0])
print(out)

['{"title": "Great Product"', ' "body": "This product is amazing! It exceeded my expectations."', ' "rating": 5', ' "reviewer": "John Doe"}', '{"title": "Love it!"', ' "body": "I am very happy with this purchase. It\'s easy to use and works perfectly."', ' "rating": 4', ' "reviewer": "Jane Smith"}', '{"title": "Disappointing"', ' "body": "The product arrived damaged and the customer service was unhelpful."', ' "rating": 1', ' "reviewer": "David Lee"}', '{"title": "Okay"', ' "body": "It\'s an okay product. Nothing special."', ' "rating": 3', ' "reviewer": "Sarah Jones"}', '{"title": "Excellent!"', ' "body": "I would definitely recommend this product to anyone looking for something like this."', ' "rating": 5', ' "reviewer": "Michael Brown"}', '{"title": "null"', ' "body": "The product was as described.  It works well and I\'m happy with my purchase."', ' "rating": 4', ' "reviewer": "Emily Wilson"}', '{"title": "Not as expected"', ' "body": "I was expecting something better based on the 

In [None]:
import json

def clean_and_convert_to_json(data):
    """
    Cleans the input data and converts it into a valid JSON array.

    Args:
        data: A list of strings representing the raw data.

    Returns:
        A list of dictionaries, where each dictionary represents a review.
    """

    cleaned_data = []
    for item in data:
        # Remove outer square brackets
        item = item.strip('[]') 
        # Remove single quotes around each key-value pair
        item = item.replace('\'', '') 
        # Parse the string into a list of dictionaries
        try:
            reviews = json.loads(f"[{item}]") 
            cleaned_data.extend(reviews)
        except json.JSONDecodeError as e:
            print(f"Error parsing JSON: {e}")

    return cleaned_data

# Sample usage
my_array = ['[\'[{"title": "Great product!"\', \' "body": "This is a fantastic product.  It\\\'s exactly what I needed and it works perfectly. I highly recommend it!"\', \' "rating": 5\', \' "reviewer": "John Doe"}\', \' {"title": "Disappointing"\', \' "body": "I was really let down by this product. It didn\\\'t meet my expectations and the quality was poor."\', \' "rating": 1\', \' "reviewer": "Jane Smith"}\', \' {"title": "Okay"\', \' "body": "It\\\'s an alright product. Nothing special\', \' but it does the job."\', \' "rating": 3\', \' "reviewer": "Robert Jones"}\', \' {"title": null\', \' "body": "I love this thing!  It\\\'s so easy to use and it makes my life so much easier."\', \' "rating": 5\', \' "reviewer": "Mary Brown"}\', \' {"title": "Could be better"\', \' "body": "The product is functional\', \' but there\\\'s room for improvement.  The design could be more user-friendly."\', \' "rating": 3\', \' "reviewer": "Michael Davis"}\', \' {"title": "Excellent"\', \' "body": "This is an excellent product.  I\\\'m very happy with my purchase."\', \' "rating": 4\', \' "reviewer": "Emily Wilson"}\', \' {"title": "Terrible"\', \' "body": "Don\\\'t waste your money on this product.  It\\\'s completely useless."\', \' "rating": 1\', \' "reviewer": "David Garcia"}\', \' {"title": null\', \' "body": "Works as expected.  No complaints."\', \' "rating": 4\', \' "reviewer": "Linda Rodriguez"}]\']', '[\'[{"title": "Great product"\', \' "body": "This is a great product. I love it!"\', \' "rating": 5\', \' "reviewer": "John Doe"}\', \' {"title": "Excellent"\', \' "body": "Excellent product\', \' highly recommended."\', \' "rating": 4\', \' "reviewer": "Jane Smith"}\', \' {"title": "Not so good"\', \' "body": "The product is not as good as expected."\', \' "rating": 2\', \' "reviewer": "Peter Jones"}\', \' {"title": "Waste of money"\', \' "body": "Complete waste of money. Don\\\'t buy it!"\', \' "rating": 1\', \' "reviewer": "Mary Brown"}\', \' {"title": null\', \' "body": "It works as expected."\', \' "rating": 3\', \' "reviewer": "David Lee"}\', \' {"title": "Love it!"\', \' "body": null\', \' "rating": 5\', \' "reviewer": "Sarah Williams"}]\']']

json_reviews = clean_and_convert_to_json(out)
print(json.dumps(json_reviews, indent=2))
with open("demo.json","w")as f:
    json.dump(json_reviews,f)


Error parsing JSON: Expecting property name enclosed in double quotes: line 1 column 3 (char 2)
Error parsing JSON: Unterminated string starting at: line 1 column 2 (char 1)
Error parsing JSON: Expecting value: line 1 column 2 (char 1)
Error parsing JSON: Expecting value: line 1 column 2 (char 1)
Error parsing JSON: Expecting value: line 1 column 2 (char 1)
Error parsing JSON: Expecting value: line 1 column 2 (char 1)
Error parsing JSON: Expecting value: line 1 column 2 (char 1)
Error parsing JSON: Unterminated string starting at: line 1 column 2 (char 1)
Error parsing JSON: Expecting value: line 1 column 2 (char 1)
Error parsing JSON: Unterminated string starting at: line 1 column 2 (char 1)
Error parsing JSON: Expecting value: line 1 column 2 (char 1)
Error parsing JSON: Expecting value: line 1 column 2 (char 1)
Error parsing JSON: Expecting value: line 1 column 2 (char 1)
Error parsing JSON: Expecting value: line 1 column 2 (char 1)
Error parsing JSON: Expecting value: line 1 column

Error parsing the data: Expecting ',' delimiter: line 1 column 6 (char 5)
Error parsing the data: Expecting ',' delimiter: line 1 column 6 (char 5)
Successfully appended data to reviews.json.
