In [52]:
from typing import List
import google.generativeai as genai
import os
import numpy as np

# Configure the Gemini client with the key stored in the GEMINI_API_KEY environment
# variable; the os.environ[...] lookup raises KeyError when the variable is not set.
genai.configure(api_key=os.environ["GEMINI_API_KEY"])

def embed_content(content):
    """Embed `content` and return the embedding as a NumPy array.

    The request is sent through ``genai.embed_content`` using the
    ``models/text-embedding-004`` model and the ``SEMANTIC_SIMILARITY`` task type.

    Parameters:
    content: The value forwarded unchanged as the ``content`` argument of the API call.

    Returns:
    np.ndarray: ``np.asarray`` applied to the ``'embedding'`` entry of the response.
    """
    response = genai.embed_content(
        content=content,
        model="models/text-embedding-004",
        task_type="SEMANTIC_SIMILARITY",
    )
    embedding = response['embedding']
    return np.asarray(embedding)

def array_stats(array: np.ndarray):
    """Summarize a numeric array with basic descriptive statistics.

    Parameters:
    array (np.ndarray): The values to summarize. Any array-like accepted by
        ``np.asarray`` works.

    Returns:
    dict: ``mean``, ``std``, ``max``, ``min``, ``max_index``, ``min_index`` and a
        nested ``quartiles`` dict with ``Q1``, ``Q2`` (the median) and ``Q3``.
        The index entries come from ``np.argmax`` / ``np.argmin``.

    Raises:
    ValueError: If `array` contains no elements.
    """
    values = np.asarray(array)
    # Fail early with a clear message instead of letting np.mean emit a NaN warning
    # and np.max raise its generic "zero-size array" error.
    if values.size == 0:
        raise ValueError("array_stats requires a non-empty array.")

    # One percentile call instead of three separate passes over the data
    q1, q2, q3 = np.percentile(values, [25, 50, 75])

    return {
        "mean": np.mean(values),
        "std": np.std(values),
        "max": np.max(values),
        "min": np.min(values),
        "max_index": np.argmax(values),
        "min_index": np.argmin(values),
        "quartiles": {
            "Q1": q1,
            "Q2": q2,  # Median
            "Q3": q3,
        },
    }

def compare_arrays(array1: np.ndarray, array2: np.ndarray, top_k: int = 40):
    """Compare two equally shaped vectors element by element.

    Parameters:
    array1 (np.ndarray): First vector (any array-like accepted by ``np.asarray``).
    array2 (np.ndarray): Second vector, with the same shape as `array1`.
    top_k (int): How many of the largest / smallest absolute differences to report.
        Defaults to 40, the value that used to be hard-coded.

    Returns:
    dict: Statistics of ``array1 - array2`` (mean, std, quartiles), the largest and
        smallest absolute difference with their indices, the `top_k` largest and
        smallest absolute differences, the mean and std of each input, and the
        cosine similarity. The keys ``top_10_max_differences`` and
        ``top_10_min_differences`` keep their names for existing callers even though
        they hold `top_k` entries. The cosine similarity is NaN when either input
        has zero norm.

    Raises:
    ValueError: If the shapes differ or `top_k` is negative.
    """
    if top_k < 0:
        raise ValueError("top_k must be non-negative.")

    array1 = np.asarray(array1)
    array2 = np.asarray(array2)

    # Compare full shapes: len() only looks at the first dimension
    if array1.shape != array2.shape:
        raise ValueError("Arrays must have the same length to compare.")

    # Compute element-wise differences
    differences = array1 - array2
    abs_differences = np.abs(differences)

    # Sort once; both rankings are slices of the same ordering
    ascending_order = np.argsort(abs_differences)
    # Reverse before slicing so that top_k == 0 yields an empty result
    # (a [-0:] slice would select every element instead)
    top_max_diff_indices = ascending_order[::-1][:top_k]  # largest differences first
    top_min_diff_indices = ascending_order[:top_k]        # smallest differences first

    # Extract corresponding differences
    top_max_differences = abs_differences[top_max_diff_indices]
    top_min_differences = abs_differences[top_min_diff_indices]

    # Guard the division so a zero-norm input gives NaN without a runtime warning
    norm_product = np.linalg.norm(array1) * np.linalg.norm(array2)
    if norm_product == 0:
        cosine_similarity = np.float64("nan")
    else:
        cosine_similarity = np.dot(array1, array2) / norm_product

    q1, q2, q3 = np.percentile(differences, [25, 50, 75])

    # Compute overall statistics
    stats = {
        "mean_difference": np.mean(differences),
        "std_difference": np.std(differences),
        "max_difference": np.max(abs_differences),
        "max_difference_index": np.argmax(abs_differences),
        "min_difference": np.min(abs_differences),
        "min_difference_index": np.argmin(abs_differences),
        "quartiles_of_differences": {
            "Q1": q1,
            "Q2": q2,  # Median
            "Q3": q3
        },
        "top_10_max_differences": {
            "indices": top_max_diff_indices.tolist(),
            "values": top_max_differences.tolist()
        },
        "top_10_min_differences": {
            "indices": top_min_diff_indices.tolist(),
            "values": top_min_differences.tolist()
        },
        "mean_array1": np.mean(array1),
        "mean_array2": np.mean(array2),
        "std_array1": np.std(array1),
        "std_array2": np.std(array2),
        "cosine_similarity": cosine_similarity
    }

    return stats

In [76]:
# Fixture for the embedding comparison: a list of two-element lists of JSON-like
# strings. In each pair the second string is a near-copy of the first with a small
# edit (for example a missing or trailing comma, single quotes, unquoted keys or
# values, a missing colon or a missing closing brace).
# NOTE(review): in the "plugin" pair the only edit is the value of "ver"
# (2.0 -> true), so the second string is still well-formed JSON; confirm that this
# is intended.
test = [
        [
            """
            {
            "name": "Alice",
            "age": 30,
            "city": "New York",
            "is_active": true,
            "hobbies": ["reading", "hiking", "painting"]
            }
            """,
            """
            {
            "name": "Alice"
            "age": 30,
            "city": "New York",
            "is_active": true,
            "hobbies": ["reading", "hiking", "painting"]
            }
            """
        ],
        [
            """
            [
            {"id": 1, "product": "Laptop", "price": 1200},
            {"id": 2, "product": "Mouse", "price": 25},
            {"id": 3, "product": "Keyboard", "price": 75}
            ]
            """,
            """
            [
            {"id": 1, "product": "Laptop", "price": 1200},
            {"id": 2, "product": "Mouse", "price": 25},
            {"id": 3, "product": "Keyboard", "price": 75},
            ]
            """
        ],
        [
            """
            {
            "settings": {
                "theme": "dark",
                "notifications": true,
                "volume": 0.8
            },
            "user": {
                "username": "Bob123",
                "email": "bob@example.com"
            }
            }
            """,
            """
            {
            'settings': {
                'theme': 'dark',
                'notifications': true,
                'volume': 0.8
            },
            'user': {
                'username': 'Bob123',
                'email': 'bob@example.com'
            }
            }
            """
        ],
        [
            """
            {
            "a": [],
            "b": false,
            "c": 12.34,
            "d": [
                true,
                {
                "e": {
                    "f": "g"
                }
                }
            ]
            }
            """,
            """
            {
            "a": (),
            "b": false,
            "c": 12.34,
            "d": [
                true,
                {
                "e": {
                    "f": "g"
                }
                }
            ]
            }
            """
        ],
        [
            """
            {
                "id": "0001",
                "type": "donut",
                "name": "Cake",
                "ppu": 0.55,
                "batters":
                    {
                        "batter":
                            [
                                { "id": "1001", "type": "Regular" },
                                { "id": "1002", "type": "Chocolate" },
                                { "id": "1003", "type": "Blueberry" },
                                { "id": "1004", "type": "Devil's Food" }
                            ]
                    }
            }
            """,
            """
            {
                id: "0001",
                "type": "donut",
                "name": "Cake",
                "ppu": 0.55,
                "batters":
                    {
                        "batter":
                            [
                                { "id": "1001", "type": "Regular" },
                                { "id": "1002", "type": "Chocolate" },
                                { "id": "1003", "type": "Blueberry" },
                                { "id": "1004", "type": "Devil's Food" }
                            ]
                    }
            }
            """
        ],
        [
            """
            {"widget": {
                "debug": "on",
                "window": {
                    "name": "main_window",
                    "width": 640,
                    "height": 480
                }
            }
            }
            """,
            """
            {"widget": {
                "debug": "on",
                "window": {
                    "name": "main_window",
                    "width": 640,
                    "height": 480
            
                }
            }
            """
        ],
        [
            """
            {"menu": {
              "id": "file",
              "value": "File",
              "popup": {
                "menuitem": [
                  {"value": "New", "onclick": "CreateNewDoc()"},
                  {"value": "Open", "onclick": "OpenDoc()"},
                  {"value": "Close", "onclick": "CloseDoc()"}
                ]
              }
            }}
            """,
            """
            {"menu" {
              "id" "file",
              "value": "File",
              "popup": {
                "menuitem": [
                  {"value": "New", "onclick": "CreateNewDoc()"},
                  {"value": "Open", "onclick": "OpenDoc()"},
                  {"value": "Close", "onclick": "CloseDoc()"}
                ]
              }
            }}
            """
        ],
        [
            """
            {
                "code": "USD",
                "symbol": "$",
                "rate": "1.0000",
                "description": "United States Dollar",
                "rate_float": 1.0000
              }
            """,
            """
            {
                "code": "USD",
                "symbol": "$",
                "rate": 01.0000,
                "description": "United States Dollar",
                "rate_float": 1.0000
              }
            """
        ],
        [
            """
            {"plugin": "markdown", "ver": 2.0}
            """,
            """
            {"plugin": "markdown", "ver": true}
            """
        ],
        [
            """
             {
                "array_with_null": [1, null, 3, null, 5],
                "boolean_value": false
            }
            """,
            """
            {
                null: [1, null, 3, null, 5],
                "boolean_value": false
            }
            """
        ],
        [
            """
            {
              "users": [
                {
                  "id": 1,
                  "name": "John Doe",
                  "email": "john.doe@example.com",
                  "isActive": true,
                   "address": {
                        "street": "123 Main St",
                        "city": "Anytown",
                        "zipcode": "12345"
                      },
                  "roles": ["admin", "user"]
                },
                {
                  "id": 2,
                  "name": "Jane Smith",
                  "email": "jane.smith@example.com",
                  "isActive": false,
                   "address": {
                        "street": "456 Oak Ave",
                        "city": "Otherville",
                        "zipcode": "67890"
                      },
                  "roles": ["user"]
                }
              ]
            }
            """,
            """
            {
              "users": [
                {
                  "id": 1,
                  "name": "John Doe",
                  email: "john.doe@example.com",
                  "isActive": true,
                   "address": {
                        "street": "123 Main St",
                        "city": "Anytown",
                        "zipcode": "12345"
                      },
                  "roles": ["admin", "user"]
                },
                {
                  "id": 2,
                  "name": "Jane Smith",
                  "email": "jane.smith@example.com",
                  "isActive": false,
                   "address": {
                        "street": "456 Oak Ave",
                        "city": "Otherville",
                        "zipcode": 67890
                      },
                  "roles": ["user"]
                }
              ]
            }
            """
        ],
          [
            """
            {
              "data": [
                {
                  "name": "Product A",
                  "id": "PA123",
                  "price": 29.99,
                  "categories": ["electronics", "gadgets"],
                  "inStock": true,
                   "details":{
                        "weight": 1.2,
                         "dimensions":
                         {
                           "length":10,
                           "width":5,
                           "height":2
                         }
                      }

                },
                {
                  "name": "Product B",
                  "id": "PB456",
                  "price": 9.99,
                  "categories": ["books", "fiction"],
                  "inStock": false,
                  "details":{
                        "weight": 0.8,
                         "dimensions":
                         {
                           "length":8,
                           "width":4,
                           "height":1
                         }
                      }
                }
              ]
            }
            """,
            """
            {
              "data": [
                {
                  "name": Product A,
                  "id": "PA123",
                  "price": 29.99,
                  "categories": ["electronics", "gadgets"],
                  "inStock": true,
                   "details":{
                        "weight": 1.2,
                         "dimensions":
                         {
                           "length":10,
                           "width":5,
                           "height":2
                         }
                      }

                },
                {
                  "name": "Product B",
                  "id": "PB456",
                  "price": 9.99,
                  categories: ["books", "fiction"],
                  "inStock": false,
                  "details":{
                        "weight": 0.8,
                         "dimensions":
                         {
                           "length":8,
                           "width"4,
                           "height":1
                         }
                      }
                }
              ]
            }
            """
        ],
        [
            """
            {
              "success": true,
              "message": "Operation completed successfully",
              "timestamp": "2024-02-29T12:34:56Z",
              "payload": {
                "data": [10, 20, 30, 40, 50],
                "metadata": {
                  "totalCount": 5,
                  "averageValue": 30
                },
                "status": "active"
              }
            }
            """,
            """
            {
              "success": true,
              "message": "Operation completed successfully",
              "timestamp": 2024-02-29T12:34:56Z,
              "payload": {
                "data": [10, 20, 30, 40, 50],
                "metadata": {
                  "totalCount": 5,
                  "averageValue": 30
                },
                "status": "active"
              }
            }
            """
        ],
        [
            """
             {
              "type": "event",
              "name": "UserLoggedIn",
              "version": "1.0",
              "context": {
                "userId": "user123",
                "sessionId": "xyz789",
                "device": {
                  "type": "mobile",
                  "os": "iOS",
                  "model": "iPhone 13"
                },
                "location": {
                  "latitude": 34.0522,
                  "longitude": -118.2437
                }
              },
              "properties":{
                "loginTime":12345678,
                "method": "password"
              }
            }
            """,
            """
             {
              "type": "event",
              "name": UserLoggedIn,
              "version": "1.0",
              "context": {
                "userId": "user123",
                "sessionId": "xyz789",
                "device": {
                  "type": "mobile",
                  "os": iOS,
                  "model": "iPhone 13"
                },
                "location": {
                  "latitude": 34.0522,
                  "longitude": -118.2437
                }
              },
              "properties":{
                "loginTime":12345678,
                "method": "password"
              }
            }
            """
        ],
         [
            """
            {
                "notification": {
                    "title": "New Message",
                    "body": "You have received a new message from John Doe.",
                    "icon": "message.png",
                     "actions": [
                          {
                            "type": "button",
                            "text": "View",
                            "url": "https://example.com/message"
                          },
                          {
                            "type": "button",
                            "text": "Reply",
                             "url": "https://example.com/reply"
                          }
                        ],
                    "timestamp": 1709206800
                }
            }
            """,
            """
            {
                "notification": {
                    "title": "New Message",
                    "body": "You have received a new message from John Doe.",
                    "icon": "message.png",
                     "actions": [
                          {
                            "type": button,
                            "text": "View",
                            "url": "https://example.com/message"
                          },
                          {
                            "type": "button",
                            "text": "Reply",
                            "url": "https://example.com/reply",
                          }
                        ],
                    "timestamp": 1709206800
                }
            }
            """
        ],
        [
            """
            {
              "quiz": {
                "title": "General Knowledge Quiz",
                "description": "Test your knowledge on various topics.",
                "questions": [
                  {
                    "id": 1,
                    "text": "What is the capital of France?",
                    "options": ["London", "Berlin", "Paris", "Rome"],
                    "correctAnswer": "Paris",
                     "difficulty": "easy"
                  },
                  {
                    "id": 2,
                    "text": "Who painted the Mona Lisa?",
                    "options": ["Leonardo da Vinci", "Michelangelo", "Raphael", "Vincent van Gogh"],
                    "correctAnswer": "Leonardo da Vinci",
                     "difficulty": "medium"
                  }
                ]
              }
            }
            """,
            """
            {
              "quiz": {
                "title": "General Knowledge Quiz",
                "description": "Test your knowledge on various topics.",
                "questions": [
                  {
                    "id": 1,
                    text: "What is the capital of France?",
                    "options": ["London", "Berlin", "Paris", "Rome"],
                    "correctAnswer": "Paris",
                     "difficulty": "easy"
                  },
                  {
                    "id": 2,
                    "text": "Who painted the Mona Lisa?",
                    "options": ["Leonardo da Vinci", "Michelangelo", "Raphael", "Vincent van Gogh"],
                    "correctAnswer": "Leonardo da Vinci",
                     difficulty: "medium"
                  }
                ]
              }
            }
            """
        ],
        [
            """
            {
              "file": {
                "name": "document.pdf",
                "size": 123456,
                "type": "application/pdf",
                "metadata":{
                    "createdAt":"2024-02-20T10:00:00Z",
                    "author": "Alice Smith"
                },
                 "permissions":
                 {
                   "read":true,
                   "write":false,
                   "execute": false
                 }

              }
            }
            """,
            """
            {
              "file": {
                "name": "document.pdf",
                "size": 123456,
                "type": "application/pdf",
                "metadata":{
                    "createdAt":"2024-02-20T10:00:00Z",
                    "author": Alice Smith
                },
                 "permissions":
                 {
                   "read":true,
                   "write":false
                   "execute": false
                 }

              }
            }
            """
        ],
        [
            """
            {
                "store": {
                  "name": "City Grocery",
                    "location": "123 Main Street",
                    "products": [
                      {
                        "id": "A101",
                        "name": "Fresh Apples",
                         "price": 1.99,
                        "category": "Produce",
                           "supplier":
                           {
                             "name": "Farm Fresh Inc.",
                              "contact": "555-1234"
                            }

                      },
                      {
                         "id": "B202",
                         "name": "Whole Wheat Bread",
                         "price": 3.49,
                        "category": "Bakery",
                            "supplier":
                            {
                             "name": "Golden Grain Bakery",
                             "contact": "555-5678"
                            }
                      }
                    ]
                  },
                  "hours": "8am-8pm"
            }
            """,
            """
            {
                "store": {
                  "name": "City Grocery",
                  "location": "123 Main Street",
                    "products": [
                      {
                        "id": "A101",
                        name: "Fresh Apples",
                         "price": 1.99,
                        "category": "Produce",
                           "supplier":
                           {
                             "name": "Farm Fresh Inc.",
                              "contact": "555-1234"
                            }

                      },
                      {
                         "id": "B202",
                         "name": "Whole Wheat Bread",
                         price: 3.49,
                        "category": "Bakery",
                            "supplier":
                            {
                             "name": "Golden Grain Bakery",
                             "contact": "555-5678"
                            }
                      }
                    ],
                  "hours": "8am-8pm"
            }
            """
        ]
]

In [77]:
def top10diff(arrays, top_k: int = 40):
    """Rank embedding dimensions by absolute difference for each pair of texts.

    Both members of every pair are embedded with ``embed_content``. The absolute
    element-wise difference of the two embeddings is then ranked.

    Parameters:
    arrays: Iterable of pairs; only ``pair[0]`` and ``pair[1]`` are read.
    top_k (int): Number of indices to keep per pair. Defaults to 40, the value that
        used to be hard-coded (the function name says "10" for historical reasons).

    Returns:
    list of np.ndarray: One index array per pair, ordered from the largest to the
        smallest absolute difference.

    Raises:
    ValueError: If `top_k` is negative.
    """
    if top_k < 0:
        raise ValueError("top_k must be non-negative.")

    res = []
    for array in arrays:
        differences = embed_content(array[0]) - embed_content(array[1])
        abs_differences = np.abs(differences)

        # Reverse the ascending order first, then cut: this also handles top_k == 0,
        # where a [-0:] slice would return every index.
        top_max_diff_indices = np.argsort(abs_differences)[::-1][:top_k]
        res.append(top_max_diff_indices)
    return res

In [78]:
# One array of top-difference indices per pair in `test`; top10diff embeds both
# strings of every pair through embed_content.
res1 = top10diff(test)

In [97]:
import numpy as np
from collections import Counter

def calculate_occurrences_ordered(arrays, order_by="frequency"):
    """
    Tally how often each element appears across several arrays and return the tallies sorted.

    Parameters:
    arrays (list of np.array): Arrays whose elements are pooled and counted.
    order_by (str): 'frequency' (default) sorts by descending count;
                    'value' sorts by ascending element.

    Returns:
    list of tuple: (element, count) pairs in the requested order.

    Raises:
    ValueError: If `order_by` is neither 'frequency' nor 'value'.
    """
    # Pool every array into one flat sequence and tally it in a single step
    tally = Counter(np.concatenate(arrays))

    if order_by == "frequency":
        # Most frequent first
        return sorted(tally.items(), key=lambda pair: pair[1], reverse=True)

    if order_by == "value":
        # Smallest element first
        return sorted(tally.items(), key=lambda pair: pair[0])

    raise ValueError("Invalid order_by parameter. Use 'frequency' or 'value'.")

# Keep the first 15 (index, count) entries; the default ordering is by descending count.
differs = calculate_occurrences_ordered(res1)[:15]
differs

[(np.int64(54), 5),
 (np.int64(167), 5),
 (np.int64(433), 4),
 (np.int64(87), 4),
 (np.int64(118), 4),
 (np.int64(151), 4),
 (np.int64(127), 4),
 (np.int64(520), 4),
 (np.int64(168), 4),
 (np.int64(642), 4),
 (np.int64(505), 3),
 (np.int64(554), 3),
 (np.int64(65), 3),
 (np.int64(198), 3),
 (np.int64(51), 3)]

(np.int64(433), 3),
 (np.int64(174), 3),
 (np.int64(505), 2),
 (np.int64(292), 2),
 (np.int64(2), 2)]

In [104]:
# The "notification" pair is already part of `test`; reuse it instead of keeping a
# second copy of the same two string literals in this cell.
NOTIFICATION_PAIR_INDEX = 14
testt = test[NOTIFICATION_PAIR_INDEX]
# Guard against `test` being reordered without this index being updated
assert '"notification"' in testt[0] and '"notification"' in testt[1], \
    "test[NOTIFICATION_PAIR_INDEX] is not the notification pair"

# Full comparison report for the embeddings of the two strings of the pair
valid = compare_arrays(embed_content(testt[0]),embed_content(testt[1]))
valid

{'mean_difference': np.float64(-2.3507846127604332e-05),
 'std_difference': np.float64(0.0010159183378449393),
 'max_difference': np.float64(0.003132959499999999),
 'max_difference_index': np.int64(54),
 'min_difference': np.float64(9.720000000001255e-07),
 'min_difference_index': np.int64(672),
 'quartiles_of_differences': {'Q1': np.float64(-0.0007363442499999993),
  'Q2': np.float64(-1.917849999999617e-05),
  'Q3': np.float64(0.0006774242499999993)},
 'top_10_max_differences': {'indices': [54,
   25,
   700,
   755,
   419,
   87,
   668,
   276,
   407,
   152,
   323,
   272,
   59,
   42,
   109,
   98,
   27,
   238,
   65,
   686,
   709,
   162,
   740,
   151,
   196,
   541,
   457,
   119,
   261,
   231,
   254,
   451,
   290,
   427,
   268,
   168,
   532,
   620,
   279,
   565],
  'values': [0.003132959499999999,
   0.0029608679999999984,
   0.0029573290000000016,
   0.0028378269999999994,
   0.0027322600000000002,
   0.0027049687,
   0.0027024752000000003,
   0.002681

In [105]:
# Indices present both in the top-difference ranking of the single `testt` pair and
# among the most frequent indices collected in `differs`.
np.intersect1d(valid['top_10_max_differences']['indices'],[d[0] for d in differs])

array([ 54,  65,  87, 151, 168])