In [19]:
import pandas as pd
import json
import os

# Build the LLM prompt that asks for a BigQuery SQL statement mapping the
# source tables onto the target definition described in the Excel workbook.

# Read the source and target metadata sheets into DataFrames
excel_file = '../inputdata/mapping.xlsx'
sheet_name_sources = 'Sources'
sheet_name_targets = 'Target'
df_sources = pd.read_excel(excel_file, sheet_name=sheet_name_sources)
df_targets = pd.read_excel(excel_file, sheet_name=sheet_name_targets)

# Serialize each DataFrame to a JSON string (an array with one object per row)
json_data_sources = df_sources.to_json(orient='records')
json_data_targets = df_targets.to_json(orient='records')

# The few-shot example below must itself be valid BigQuery SQL, otherwise it
# teaches the model a wrong dialect: BigQuery's numbering function is
# ROW_NUMBER(), there is no rownum().
prompt = f'''
You are an accurate sql mapping assistant 
write a big query sql that uses the sources and maps to the target definition.
The Jsons contain the metadata of the sources and targets.
Important : Dont Use the Json in the SQL.
Json for the Sources = {json_data_sources}  
Json for the Targets = {json_data_targets}
The sql should be accurate and should not contain any errors.
The sql should be modular but no unnecessary repitition.

example of getting latest order by customer id
Query :
SELECT OrderId, OrderDate, CustomerId
from
(
    Select OrderId,
    OrderDate,
    CustomerId,
    ROW_NUMBER() over (partition by CustomerId order by OrderId desc) as rn 
    FROM Orders 
) OrderRNK
where rn = 1
'''

print(prompt)



You are an accurate sql mapping assistant 
write a big query sql that uses the sources and maps to the target definition.
The Jsons contain the metadata of the sources and targets.
Important : Dont Use the Json in the SQL.
Json for the Sources = [{"SourceTableName":"Ptnt101","SourceColumnName ":"ID","SourceColumnDataType":"int","UniqueDataValues ":"1,2","TableDescription":"patient table","Nullable":"N"},{"SourceTableName":"Ptnt101","SourceColumnName ":"firstname","SourceColumnDataType":"varchar","UniqueDataValues ":"Raj,Kamal","TableDescription":"patient table","Nullable":"Y"},{"SourceTableName":"Ptnt101","SourceColumnName ":"lastname","SourceColumnDataType":"varchar","UniqueDataValues ":"Kumar,Hassan","TableDescription":"patient table","Nullable":"Y"},{"SourceTableName":"PTINS101","SourceColumnName ":"ptntid","SourceColumnDataType":"int","UniqueDataValues ":"1,2","TableDescription":"patient insurance bridge","Nullable":"N"},{"SourceTableName":"PTINS101","SourceColumnName ":"insid","S

In [20]:
import os
import re

import requests
from dotenv import load_dotenv

# Send `prompt` (built in an earlier cell) to the Gemini generateContent
# endpoint and extract the generated SQL text into `sql_code`.

# Load the environment variables from the .env file
load_dotenv()

# Access the API key; fail early with a clear message instead of sending an
# unauthenticated request.
api_key = os.getenv('GEMINI_API_KEY')
if not api_key:
    raise RuntimeError(
        'GEMINI_API_KEY is not set; define it in the environment or in the .env file'
    )

# Set the API endpoint and request body
url = 'https://generativelanguage.googleapis.com/v1beta/models/gemini-pro:generateContent'
payload = {
    "contents": [
        {
            "parts": [
                {
                    "text": prompt
                }
            ]
        }
    ]
}

# Make the API request.
# The key travels in the x-goog-api-key header rather than in the query string
# so it cannot leak through URLs that end up in logs or exception messages.
headers = {
    'Content-Type': 'application/json',
    'x-goog-api-key': api_key,
}
# Without a timeout a stalled connection would hang the kernel indefinitely.
response = requests.post(url, headers=headers, json=payload, timeout=120)

# Check the response: stop on failure instead of falling through and trying
# to parse an error body as if it were a successful generation.
if response.status_code != 200:
    raise RuntimeError(f'Error: {response.status_code} - {response.text}')

data = response.json()
print(data)

# A response may come back without candidates (e.g. when the prompt is
# blocked); report that explicitly rather than failing with a bare KeyError.
candidates = data.get('candidates') or []
if not candidates:
    raise RuntimeError(
        f'Gemini returned no candidates (promptFeedback: {data.get("promptFeedback")})'
    )

# Extract the SQL code text
try:
    sql_code = candidates[0]['content']['parts'][0]['text']
except (KeyError, IndexError) as exc:
    raise RuntimeError(f'Unexpected Gemini response structure: {data}') from exc

# Unwrap a markdown code fence if present. str.strip('```') only strips
# backtick characters, leaving the "sql" language tag glued to the query, so
# match the whole fence (optional language tag, optional closing fence in case
# the output was truncated) and keep just its content.
fence_match = re.search(
    r'```[A-Za-z0-9_+-]*[ \t]*\r?\n(.*?)(?:```|\Z)', sql_code, flags=re.DOTALL
)
if fence_match:
    sql_code = fence_match.group(1)
sql_code = sql_code.strip()

print(sql_code)


{'candidates': [{'content': {'parts': [{'text': '```sql\nWITH PatientBase AS (\n  SELECT\n    Ptnt101.ID AS PatientId,\n    CONCAT(Ptnt101.firstname, " ", Ptnt101.lastname) AS PatientName\n  FROM Ptnt101\n), PatientInsurance AS (\n  SELECT\n    PatientId,\n    MAX(ValidFrom) AS LatestValidFrom\n  FROM PTINS101\n  GROUP BY\n    PatientId\n), PatientLatestInsurance AS (\n  SELECT\n    PatientId,\n    LatestValidFrom\n  FROM PatientInsurance\n), PatientInsuranceDetails AS (\n  SELECT\n    PatientId,\n    PTINS101.ValidFrom,\n    PTINS101.insid AS InsuranceId,\n    INS101.name AS InsuranceName\n  FROM PTINS101\n  JOIN INS101\n    ON PTINS101.insid = INS101.Id\n  WHERE\n    PatientId IN (\n      SELECT\n        PatientId\n      FROM PatientLatestInsurance\n    )\n), PatientLatestInsuranceAddress AS (\n  SELECT\n    PatientId,\n    MAX(ValidFrom) AS LatestValidFrom\n  FROM PatientInsuranceDetails\n  GROUP BY\n    PatientId\n), PatientAddress AS (\n  SELECT\n    PatientId,\n    ADD101.ptntid,