In [4]:
from langchain.sql_database import SQLDatabase

# Wrap the local SQLite file in LangChain's SQLDatabase; this handle is what
# the SQL query chain and the SQL agent toolkit are given further down.
SQLITE_URI = "sqlite:///db.sqlite"
db = SQLDatabase.from_uri(SQLITE_URI)

In [5]:
import os

from google.cloud import aiplatform

# Initialise the Vertex AI SDK. The project ID can be overridden with the
# GOOGLE_CLOUD_PROJECT environment variable so the notebook can run against
# another project without editing code; the original project stays the default.
aiplatform.init(project=os.environ.get("GOOGLE_CLOUD_PROJECT", "odin-321417"))

In [6]:
from langchain.llms import VertexAI

# Two Vertex AI models are used: a code model that writes the SQL, and the
# library-default model that phrases the final answer and drives the agent.
CODE_MODEL_NAME = "code-bison"

code_gen_llm = VertexAI(model_name=CODE_MODEL_NAME)
llm = VertexAI()

In [7]:
from langchain_core.prompts import PromptTemplate

# Prompt for the SQL-generation chain.
#
# The schema and sample rows are injected at run time through {table_info}
# instead of being pasted into the text, so the prompt cannot drift from the
# actual database. create_sql_query_chain supplies `input`, `top_k` and
# `table_info`, and documents that a custom prompt must accept all three.
# Only the hand-written domain notes about the traffic table remain literal.
TEMPLATE = """
You are a SQLite expert. Given an input question, first create a syntactically correct SQLite query to run, then look at the results of the query and return the answer to the input question.
Unless the user specifies in the question a specific number of examples to obtain, query for at most {top_k} results using the LIMIT clause as per SQLite.
Never query for all columns from a table. You must query only the columns that are needed to answer the question. Wrap each column name in double quotes (") to denote them as delimited identifiers.
Pay attention to use only the column names you can see in the tables below. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.
Pay attention to use date('now') function to get the current date, if the question involves "today".

Use the following format:

Question: Question here
SQLQuery: SQL Query to run
SQLResult: Result of the SQLQuery
Answer: Final answer here

Only use the following tables:
{table_info}

The traffic table compiles detailed metrics on website visits, including partner, brand, and phone model data. Key columns:

date_of_metrics (Date): Records the date for time-based analysis of website visits.
total_visits (Integer): Indicates daily user engagement by storing the total visits for a specific partner, brand, and phone model.
brand_name (Text): Contains the phone brand name (Google, Apple, etc).
phone_model (Text): Stores the phone model
partner (Text): Specifies the partner store (Best buy, amazon, etc).
partner_country (Text): Captures the partner's country, providing geographical context.
partner_region (Text): Specifies the partner's region for a detailed geographical understanding.

The cumulative total visits for a brand is stored where phone model is none

Question: {input}
"""

CUSTOM_PROMPT = PromptTemplate(
    input_variables=["input", "top_k", "table_info"],
    template=TEMPLATE,
)

# Second-stage prompt: given the question, the SQL that was run and the raw
# rows, ask the model to phrase the final answer.
template = (
    "\n"
    "Question: {question}\n"
    "SQL Query: {query}\n"
    "Result: {result}\n"
    "Answer: Using the sql query and result to answer the question. Write in business english"
)

prompt = PromptTemplate.from_template(template)

# Feed the filled-in prompt to the default VertexAI model defined earlier.
inference_chain = prompt | llm

In [8]:

from langchain.chains import create_sql_query_chain

# Question -> SQL chain: the code model writes the query using the custom prompt.
db_chain = create_sql_query_chain(
    llm=code_gen_llm,
    db=db,
    prompt=CUSTOM_PROMPT,
)

In [9]:
import re
import sqlite3
from contextlib import closing


def _extract_sql(response):
    """Pull the SQL statement out of an LLM response.

    Handles a Markdown fence with or without an ``sql``/``sqlite`` language
    tag, an unterminated fence, and a bare statement with no fence at all.
    """
    fenced = re.search(
        r"```(?:sqlite3?|sql)?\s*(.*?)(?:```|\Z)",
        response,
        re.DOTALL | re.IGNORECASE,
    )
    candidate = fenced.group(1) if fenced else response
    return candidate.strip()


def answer(question, sql=False, db_path="db.sqlite"):
    """Answer a natural-language question from the SQLite database.

    The question is turned into SQL by ``db_chain``, the SQL is executed
    against ``db_path``, and ``inference_chain`` phrases the final answer,
    which is printed after the question.

    Args:
        question: The natural-language question to answer.
        sql: When true, also print the generated SQL and the raw result rows.
        db_path: Path of the SQLite database file to query.

    Returns:
        None. All output is printed. If no SQL could be extracted or the
        query fails, the problem is reported and the answer step is skipped.
    """
    response = db_chain.invoke({"question": question})
    sql_query = _extract_sql(response)
    if sql:
        print(sql_query)

    if not sql_query:
        print("No SQL query could be extracted from the model response.")
        return

    # NOTE(security): the statement is model-generated and executed verbatim;
    # point db_path at a disposable or read-only copy if that is a concern.
    try:
        # closing() guarantees the connection is released even when the query fails.
        with closing(sqlite3.connect(db_path)) as conn:
            rows = conn.execute(sql_query).fetchall()
    except (sqlite3.Error, sqlite3.Warning) as exc:
        # Without rows there is nothing for the answer model to summarise,
        # so report the failure instead of continuing with an undefined result.
        print(f"SQL execution failed: {exc}")
        return

    result = str(rows)
    if sql:
        print(result)

    print(question)
    print(
        inference_chain.invoke(
            {"question": question, "query": sql_query, "result": result}
        )
    )

In [10]:
# Country filter plus a relative time window. `sql` keeps its default (False),
# so only the question and the final answer are printed.
answer("What is the traffic for US for last quarter?")

What is the traffic for US for last quarter?
 The total number of visits to the website from the United States in the last quarter was 1,073,153,766.


In [11]:
# Same country question with no time window.
answer("What is the traffic for US")

What is the traffic for US
 The total traffic for the United States is 10,772,702,267.


In [12]:
# Filter by partner name instead of country.
answer("What is the traffic for AT&T?")

What is the traffic for AT&T?
 AT&T had a total of 3,402,526,149 visits.


In [13]:
# Partner filter combined with a relative time window.
answer("What is the traffic for AT&T for last quarter?")

What is the traffic for AT&T for last quarter?
 AT&T experienced a total traffic of 346,750,425 visits in the last quarter.


In [14]:
# Partner name written without the ampersand.
# NOTE(review): the recorded answer equals the 'AT&T' total, so the model
# appears to normalise the spelling; the SQL is not printed — re-run with
# sql=True to confirm.
answer("What is the traffic for ATT?")

What is the traffic for ATT?
 AT&T had a total of 3,402,526,149 visits.


In [15]:
# Month given without a year.
# NOTE(review): the recorded answer assumes January 2023 and mentions a
# "known phone model" condition; the SQL is not printed — re-run with
# sql=True to check which filters were actually applied.
answer("What is the traffic for AT&T in jan")

What is the traffic for AT&T in jan
 In January 2023, AT&T had a total of 191,149,380 visits to its website from users with a known phone model.


In [16]:
# Partner and country together; sql=True also prints the generated SQL and
# the raw result rows before the answer.
answer("What is the traffic for AT&T US", sql=True)

SELECT SUM("total_visits") AS "Total Visits"
FROM traffic
WHERE partner = 'AT&T'
  AND partner_country = 'US';
[(3402526149,)]
What is the traffic for AT&T US
 AT&T US has a total of 3,402,526,149 visits.


In [17]:
# Partner/country combination for which the recorded answer reports no data.
answer("What is the traffic for AT&T UK")

What is the traffic for AT&T UK
 AT&T UK has no traffic data available in the provided database.


In [18]:
# NOTE(review): the question asks *when*, but the recorded SQL selects only
# MAX(total_visits), so the answer reports the peak value and no date.
answer("When was the highest the traffic for AT&T", sql=True)

SELECT MAX(total_visits) AS "Maximum Visits"
FROM traffic
WHERE partner = 'AT&T';
[(44969052,)]
When was the highest the traffic for AT&T
 AT&T experienced its highest traffic with a peak of 44,969,052 total visits.


In [19]:
# Brand filter with a per-country aggregate, keeping the top country.
answer("Which country had the highest vists for google", sql=True)

SELECT partner_country, SUM(total_visits) AS total_visits
FROM traffic
WHERE brand_name = 'Google'
GROUP BY partner_country
ORDER BY total_visits DESC
LIMIT 1;
[('US', 93045177)]
Which country had the highest vists for google
 The United States had the highest number of visits to Google, with a total of 93,045,177 visits.


In [20]:
# Two brands in one question.
# NOTE(review): the recorded SQL uses IN ('Google', 'Samsung'), so the figure
# is the combined total of both brands, not a per-brand comparison.
answer("Which country had the highest visits for google and samsung", sql=True)


SELECT partner_country, SUM(total_visits) AS total_visits
FROM traffic
WHERE brand_name IN ('Google', 'Samsung')
GROUP BY partner_country
ORDER BY total_visits DESC
LIMIT 1;
[('US', 465221335)]
Which country had the highest visits for google and samsung
 The United States had the highest number of visits for both Google and Samsung, with a total of 465,221,335 visits.


In [21]:
# Brand filter with an explicit date range; the recorded SQL turns it into a
# BETWEEN on date_of_metrics.
answer("Which country had the highest vists for google between june to september 2023", sql=True)

SELECT partner_country, SUM(total_visits) AS total_visits
FROM traffic
WHERE brand_name = 'Google'
AND date_of_metrics BETWEEN '2023-06-01' AND '2023-09-30'
GROUP BY partner_country
ORDER BY total_visits DESC
LIMIT 1;
[('JP', 15456739)]
Which country had the highest vists for google between june to september 2023
 **Country with the Highest Google Visits from June to September 2023**

Based on the traffic data, Japan (JP) had the highest number of visits to Google between June and September 2023. With a total of 15,456,739 visits, Japan significantly surpassed other countries during this period. This indicates that Google experienced substantial user engagement and traffic from Japan during those months.


In [22]:
# First question answered from sales_data rather than traffic.
answer("Which partner had the highest sales", sql=True)

SELECT partner, SUM(current_week_sales) AS "Total Sales" 
-- From the sales_data table
FROM sales_data
-- Group the results by partner
GROUP BY partner
-- Order the results by Total Sales in descending order
ORDER BY "Total Sales" DESC
-- Limit the output to the top 1 row
LIMIT 1;
[('Verizon', 78603.0)]
Which partner had the highest sales
 Verizon had the highest sales among all partners, with total sales of $78,603.00 during the specified time period.


In [23]:
# Year filter; the recorded query returns an empty list for 2022.
answer("Which partner had the highest sales in 2022", sql=True)

SELECT 
    partner,
    SUM(current_week_sales) AS total_sales
FROM 
    sales_data
WHERE 
    year = 2022
GROUP BY 
    partner
ORDER BY 
    total_sales DESC
LIMIT 1;
[]
Which partner had the highest sales in 2022
 The partner with the highest sales in 2022 is not available in the provided data.


In [24]:
# Top-N ranking over summed current_week_sales.
answer("Top 3 partners in terms of sales", sql=True)

SELECT partner, SUM(current_week_sales) AS total_sales
FROM sales_data
GROUP BY partner
ORDER BY total_sales DESC
LIMIT 3;
[('Verizon', 78603.0), ('T-Mobile', 70607.0), ('Best Buy', 56451.0)]
Top 3 partners in terms of sales
 The top three partners in terms of sales are Verizon, T-Mobile, and Best Buy. Verizon contributed the most sales with a total of $78,603, followed by T-Mobile with $70,607, and Best Buy with $56,451.


In [25]:
# NOTE(review): the recorded SQL filters on week >= 27 only and omits
# year = 2023; the result is identical to the unfiltered top 3.
answer("Top 3 partners in terms of sales in the second half of 2023", sql=True)

SELECT partner, SUM(current_week_sales) AS total_sales
FROM sales_data
WHERE week >= 27
GROUP BY partner
ORDER BY total_sales DESC
LIMIT 3;
[('Verizon', 78603.0), ('T-Mobile', 70607.0), ('Best Buy', 56451.0)]
Top 3 partners in terms of sales in the second half of 2023
 In the second half of 2023, the top three partners in terms of sales were Verizon, T-Mobile, and Best Buy. Verizon had the highest sales, with a total of $78,603 in sales. T-Mobile was the second highest, with a total of $70,607 in sales. Best Buy was the third highest, with a total of $56,451 in sales.


In [26]:
# NOTE(review): the recorded result is 47.65..., but the answer renders it as
# "$47,654.97" — the currency and scale are invented by the answer model.
answer("Average weekly sales of AT&T", sql=True)

SELECT AVG("current_week_sales") AS "Average Weekly Sales"
FROM sales_data
WHERE partner = 'AT&T';
[(47.654970760233915,)]
Average weekly sales of AT&T
 On average, AT&T generates $47,654.97 in weekly sales.


In [31]:
# NOTE(review): the recorded result equals the unfiltered average (47.65...),
# yet the answer renders it as "$47.65 million" — units are invented and
# differ from the previous cell's rendering of the same number.
answer("Average weekly sales of AT&T in the second half of 2023", sql=True)

-- Calculate the average weekly sales for AT&T in the second half of 2023
SELECT AVG("current_week_sales") AS "Average Weekly Sales"
FROM "sales_data"
WHERE "partner" = 'AT&T'
  AND "year" = 2023
  AND "week" BETWEEN 27 AND 52;
[(47.654970760233915,)]
Average weekly sales of AT&T in the second half of 2023
 In the second half of 2023, AT&T's average weekly sales were approximately $47.65 million.


In [None]:
from langchain.agents import create_sql_agent
from langchain.agents.agent_toolkits import SQLDatabaseToolkit

from langchain.agents.agent_types import AgentType

# Toolkit that exposes the database to the agent, backed by the same LLM.
sql_toolkit = SQLDatabaseToolkit(db=db, llm=llm)

# Zero-shot ReAct SQL agent; verbose=True is passed through to the agent.
agent_executor = create_sql_agent(
    llm=llm,
    toolkit=sql_toolkit,
    agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
)

In [None]:
# Run the SQL agent on a sample question. The stray leading "S" made the
# original name undefined and raised NameError.
agent_executor.run(
    "What is the total Google visits in the last week visit for US?"
)