In [60]:
from google.cloud import aiplatform
from langchain.agents import create_sql_agent
from langchain.agents.agent_toolkits import SQLDatabaseToolkit
from langchain.agents.agent_types import AgentType
from langchain.chains import create_sql_query_chain
from langchain.llms import VertexAI
from langchain.sql_database import SQLDatabase

In [61]:
db = SQLDatabase.from_uri("sqlite:///db.sqlite")

In [62]:
aiplatform.init(project="odin-321417")

In [81]:
from langchain_core.prompts import PromptTemplate

TEMPLATE = """
You are a SQLite expert. Given an input question, first create a syntactically correct SQLite query to run, then look at the results of the query and return the answer to the input question.
Never query for all columns from a table. You must query only the columns that are needed to answer the question. Wrap each column name in double quotes (") to denote them as delimited identifiers.
Pay attention to use only the column names you can see in the tables below. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.
Pay attention to use date('now') function to get the current date, if the question involves "today".

Use the following format:

Question: Question here
SQLQuery: SQL Query to run
SQLResult: Result of the SQLQuery
Answer: Final answer here

Only use the following tables:
CREATE TABLE traffic (
	date_of_metrics DATE NOT NULL, 
	total_visits INTEGER DEFAULT 0 NOT NULL, 
	brand_name TEXT, 
	phone_model TEXT, 
	partner TEXT, 
	partner_country TEXT, 
	partner_region TEXT
)

/*
10 rows from traffic table:
date_of_metrics	total_visits	brand_name	phone_model	partner	partner_country	partner_region
2023-11-05	8155	Samsung	Galaxy A54	EE	UK	EMEA
2023-11-05	18278	Apple	iPhone 15	EE	UK	EMEA
2023-11-05	7693	Apple	iPhone 13	EE	UK	EMEA
2023-11-05	7934	Google	Pixel 8 Pro	EE	UK	EMEA
2023-11-05	961	Apple	iPhone SE 3rd Generation	EE	UK	EMEA
2023-11-05	20482	Apple	iPhone 15 Pro	EE	UK	EMEA
2023-11-05	1521	Apple	iPhone 15 Plus	EE	UK	EMEA
2023-11-05	8133	Samsung	Galaxy Z Flip5	EE	UK	EMEA
2023-11-05	5704	Samsung	Galaxy Z Fold5	EE	UK		EMEA
2023-11-05	603	Apple	iPhone 12 Pro Max	EE	UK	EMEA*/

The traffic table captures comprehensive metrics related to website visits for various partners, brands, and phone models. The data is organized to provide insights into user interactions over time.

Columns:

date_of_metrics (Date): Represents the date of the recorded metrics, allowing a time-based analysis of website visits.

total_visits (Integer): Stores the total number of visits on a particular day for a specific partner, brand, and phone model. This column serves as a key metric for assessing daily user engagement.

brand_name (Text): Contains the name of the phone brand. A value of 'None' indicates that the corresponding row represents visits on the partner's website rather than a specific brand.

phone_model (Text): Stores the name of the phone model. A value of 'None' signifies visits on the brand's website, providing insights into the popularity of different phone models.

partner (Text): Holds the name of the partner associated with the website. This column enables the identification of trends and performance metrics for each partner.

partner_country (Text): Represents the country of the partner, providing geographical context to the data. Useful for analyzing regional variations in website visits.

partner_region (Text): Specifies the region of the partner, offering additional granularity in understanding the geographical distribution of website interactions.


Question: {question}
"""

CUSTOM_PROMPT = PromptTemplate(
    input_variables=["question"],
    template=TEMPLATE,
)

In [82]:
toolkit = SQLDatabaseToolkit(db=db, llm=VertexAI())

In [83]:
agent_executor = create_sql_agent(
    llm=VertexAI(),
    toolkit=toolkit,
    verbose=True,
    agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    prompt=CUSTOM_PROMPT
)

In [89]:
llm = VertexAI(temperature=0, verbose=True)
db_chain = create_sql_query_chain(llm, db, prompt=CUSTOM_PROMPT)

In [100]:
response = db_chain.invoke({"question": "What is the total Google visits in the last week visit for US?"})
print(response)



```sql
SELECT SUM("total_visits") AS "Total Google Visits"
FROM "traffic"
WHERE "date_of_metrics" BETWEEN date('now', '-7 days') AND date('now')
  AND "partner_country" = 'US'
  AND "brand_name" = 'Google';
```

```sql


In [101]:
response = db_chain.invoke({"question": "Give day wise Google visits in the last year for UK?"})
print(response)

```sql
SELECT date_of_metrics, SUM(total_visits) AS total_google_visits
FROM traffic
WHERE brand_name = 'Google'
AND partner_country = 'UK'
AND date_of_metrics >= date('now', '-1 year')
GROUP BY date_of_metrics
ORDER BY date_of_metrics;
```

```sql


In [98]:
response = db_chain.invoke({"question": "Average visits per brand"})
print(response)


```sql
-- Calculate the average total visits for each brand
SELECT "brand_name", AVG("total_visits") AS "Average Visits"
-- Group the results by brand name
GROUP BY "brand_name"
-- Order the results by average visits in descending order
ORDER BY "Average Visits" DESC;
```

```sql
-- SQLite query to calculate the average total visits for each brand
SELECT
    "brand_name",
    AVG("total_visits") AS "Average Visits" -- Calculate the average total visits
FROM
    traffic -- Specify the "traffic" table as the data source


In [57]:
agent_executor.run("Percentage of traffic apple vs google vs oppo vs samsung")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m Action: sql_db_list_tables
Action Input: [0m
Observation: [38;5;200m[1;3mtraffic[0m
Thought:[32;1m[1;3m The traffic table seems relevant.  I should query the schema of the traffic table.
Action: sql_db_schema
Action Input: traffic[0m
Observation: [33;1m[1;3m
CREATE TABLE traffic (
	date_of_metrics DATE NOT NULL, 
	total_visits INTEGER DEFAULT 0 NOT NULL, 
	brand_name TEXT, 
	phone_model TEXT, 
	partner TEXT, 
	partner_country TEXT, 
	partner_region TEXT
)

/*
3 rows from traffic table:
date_of_metrics	total_visits	brand_name	phone_model	partner	partner_country	partner_region
2023-11-12	3418573	None	None	EE	UK	EMEA
2023-11-12	64686	Apple	None	EE	UK	EMEA
2023-11-12	3924	Oppo	None	EE	UK	EMEA
*/[0m
Thought:[32;1m[1;3m The brand_name column seems relevant.  I should query the traffic table for the percentage of traffic for each brand_name.
Action: sql_db_query
Action Input: SELECT brand_name, CAST(total_visits as FLOA

'Apple accounts for 0.43% of the total traffic, Google accounts for 2.11%, Oppo accounts for 0.21%, and Samsung accounts for 0.53%.'

In [47]:
agent_executor.run("Total number of visits on samsung's galaxy lineup") 



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m Action: sql_db_list_tables
Action Input: [0m
Observation: [38;5;200m[1;3mtraffic[0m
Thought:[32;1m[1;3m The traffic table seems relevant.  I should query the schema of the traffic table.
Action: sql_db_schema
Action Input: traffic[0m
Observation: [33;1m[1;3m
CREATE TABLE traffic (
	date_of_metrics DATE NOT NULL, 
	total_visits INTEGER DEFAULT 0 NOT NULL, 
	brand_name TEXT, 
	phone_model TEXT, 
	partner TEXT, 
	partner_country TEXT, 
	partner_region TEXT
)

/*
3 rows from traffic table:
date_of_metrics	total_visits	brand_name	phone_model	partner	partner_country	partner_region
2023-11-12	3418573	None	None	EE	UK	EMEA
2023-11-12	64686	Apple	None	EE	UK	EMEA
2023-11-12	3924	Oppo	None	EE	UK	EMEA
*/[0m
Thought:[32;1m[1;3m The brand_name column seems relevant.  I should query the traffic table for the total visits for Samsung brand phones.
Action: sql_db_query
Action Input: SELECT SUM(total_visits) FROM traffic WHERE bran

"The total number of visits on Samsung's Galaxy lineup is 18,189,325."

In [48]:
agent_executor.run("Total number of visits on 2023-11-05 on samsung's galaxy lineup")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m Action: sql_db_list_tables
Action Input: [0m
Observation: [38;5;200m[1;3mtraffic[0m
Thought:[32;1m[1;3m The traffic table seems relevant.  I should query its schema to see what columns it has.
 Action: sql_db_schema
Action Input: traffic[0m
Observation: [33;1m[1;3m
CREATE TABLE traffic (
	date_of_metrics DATE NOT NULL, 
	total_visits INTEGER DEFAULT 0 NOT NULL, 
	brand_name TEXT, 
	phone_model TEXT, 
	partner TEXT, 
	partner_country TEXT, 
	partner_region TEXT
)

/*
3 rows from traffic table:
date_of_metrics	total_visits	brand_name	phone_model	partner	partner_country	partner_region
2023-11-12	3418573	None	None	EE	UK	EMEA
2023-11-12	64686	Apple	None	EE	UK	EMEA
2023-11-12	3924	Oppo	None	EE	UK	EMEA
*/[0m
Thought:[32;1m[1;3m The brand_name column seems relevant.  I should query the distinct values in that column to see what brands are in the database.
 Action: sql_db_query
Action Input: SELECT DISTINCT brand_name FRO

"The total number of visits on 2023-11-05 on Samsung's Galaxy lineup is 83343."

In [49]:
agent_executor.run("Compare number of visits on samsung's galaxy lineup and apple's iphone 14 lineup on 2023-11-05")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m Action: sql_db_list_tables
Action Input: [0m
Observation: [38;5;200m[1;3mtraffic[0m
Thought:[32;1m[1;3m The traffic table seems relevant.  I should query its schema to see what columns are available.
 Action: sql_db_schema
Action Input: traffic[0m
Observation: [33;1m[1;3m
CREATE TABLE traffic (
	date_of_metrics DATE NOT NULL, 
	total_visits INTEGER DEFAULT 0 NOT NULL, 
	brand_name TEXT, 
	phone_model TEXT, 
	partner TEXT, 
	partner_country TEXT, 
	partner_region TEXT
)

/*
3 rows from traffic table:
date_of_metrics	total_visits	brand_name	phone_model	partner	partner_country	partner_region
2023-11-12	3418573	None	None	EE	UK	EMEA
2023-11-12	64686	Apple	None	EE	UK	EMEA
2023-11-12	3924	Oppo	None	EE	UK	EMEA
*/[0m
Thought:[32;1m[1;3m The brand_name and phone_model columns seem relevant.  I should query the data for the specified date and filter by brand_name and phone_model.
 Action: sql_db_query
Action Input: SELECT b

'The Samsung Galaxy lineup had significantly more visits than the Apple iPhone 14 lineup on 2023-11-05.'

In [54]:
agent_executor.run("What is the total Google visits between 2023-11-12 and 2022-11-12 for US?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m Action: sql_db_list_tables
Action Input: [0m
Observation: [38;5;200m[1;3mtraffic[0m
Thought:[32;1m[1;3m The traffic table seems relevant.  I should query the schema of the traffic table.
 Action: sql_db_schema
Action Input: traffic[0m
Observation: [33;1m[1;3m
CREATE TABLE traffic (
	date_of_metrics DATE NOT NULL, 
	total_visits INTEGER DEFAULT 0 NOT NULL, 
	brand_name TEXT, 
	phone_model TEXT, 
	partner TEXT, 
	partner_country TEXT, 
	partner_region TEXT
)

/*
3 rows from traffic table:
date_of_metrics	total_visits	brand_name	phone_model	partner	partner_country	partner_region
2023-11-12	3418573	None	None	EE	UK	EMEA
2023-11-12	64686	Apple	None	EE	UK	EMEA
2023-11-12	3924	Oppo	None	EE	UK	EMEA
*/[0m
Thought:[32;1m[1;3m The date_of_metrics column seems relevant.  I should query the traffic table for the total_visits column, filtered by the date_of_metrics column between 2023-11-12 and 2022-11-12 and the partner_countr

'None'