In [1]:
from functions import read_sql_query, llm_response, apply_model
from langchain import HuggingFaceHub, PromptTemplate, LLMChain
import warnings
warnings.filterwarnings('ignore')

### Defining the Prompt ###
# Instruction prompt for the English-to-SQL task. It names the database and
# table, lists the columns, shows two worked examples and ends with the
# "SQL QUERY: " cue. `{query}` is the only placeholder.
#
# The text is assembled from one literal per line (adjacent literals are
# joined at compile time) so the leading blank line, the trailing space after
# "just the SQL command." and the missing final newline are all explicit.
template = (
    "\n"
    "Context: You are an expert in converting English questions to SQL lite queries!\n"
    "The SQL database has the name OpenFoodTox.db and has the following table Substance_Characterization with the following columns: Substance, has, Component, CASNumber,\n"
    "ECRefNo, MolecularFormula, and smiles.\n"
    "For example:\n"
    "Example 1 - How many substances are available within OpenFoodTox?\n"
    "The SQL command will be something like this: SELECT COUNT(DISTINCT Substance) FROM Substance_Characterization;\n"
    "Example 2 - What is the CAS number of the Substance trans-3-Hexenyl hexanoate?\n"
    "The SQL command will be something like this: SELECT CASNumber FROM Substance_Characterization WHERE Substance = 'trans-3-Hexenyl hexanoate';\n"
    "Also, the SQL command should not have ' and the () at the beginning or at the end of the SQL word in the output.Your output should be just the SQL command. \n"
    "For example:\n"
    "If I ask you 'What is the CAS number of the Substance trans-3-Hexenyl hexanoate?' Your output must be just the SQL command, like this:  SELECT CASNumber FROM Substance_Characterization WHERE Substance = 'trans-3-Hexenyl hexanoate'.\n"
    "Don't Say anything else!!!!!\n"
    "Question: {query}\n"
    "\n"
    "SQL QUERY: "
)

# Wrap the raw text in a LangChain PromptTemplate, declaring `query` as the
# single variable to be substituted.
prompt = PromptTemplate(
    template=template,
    input_variables=["query"],
)

#### Testing The Model ####

# Model identifier passed unchanged to apply_model for every question.
model_name = "google/gemma-7b"

# Questions
# Natural-language test questions, sent to the model verbatim (spacing and
# punctuation are part of the input and are intentionally left untouched).
questions = [
    "How many substances are available within the OpenFoodTox database?",
    "Within OpenFoodTox how many substances have a CASnumber?",
    "What is the CASnumber for the following substance '(+)-13alpha-Tigloyloxylupanine'?",
    "What is the  CASnumber for the following substance '(-)-alpha-Santalene'?",
    "What is the CASnumber for the following substance '(2E)-3-(2-Anilino-6-methyl-4-pyrimidinyl)-2-propen-1-ol'?",
    "What is the substance name from the following CASnumber: 110-44-1' ? ",
    "What is the substance name from the following CASnumber: 19342-01-9' ?",
    "What is the molecular formula from the following CASnumber: 6119-92-2 ? ",
    "What is the smiles from the following CASnumber: 1113-21-9?",
    "What is the smiles from the following CASnumber: 110235-47-7?",
    "What is the smiles from the following Substance: (+)-Alpha-cedrene?",
    "What is the ECRefNo for the following Substance: (2E,7R,11R)-Phytol ?",
    "What is the Substance name of the following smiles: C[C@@H](CCC[C@@H](C)CCC/C(=C/COC(=O)C)/C)CCCC(C)C ?",
    "Regarding the 2,3,4,6-Tetrachloro-5-cyanobenzamide substance what is it's component and smiles? ",
    "Tell me the cas number of the following substance: 1,2,3-Trimethoxybenzene",
]

# Applying the model to each question
# Each iteration echoes the question, hands it to apply_model together with
# the model identifier and the prompt template, then prints a blank separator
# line. apply_model comes from the local `functions` module; judging by the
# recorded cell output it prints the generated SQL and the query result --
# confirm against its definition.
for question in questions:
    print(question)
    apply_model(model_name, question, prompt)
    print()


How many substances are available within the OpenFoodTox database?
SELECT COUNT(DISTINCT Substance) FROM Substance_Characterization;
Query result:
(5146,)

Within OpenFoodTox how many substances have a CASnumber?
SELECT COUNT(DISTINCT CASNumber) FROM Substance_Characterization;
Query result:
(4528,)

What is the CASnumber for the following substance '(+)-13alpha-Tigloyloxylupanine'?
SELECT CASNumber FROM Substance_Characterization WHERE Substance = '(+)-13alpha-Tigloyloxylupanine';
Query result:
('57943-34-7',)

What is the  CASnumber for the following substance '(-)-alpha-Santalene'?
SELECT CASNumber FROM Substance_Characterization WHERE Substance = '(-)-alpha-Santalene';
Query result:
('512-61-8',)

What is the CASnumber for the following substance '(2E)-3-(2-Anilino-6-methyl-4-pyrimidinyl)-2-propen-1-ol'?
SELECT CASNumber FROM Substance_Characterization WHERE Substance = '(2E)-3-(2-Anilino-6-methyl-4-pyrimidinyl)-2-propen-1-ol';
Query result:
(None,)
('110235-47-7',)

What is the su