In [5]:
# Import the dependencies:
import pandas as pd
import sqlalchemy
import numpy as np

# SQLite connection URL with no file path after the scheme; the notebook
# treats the resulting database as temporary.
database_connection_string = "sqlite:///"

# Build the engine that the later SQL cells use to talk to that database.
engine = sqlalchemy.create_engine(database_connection_string)

In [6]:
# LIMITING AND ORDERING DATA
# We know how to select data in database tables to do more targeted analyses.
# Now let's discuss limiting and ordering that data.
# In SQL, we use the `LIMIT` clause to control the number of rows that a query returns.
# To sort the data by using SQL, we use the `ORDER BY` clause.
# We use this clause together with the `ASC` or `DESC` keyword to specify whether to sort the values in ascending or descending order.
# Both the `LIMIT` and the `ORDER BY` clauses help us evaluate the data that a SQL query returns.

In [7]:
# LIMIT DATA
# In Pandas, we use indexing to select a specific range of data that we need.
# For example, to get the first three rows of a DataFrame, we use either the `head` function or indexing via `iloc`.
# Set the random seed so we all get the same random data:
# Seed NumPy's global random state so every run draws the same values.
np.random.seed(0)

# Ten rows of random integers (lower bound 0, upper bound 100) for two
# example tickers.
stocks_df = pd.DataFrame(
    data=np.random.randint(low=0, high=100, size=(10, 2)),
    columns=['AAPL', 'GOOG'],
)

# Show the first three rows twice -- once selected by position with the
# `iloc` indexer and once with the `head` method -- to see that both
# approaches give the same preview.
display(stocks_df.iloc[:3], stocks_df.head(3))

Unnamed: 0,AAPL,GOOG
0,44,47
1,64,67
2,67,9


Unnamed: 0,AAPL,GOOG
0,44,47
1,64,67
2,67,9


In [8]:
# Note that the DataFrame consists of two columns: 'AAPL' and 'GOOG'. 
# Only three of the ten rows are displayed.
# The elements in each column are whole numbers from 0 to 99 (the upper bound of 100 passed to `randint` is exclusive).
# In SQL, we can similarly limit the number of results that return. 
# We do this by adding a `LIMIT` clause to our `SELECT` statement.
# The following syntax shows the `SELECT` statement with the `LIMIT` clause included:
    # SELECT column_name, column_name, ...
    # FROM table_name
    # LIMIT number_of_entries;
# To limit the number of returned results, we can thus change any `SELECT` statement to include a `LIMIT` clause.
# Let's now try an example.
# Say that we want to query the `stocks_df` table, which consists of ten rows of data.
# We first write the DataFrame to the database, and our query then gets only the first three rows:
# Store the DataFrame as the `stocks_df` table, replacing any earlier copy
# so that this cell can be rerun, and leaving the DataFrame index out.
stocks_df.to_sql(name='stocks_df', con=engine, if_exists='replace', index=False)

# SQL that asks for every column but only the first three rows.
query = """
SELECT *
FROM stocks_df
LIMIT 3;
"""

# Run the query through the engine and show the resulting DataFrame.
limit_df = pd.read_sql_query(sql=query, con=engine)
limit_df

Unnamed: 0,AAPL,GOOG
0,44,47
1,64,67
2,67,9


In [9]:
# Notice that the DataFrame matches the ones that we generated earlier, when we accessed the first three rows of data by using `iloc[:3]` and `head(3)`.
# In all cases, we get the first three rows of the 'AAPL' and 'GOOG' columns from the table.
# We can also use the `LIMIT` clause with more advanced `SELECT` queries, such as those that include both `WHERE` and `LIMIT` clauses.
# For example, the following code selects the rows where the 'AAPL' value is less than 50, and it then limits the results to the first three of those rows:
# SQL that keeps only the rows whose AAPL value is below 50 and then returns
# at most three of them.
query = """
SELECT *
FROM stocks_df
WHERE AAPL < 50
LIMIT 3;
"""

# Run the filtered, limited query and show the resulting DataFrame.
where_limit_df = pd.read_sql_query(sql=query, con=engine)
where_limit_df

Unnamed: 0,AAPL,GOOG
0,44,47
1,36,87
2,39,87


In [10]:
# Notice that all the values from the 'AAPL' column are less than 50.
# This restriction, however, doesn't apply to the 'GOOG' column.
# The output thus shows values that are greater than 50 in the 'GOOG' column.
# Setting limits in SQL queries can optimize the data transfer between Pandas and SQL. 
# This proves beneficial when we create financial applications that query a database and then return the results.
# The reason is that we get only the data we need.
# Limits also prove useful when we create top-n and bottom-n lists - something that analysts love to do.
# For example, an analysis might need to find the top 10 stocks with the highest daily returns - and we can use limits to do this.
# However, one problem exists: We haven't yet been able to sort our results.
# To get the true top-n or bottom-n list, we need to order the results before we limit them.
# Otherwise, we'll return the first n rows of data from the table that meet the criteria rather than the top-n or bottom-n results.

In [None]:
# ORDER DATA
# In SQL, you order data by adding the `ORDER BY` clause to the `SELECT` statement:
    # SELECT column_name, column_name, ...
    # FROM table_name
    # ORDER BY column_name [ASC | DESC], column_name [ASC | DESC], ...;
# Notice that in the `ORDER BY` clause, you specify the columns by which you want to order the results.
# You can also sort the results by ascending or descending order.
# Let's change our previous query to sort the results in the 'AAPL' column in descending order:
# SQL that filters to AAPL values below 50, sorts those rows from the highest
# AAPL value to the lowest, and only then keeps the first three.
query = """
SELECT * 
FROM stocks_df
WHERE AAPL < 50
ORDER BY AAPL DESC
LIMIT 3;
"""

# Run the filtered, sorted, limited query and show the resulting DataFrame.
orderby_df = pd.read_sql_query(sql=query, con=engine)
orderby_df