# Import Dependencies

In [1]:
import os

import pandas as pd
from sqlalchemy import create_engine, inspect
# Lift pandas' display limits so frames are shown without row or column truncation
pd.options.display.max_rows = None
pd.options.display.max_columns = None

In [2]:
# Create MySQL Database Connection
# ----------------------------------
# The connection URL is taken from the PORTFOLIO_DB_URL environment variable so
# that real credentials never have to be written into (or saved with) the notebook.
# The placeholder URL is used only when that variable is not set.
db_url = os.environ.get('PORTFOLIO_DB_URL', 'mysql+pymysql://user:password@host/database')
# pool_recycle=3600 asks the pool to replace connections older than 3600 seconds
engine = create_engine(db_url, pool_recycle=3600)
conn = engine.connect()

In [3]:
# Confirm connection by listing the tables in the database.
# Engine.table_names() was deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the inspector API is the supported way to list table names.
inspect(engine).get_table_names()

['portfolio']

# Explore Data Using <font color="red">SELECT *</font>

<br>
<strong>SQL Syntax</strong><br>
SELECT *<br>
FROM table<br>
LIMIT VALUE;

In [4]:
# Preview query: every column of the portfolio table, capped at five rows
sql_view = (
    "SELECT * "
    "FROM portfolio "
    "LIMIT 5;"
)

In [5]:
# Execute the query on the open connection and collect the rows in a DataFrame
view_data = pd.read_sql(sql=sql_view, con=conn)
# Leave the frame as the last expression so it is rendered as the cell output
view_data

Unnamed: 0,MyUnknownColumn,mean_return,variance,pf_weights,bm_weights,Security,GICS Sector,GICS Sub Industry
0,A,0.146146,0.035194,0.0,0.0,Agilent Technologies Inc,Health Care,Health Care Equipment
1,AAL,0.444411,0.094328,0.214,0.0,American Airlines Group,Industrials,Airlines
2,AAP,0.242189,0.029633,0.0,0.0,Advance Auto Parts,Consumer Discretionary,Automotive Retail
3,AAPL,0.225074,0.027283,0.0,0.0,Apple Inc.,Information Technology,Computer Hardware
4,ABBV,0.182541,0.029926,0.0,0.0,AbbVie,Health Care,Pharmaceuticals


# Categories of Functions

<table>
    <tr>
        <th>Category</th>
        <th>Description</th>
        <th>Examples</th>
    </tr>
    <tr>
        <td>Text Functions</td>
        <td>Manipulate strings of text</td>
        <td>Extract part of a string.<br>
            Access uses: <code>MID()</code><br>
            DB2, Oracle, PostgreSQL, & SQLite use: <code>SUBSTR()</code><br>
            MariaDB, MySQL, and SQL Server use: <code>SUBSTRING()</code>
         </td>
    </tr>
        <tr>
        <td>Numeric Functions</td>
        <td>Perform mathematical operations on numeric data.</td>
        <td>To get the average.<BR>
           Most DBMS use: <code>Avg ( expression )</code><br>
         </td>
    </tr>
    <tr>
        <td>Date and Time Functions</td>
        <td>Manipulate date and time values and are used to extract specific datetime elements.</td>
        <td>To get current date.<BR>
            Access uses: <code>NOW()</code><br>
            DB2 & PostgreSQL use: <code>CURRENT_DATE</code><br>
            Oracle uses: <code>SYSDATE</code><br>
            MariaDB & MySQL use: <code>CURDATE()</code>
         </td>
    </tr>
        <tr>
        <td>System Functions</td>
        <td>Returns information about the specific DBMS used.</td>
        <td>Returns the user name and host name for the current user.<BR>
            MariaDB & MySQL use: <code>SYSTEM_USER()</code>
         </td>
    </tr>

</table>
    

# Example of <font color="red">Text</font> Manipulation Functions

### Example 1: <font color="red">LEFT()</font>

 - Lets you extract a substring from a string, starting from the left-most character.

In [6]:
# LEFT(Security, 5) keeps the five left-most characters of each security name;
# the full name is selected alongside it for comparison
sql_view1 = (
    "SELECT Security, LEFT(Security, 5) AS Left_Trimmed "
    "FROM portfolio "
    "LIMIT 5;"
)

In [7]:
# Execute the LEFT() query on the open connection and collect the rows in a DataFrame
view_data1 = pd.read_sql(sql=sql_view1, con=conn)
# Leave the frame as the last expression so it is rendered as the cell output
view_data1

Unnamed: 0,Security,Left_Trimmed
0,Agilent Technologies Inc,Agile
1,American Airlines Group,Ameri
2,Advance Auto Parts,Advan
3,Apple Inc.,Apple
4,AbbVie,AbbVi


### Example 2: <font color="red">LENGTH()</font>

 - Returns the length of the specified string.

In [8]:
# LENGTH(Security) reports the length of each security name, shown next to the name
sql_view2 = (
    "SELECT Security, LENGTH(Security) AS Length "
    "FROM portfolio "
    "LIMIT 5;"
)

In [9]:
# Execute the LENGTH() query on the open connection and collect the rows in a DataFrame
view_data2 = pd.read_sql(sql=sql_view2, con=conn)
# Leave the frame as the last expression so it is rendered as the cell output
view_data2

Unnamed: 0,Security,Length
0,Agilent Technologies Inc,24
1,American Airlines Group,23
2,Advance Auto Parts,18
3,Apple Inc.,10
4,AbbVie,6


### Example 3: <font color="red">UCASE()</font>

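 - Converts all characters in the specified string to uppercase.
 - In MySQL, <code>UCASE()</code> is a synonym for <code>UPPER()</code>; some DBMSs support only <code>UPPER()</code>, which is what the query below uses.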

In [10]:
# UPPER(Security) gives each security name in upper case, next to the original name
sql_view3 = (
    "SELECT Security, UPPER(Security) AS UpperCased "
    "FROM portfolio "
    "LIMIT 5;"
)

In [11]:
# Execute the UPPER() query on the open connection and collect the rows in a DataFrame
view_data3 = pd.read_sql(sql=sql_view3, con=conn)
# Leave the frame as the last expression so it is rendered as the cell output
view_data3

Unnamed: 0,Security,UpperCased
0,Agilent Technologies Inc,AGILENT TECHNOLOGIES INC
1,American Airlines Group,AMERICAN AIRLINES GROUP
2,Advance Auto Parts,ADVANCE AUTO PARTS
3,Apple Inc.,APPLE INC.
4,AbbVie,ABBVIE


# Example of <font color="red">Numeric</font> Manipulation Functions


<table>
    <tr>
        <th>Function</th>
        <th>Description</th>
    </tr>
    <tr>
        <td><code>ABS()</code></td>
        <td>Returns absolute value</td>
    </tr>
    <tr>
        <td><code>SQRT()</code></td>
        <td>Returns the square root of a specified number</td>
    </tr>  
    <tr>
        <td><code>n DIV m</code></td>
        <td>Integer division: n is divided by m and an integer value is returned (in MySQL, <code>DIV</code> is an infix operator rather than a function)</td>
    </tr>
    <tr>
        <td><code>MAX()</code></td>
        <td>Returns the maximum value</td>
    </tr>
    <tr>
        <td><code>MIN()</code></td>
        <td>Returns the minimum value</td>
    </tr>
    <tr>
        <td><code>ROUND(number, [ decimal_places])</code></td>
        <td>Rounds a number to the specified number of decimal places</td>
    </tr>
    <tr>
        <td><code>AVG()</code></td>
        <td>Returns the mean</td>
    </tr>
    <tr>
        <td><code>COUNT()</code></td>
        <td>Returns the number of values</td>
    </tr>    
</table>
    

### Example 1: <font color="red">AVG()</font>

 - Returns mean

In [12]:
# AVG(mean_return) collapses the whole mean_return column into a single average
sql_view4 = (
    "SELECT AVG(mean_return) AS AVG "
    "FROM portfolio;"
)

In [13]:
# Execute the AVG() query on the open connection and collect the result in a DataFrame
view_data4 = pd.read_sql(sql=sql_view4, con=conn)
# Leave the frame as the last expression so it is rendered as the cell output
view_data4

Unnamed: 0,AVG
0,0.148388


### Example 2: <font color="red">COUNT()</font>

 - Returns count

In [14]:
# COUNT(mean_return) counts the values present in the mean_return column
sql_view5 = (
    "SELECT COUNT(mean_return) AS Count "
    "FROM portfolio;"
)

In [15]:
# Execute the COUNT() query on the open connection and collect the result in a DataFrame
view_data5 = pd.read_sql(sql=sql_view5, con=conn)
# Leave the frame as the last expression so it is rendered as the cell output
view_data5

Unnamed: 0,Count
0,501


### Example 3: <font color="red">MAX()</font>

 - Returns the max value

In [16]:
# Returns the maximum mean_return per stock symbol (held in the "MyUnknownColumn" column).
# GROUP BY, ORDER BY ... DESC and LIMIT are combined to list the five stocks
# with the highest mean_return in the data.
sql_view6 = """SELECT MyUnknownColumn, MAX(mean_return) AS MAX 
                FROM portfolio GROUP BY MyUnknownColumn ORDER BY MAX DESC LIMIT 5;"""

In [17]:
# Execute the MAX() query on the open connection and collect the rows in a DataFrame
view_data6 = pd.read_sql(sql=sql_view6, con=conn)
# Leave the frame as the last expression so it is rendered as the cell output
view_data6

Unnamed: 0,MyUnknownColumn,MAX
0,EVHC,0.631652
1,NFLX,0.561537
2,REGN,0.486075
3,URI,0.452488
4,AAL,0.444411


# Example of <font color="red">Date and Time</font> Manipulation Functions


<table>
    <tr>
        <th>Function</th>
        <th>Description</th>
    </tr>
    <tr>
        <td><code>DAYOFMONTH(date_value)</code></td>
        <td>Returns the day of the month for a date or datetime value</td>
    </tr>
    <tr>
        <td><code>DATEDIFF(date1, date2)</code></td>
        <td>Returns the difference between two dates</td>
    </tr>  
    <tr>
        <td><code>STR_TO_DATE( string, format_mask )</code></td>
        <td>Converts a string to a date, using the format mask to parse it</td>
    </tr> 
</table>
    

### Example 1: <font color="red">DAYOFMONTH()</font>

 - Returns the day of the month for a date or datetime value

In [18]:
# DAYOFMONTH() extracts the day-of-month part from the literal date '2020-01-27'
sql_view7 = (
    "SELECT DAYOFMONTH('2020-01-27') "
    "AS Day;"
)

In [19]:
# Execute the DAYOFMONTH() query on the open connection and collect the result in a DataFrame
view_data7 = pd.read_sql(sql=sql_view7, con=conn)
# Leave the frame as the last expression so it is rendered as the cell output
view_data7

Unnamed: 0,Day
0,27


### Example 2: <font color="red">DATEDIFF(date1, date2)</font>

 - Returns the difference in days between two dates (date1 - date2), so the result is negative when date1 is the earlier date

In [20]:
# DATEDIFF(date1, date2) gives the gap between the two literal dates
sql_view8 = (
    "SELECT DATEDIFF('2017-05-1', '2018-10-11') "
    "AS Difference_days;"
)

In [21]:
# Execute the DATEDIFF() query on the open connection and collect the result in a DataFrame
view_data8 = pd.read_sql(sql=sql_view8, con=conn)
# Leave the frame as the last expression so it is rendered as the cell output
view_data8

Unnamed: 0,Difference_days
0,-528


### Example 3: <font color="red">STR_TO_DATE( string, format_mask )</font>

 - Returns a date specified by a format mask.

In [22]:
# STR_TO_DATE() parses the text 'February 01 2014' into a date using the format mask.
# Each percent sign in the mask is written twice (%%): that doubling is needed
# when the query is sent through SQLAlchemy.
sql_view9 = (
    "SELECT STR_TO_DATE('February 01 2014', '%%M %%d %%Y') "
    "AS Date;"
)

In [23]:
# Execute the STR_TO_DATE() query on the open connection and collect the result in a DataFrame
view_data9 = pd.read_sql(sql=sql_view9, con=conn)
# Leave the frame as the last expression so it is rendered as the cell output
view_data9

Unnamed: 0,Date
0,2014-02-01
