# Menu Database Analysis and Report
### For Analytics, we should use the menu_cstore database
The column store is optimized to run ad hoc queries on billions of rows without indexes.
It combines columnar data on low-cost object storage with multi-threaded query processing,
which makes it well suited to cloud data warehousing and analytics.

In [1]:
import pandas as pd
from IPython.display import display
import mysql.connector

In [2]:
# Functions used to connect to the cloud database and display query results.
def create_connection(user=None, password=None, host=None, port=None, ssl_ca=None):
    """Open a connection to the MariaDB SkySQL service and return it.

    Each setting is resolved in this order: the explicit argument, then the
    matching environment variable (``MENU_DB_USER``, ``MENU_DB_PASSWORD``,
    ``MENU_DB_HOST``, ``MENU_DB_PORT``, ``MENU_DB_SSL_CA``), then the
    notebook's original built-in value.  Calling ``create_connection()`` with
    no arguments and none of those variables set behaves exactly as before.

    Args:
        user: Database user name.
        password: Database password.
        host: Database host name.
        port: TCP port (converted with ``int``).
        ssl_ca: Path to the CA certificate chain file.

    Returns:
        The connection with ``autocommit`` set to ``False``.  If
        ``mysql.connector.Error`` is raised, the error is printed instead of
        propagated and whatever was assigned so far is returned, which is
        ``None`` when the connect call itself failed.
    """
    import os

    def _setting(value, env_name, fallback):
        # An explicit argument wins, then the environment, then the fallback.
        if value is not None:
            return value
        return os.environ.get(env_name, fallback)

    # NOTE(review): the fallback user name and password are committed in plain
    # text.  Rotate this credential and supply it through the environment (or
    # the arguments) so the literals can be removed from the notebook.
    settings = {
        "user": _setting(user, "MENU_DB_USER", "DB00004175"),
        "password": _setting(password, "MENU_DB_PASSWORD", "tTNlXEirHh)EmwXhtp4koTR"),
        "host": _setting(host, "MENU_DB_HOST", "menu-data-db.mdb0002067.db.skysql.net"),
        "ssl_ca": _setting(ssl_ca, "MENU_DB_SSL_CA", "skysql_chain.pem"),
        "port": int(_setting(port, "MENU_DB_PORT", 5001)),
    }

    conn = None
    try:
        conn = mysql.connector.connect(**settings)
        # Transactions are committed explicitly by the helpers that write.
        conn.autocommit = False
    except mysql.connector.Error as e:
        print(f"Error connecting to MariaDB Platform: {e}")
    return conn

def sql_execute(conn, sql):
    """Execute one or more SQL statements on ``conn`` and commit them.

    Args:
        conn: An open database connection exposing ``cursor()`` and
            ``commit()``.
        sql: SQL text; may contain several ``;``-separated statements.

    Returns:
        None.  A ``mysql.connector.Error`` is printed rather than raised.
    """
    try:
        cur = conn.cursor()
        try:
            # With multi=True, execute() hands back an iterator and each
            # statement only runs as the iterator is advanced, so it has to be
            # consumed before committing.
            for result in cur.execute(sql, multi=True):
                if result.with_rows:
                    # Drain any result set so the next statement can proceed.
                    result.fetchall()
            conn.commit()
        finally:
            # Always release the cursor, even when a statement fails.
            cur.close()
    except mysql.connector.Error as e:
        print(e)

def sql_execute_show(conn, sql):
    """Run a query, display the result as a DataFrame, and return it.

    Args:
        conn: An open database connection passed to ``pd.read_sql`` as ``con``.
        sql: The SQL query string.

    Returns:
        The resulting DataFrame, or ``None`` when the query failed (the error
        is printed rather than raised).
    """
    try:
        df = pd.read_sql(sql, con=conn)
    except (mysql.connector.Error, pd.io.sql.DatabaseError) as e:
        # pandas re-raises driver failures as its own DatabaseError, so that
        # type must be caught too or a bad query escapes this handler.
        print(e)
        return None
    display(df)
    return df

# Querying Example
- Only call the create_connection() function once
- Use the sql_execute_show() function to display results in python
- Remember to close connection at the end of your session

In [3]:
# Only call once per session: every query cell in this notebook reuses `conn`.
conn = create_connection()

In [5]:
# sql_execute_show() parameters:
#   conn = the connection object created by create_connection()
#   sql  = the string of your SQL query
# This query averages calories, total fat and protein for each year.
sql_execute_show(conn, sql = '''
SELECT year, AVG(calories), AVG(total_fat), AVG(Protein) FROM menu_cstore.nutrition_facts
GROUP BY year
;
''')

Unnamed: 0,year,AVG(calories),AVG(total_fat),AVG(Protein)
0,2008,477.7172,26.1575,18.9965
1,2010,369.407,17.5817,12.5405
2,2012,439.7074,32.7203,24.1088
3,2013,370.0542,16.1378,13.4993
4,2014,375.3779,16.7372,13.4871
5,2015,407.4768,18.4459,15.2967
6,2016,378.0453,16.8043,14.0246
7,2017,374.0719,16.9589,13.9133
8,2018,386.7493,17.497,14.7468


In [16]:
# Average calories, total fat and protein for each restaurant name.
# The LEFT JOIN keeps nutrition rows that have no matching restaurant row;
# those rows are grouped under a NULL r_name.
sql_execute_show(conn, sql = '''
SELECT b.r_name, AVG(calories), AVG(total_fat), AVG(Protein)
FROM menu_cstore.nutrition_facts a
LEFT JOIN menu_cstore.restaurant b ON a.r_id = b.r_id
GROUP BY b.r_name
;
''')

Unnamed: 0,r_name,AVG(calories),AVG(total_fat),AVG(Protein)
0,,459.4577,24.4788,17.7381
1,7 Eleven,289.9115,14.1728,9.8063
2,Applebee's,535.8140,29.0960,23.8558
3,Arby's,347.4370,15.9087,13.2212
4,Auntie Anne's,275.5607,5.2865,4.4921
...,...,...,...,...
92,Whataburger,434.8670,18.6071,13.5658
93,White Castle,378.2370,13.7935,8.1544
94,Wingstop,220.3913,11.6316,12.3684
95,Yard House,697.2137,40.0058,30.8827


In [17]:
# Close the connection at the end of the session.
# NOTE(review): later cells in this notebook still pass `conn` to
# sql_execute_show(); run this cell last, or call create_connection() again
# before running them.
conn.close()


# MariaDB ROLLUP Syntax Example
Reference: https://mariadb.com/kb/en/select-with-rollup/

**For each restaurant in the Entrées category,
perform a ROLLUP query, then use pandas pivot function to generate a nicer table**


In [16]:
# Average nutrition values per restaurant and year for cat_id = 5
# (presumably the Entrées category named in the heading; verify against
# menu_cstore.food_category).
# WITH ROLLUP appends subtotal rows whose grouping column is NULL; COALESCE
# relabels those NULLs as "All Restaurants" / "All Years".
# The result is kept in df_rollup so it can be pivoted afterwards.
df_rollup = sql_execute_show(conn, sql = '''
SELECT
    COALESCE(b.r_name, "All Restaurants") AS "Restaurant",
    COALESCE(year, "All Years") AS "Year",
    AVG(calories), AVG(total_fat), AVG(Protein)
FROM menu_cstore.nutrition_facts a
LEFT JOIN menu_cstore.restaurant b ON a.r_id = b.r_id
WHERE b.r_name != "None" AND a.cat_id = 5
GROUP BY b.r_name, year WITH ROLLUP
;
''')

Unnamed: 0,Restaurant,Year,AVG(calories),AVG(total_fat),AVG(Protein)
0,7 Eleven,2013,212.5000,9.6250,8.3750
1,7 Eleven,2014,213.3333,7.7778,8.2222
2,7 Eleven,2015,217.2727,8.5455,9.0909
3,7 Eleven,2016,210.0000,9.6667,14.0000
4,7 Eleven,2017,,,
...,...,...,...,...,...
715,Zaxby's,2016,950.1818,53.7727,72.9091
716,Zaxby's,2017,957.0000,55.5789,68.3684
717,Zaxby's,2018,1085.5660,52.9811,89.4906
718,Zaxby's,All Years,974.4121,67.8794,77.1709


**For the average calories for each restaurant in the Entrées category over the years**

In [18]:
# One row per "Restaurant", one column per "Year" value (including the
# "All Years" rollup label), filled with AVG(calories).
df_rollup.pivot(index=["Restaurant"], columns="Year", values="AVG(calories)")

Year,2008,2010,2012,2013,2014,2015,2016,2017,2018,All Years
Restaurant,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
7 Eleven,,,,212.5000,213.3333,217.2727,210.0000,,,214.1935
All Restaurant,,,,,,,,,,674.8127
Applebee's,345.0000,,888.6207,831.2069,767.9104,956.4935,755.2941,742.5532,781.9565,822.5980
Arby's,387.2857,414.0000,345.0000,335.0000,390.0000,390.0000,400.0000,387.5000,505.7143,403.2750
BJ's Restaurant & Brewhouse,,,,,,852.9091,855.3061,821.2500,826.8056,838.2589
...,...,...,...,...,...,...,...,...,...,...
Whataburger,617.7778,590.5556,565.5556,684.1379,684.8276,659.0909,647.9412,656.1538,665.3659,649.7297
White Castle,,,460.0000,460.0000,230.0000,367.1429,1284.0000,1284.0000,280.0000,874.2857
Wingstop,,,,,,,278.9333,278.9333,278.9333,278.9333
Yard House,,,,,,976.0380,1052.6471,1014.5161,1028.7097,1007.0686


**For the average protein for each restaurant in the Entrées category over the years**

In [55]:
#df_rollup.pivot(index=["Restaurant"], columns="Year", values="AVG(Protein)")

**For the average total_fat for each restaurant in the Entrées category over the years**

In [56]:
#df_rollup.pivot(index=["Restaurant"], columns="Year", values="AVG(total_fat)")

## We can also add the food category to the GROUP BY clause, making it a three-dimensional cube

In [5]:
# Average nutrition values grouped by restaurant, food category and year.
# WITH ROLLUP appends subtotal rows whose grouping column is NULL; COALESCE
# relabels those NULLs as "All Restaurants" / "All Categories" / "All Years".
# The `#AND r_name = ...` text inside the SQL string is a MariaDB
# end-of-line comment: an optional single-restaurant filter left disabled.
df_rollup2 = sql_execute_show(conn, sql = '''
SELECT
    COALESCE(year, "All Years") AS "Year",
    COALESCE(b.r_name, "All Restaurants") AS "Restaurant",
    COALESCE(c.cat_name, "All Categories") AS "Category",
    AVG(calories), AVG(total_fat), AVG(Protein)
FROM menu_cstore.nutrition_facts a
LEFT JOIN menu_cstore.restaurant b ON a.r_id = b.r_id
LEFT JOIN menu_cstore.food_category c ON a.cat_id = c.cat_id
WHERE r_name != "None" AND cat_name != "None" #AND r_name = "Applebee's"
GROUP BY b.r_name, c.cat_name, year WITH ROLLUP
;
''')

Unnamed: 0,Year,Restaurant,Category,AVG(calories),AVG(total_fat),AVG(Protein)
0,2013,7 Eleven,Appetizers & Sides,66.6667,2.3333,5.3333
1,2014,7 Eleven,Appetizers & Sides,80.0000,4.0000,6.0000
2,2015,7 Eleven,Appetizers & Sides,85.0000,5.0000,4.5000
3,2016,7 Eleven,Appetizers & Sides,80.0000,4.0000,6.0000
4,2017,7 Eleven,Appetizers & Sides,,,
...,...,...,...,...,...,...
7328,2017,Zaxby's,Toppings & Ingredients,151.8182,10.5714,3.3896
7329,2018,Zaxby's,Toppings & Ingredients,172.7941,14.7353,1.6471
7330,All Years,Zaxby's,Toppings & Ingredients,149.5472,14.4843,3.3007
7331,All Years,Zaxby's,All Categories,435.8547,29.8580,21.9276


In [6]:
# One row per ("Restaurant", "Category") pair, one column per "Year" value
# (including the "All Years" rollup label), filled with AVG(calories).
df_rollup2.pivot(index=["Restaurant", "Category"], columns="Year", values="AVG(calories)")

Unnamed: 0_level_0,Year,2008,2010,2012,2013,2014,2015,2016,2017,2018,All Years
Restaurant,Category,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
7 Eleven,All Categories,,,,,,,,,,289.9115
7 Eleven,Appetizers & Sides,,,,66.6667,80.0000,85.0000,80.0000,,,75.7143
7 Eleven,Baked Goods,,,,,,,,,,
7 Eleven,Beverages,,,,99.0000,103.4286,101.0000,130.6667,180.0000,100.0000,122.9556
7 Eleven,Burgers,,,,440.0000,440.0000,440.0000,,,,440.0000
...,...,...,...,...,...,...,...,...,...,...,...
Zaxby's,Entrees,900.3125,788.0714,909.9444,909.9444,1001.0000,1003.6842,950.1818,957.0000,1085.5660,974.4121
Zaxby's,Fried Potatoes,554.2000,485.3750,475.0000,452.6000,568.0000,568.0000,572.0000,572.0000,482.0000,523.9787
Zaxby's,Salads,555.6000,586.7000,500.4545,506.3636,548.2143,576.0000,522.1875,522.1875,507.3077,535.5000
Zaxby's,Sandwiches,1087.5714,1086.8889,895.5000,885.0000,958.5000,940.4545,990.4545,990.4545,904.0909,949.3590
