In [9]:
from snowflake.snowpark.session import Session
from snowflake.snowpark.functions import avg, sum, col,lit
import configparser
import pandas as pd

# Load the Snowflake connection settings from the [Credentials] section of
# config.yml. Despite the .yml extension, the file is parsed by configparser
# and therefore has to be INI-formatted. The path is relative, so it is
# resolved against the current working directory.
CONFIG_PATH = "config.yml"

parser = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed. Fail fast with a clear message here instead of
# letting the first get() below raise a confusing NoSectionError.
if not parser.read(CONFIG_PATH):
    raise FileNotFoundError(
        f"Could not read configuration file {CONFIG_PATH!r} "
        "(expected in the current working directory)"
    )

# NOTE: ConfigParser's default interpolation treats '%' specially, so a literal
# '%' in any value (e.g. the password) must be written as '%%' in the file.
user = parser.get("Credentials", "username")
password = parser.get("Credentials", "password")
acctName = parser.get("Credentials", "account")
wh = parser.get("Credentials", "warehouse")
dbname = parser.get("Credentials", "database")
schema = parser.get("Credentials", "schema")
role = parser.get("Credentials", "role")

In [10]:
import streamlit as st
# Page-level Streamlit settings: browser tab title/icon, wide layout, sidebar
# open by default, and custom entries for the app's menu.
# NOTE(review): Streamlit documents that set_page_config should be the first
# Streamlit command executed on a page — keep this ahead of other st.* calls.
st.set_page_config(
    page_title="COVID-19 Epidemiological Data",
    page_icon="🧊",
    layout="wide",
    initial_sidebar_state="expanded",
    menu_items={
        'Get Help': 'https://developers.snowflake.com',
        'About': "This is an *extremely* cool app powered by Snowpark for Python, Streamlit, and Snowflake Data Marketplace",
    },
)

In [11]:
# Bundle the credentials loaded from the config file into the mapping that the
# Snowpark session builder consumes.
connection_parameters = dict(
    account=acctName,
    user=user,
    password=password,
    warehouse=wh,
    role=role,
    database=dbname,
    schema=schema,
)

# Open the Snowpark session used by every query in the rest of the notebook.
session = Session.builder.configs(connection_parameters).create()

# Smoke test: ask Snowflake which warehouse, database, schema, version and role
# the new session is actually using, and print the answer.
session.sql(
    "select current_warehouse() wh, current_database() db, current_schema() schema, current_version() v,current_role() role"
).show()

--------------------------------------------------------------------------------
|"WH"        |"DB"                          |"SCHEMA"  |"V"     |"ROLE"        |
--------------------------------------------------------------------------------
|COMPUTE_WH  |COVID19_EPIDEMIOLOGICAL_DATA  |PUBLIC    |7.22.1  |ACCOUNTADMIN  |
--------------------------------------------------------------------------------



In [12]:
# Total case count per country from the ECDC_GLOBAL table, ordered by country
# name. `sum` here is snowflake.snowpark.functions.sum (imported at the top of
# the notebook), not the Python builtin.
snow_df_covid_cases = (
    session.table("ECDC_GLOBAL")
    .group_by('COUNTRY_REGION')
    .agg(sum('CASES').alias("Total Number of COVID Cases"))
    .sort('COUNTRY_REGION')
)

# Print a preview of the aggregated result.
snow_df_covid_cases.show()

-------------------------------------------------------
|"COUNTRY_REGION"     |"Total Number of COVID Cases"  |
-------------------------------------------------------
|Afghanistan          |49273.0                        |
|Albania              |48530.0                        |
|Algeria              |92102.0                        |
|Andorra              |7338.0                         |
|Angola               |16188.0                        |
|Anguilla             |10.0                           |
|Antigua and Barbuda  |148.0                          |
|Argentina            |1498160.0                      |
|Armenia              |148682.0                       |
|Aruba                |5049.0                         |
-------------------------------------------------------



In [13]:

# Run the aggregation and bring the result rows back to the client so they can
# be handed to pandas in the next cell.
list_df_covid_cases = snow_df_covid_cases.collect()


In [14]:
# Build a local pandas frame from the collected rows. The first column is
# relabelled from COUNTRY_REGION to "COUNTRY"; later cells rely on that name.
pandas_df = pd.DataFrame(
    data=list_df_covid_cases,
    columns=["COUNTRY", "Total Number of COVID Cases"],
)

In [10]:
# Sanity check: confirm the conversion above produced a pandas DataFrame.
print(type(pandas_df))


<class 'pandas.core.frame.DataFrame'>


In [15]:
# Print the frame's plain-text representation; pandas elides the middle rows
# and reports the overall shape at the end.
print(pandas_df)

               COUNTRY  Total Number of COVID Cases
0          Afghanistan                      49273.0
1              Albania                      48530.0
2              Algeria                      92102.0
3              Andorra                       7338.0
4               Angola                      16188.0
..                 ...                          ...
209  Wallis and Futuna                          3.0
210     Western Sahara                        766.0
211              Yemen                       2083.0
212             Zambia                      18274.0
213           Zimbabwe                      11246.0

[214 rows x 2 columns]


In [16]:
# Page title and tagline for the Streamlit app.
# NOTE(review): the recorded output below points at `streamlit run`, which
# suggests these calls only render when launched that way — confirm.
st.header("Starschema: COVID-19 Epidemiological Data")
st.subheader("Powered by Snowpark for Python and Snowflake Data Marketplace | Made with Streamlit")

2023-07-06 12:03:58.646 
  command:

    streamlit run C:\Users\ac97599\Anaconda3\lib\site-packages\ipykernel_launcher.py [ARGUMENTS]


DeltaGenerator()

In [17]:
# Section 1: the full per-country table.
with st.container():
    st.subheader('Number of Covid Cases by Country')
    st.dataframe(pandas_df)


# Section 2: bar chart of the ten countries with the highest totals, placed
# inside an expander (created with an empty label).
with st.container():
    st.subheader('Number of COVID Cases in Top 10 Countries')
    with st.expander(""):
        # Rank by total cases, highest first, and keep the first ten rows.
        pd_top_n = (
            pandas_df
            .sort_values(by='Total Number of COVID Cases', ascending=False)
            .head(10)
        )
        # Index by country so the country names label the bars.
        # NOTE(review): Streamlit documents that use_container_width takes
        # precedence over width — confirm whether width=850 has any effect.
        st.bar_chart(
            data=pd_top_n.set_index('COUNTRY'),
            width=850,
            height=500,
            use_container_width=True,
        )

In [18]:
# Narrow the aggregated Snowpark frame to the row(s) whose country is India.
filtered_df = snow_df_covid_cases.filter(
    col('COUNTRY_REGION') == "India"
)

In [19]:
# Print the filtered result to check the India total.
filtered_df.show()

----------------------------------------------------
|"COUNTRY_REGION"  |"Total Number of COVID Cases"  |
----------------------------------------------------
|India             |9884100.0                      |
----------------------------------------------------

