In [None]:
# Import python packages (Streamlit renders the summary section of this notebook)
import streamlit as st

# Import Snowpark packages
from snowflake.snowpark.context import get_active_session
# Fetch the notebook's active Snowpark session via get_active_session().
snowflake_session = get_active_session()

In [None]:
-- Make "DEMO" the current schema. The later cells reference the stage and the
-- file formats by unqualified name, so they presumably live in this schema.
use schema "DEMO";

# Inferring schemas directly

These files were all uploaded into an internal stage earlier, so we can dive straight into inferring the schema.

Inferring a schema from a file directly is achieved using the `infer_schema` table function. The call has the same structure regardless of the file format, taking the location of the file(s) and the file format to read them with:

```sql
table(
  infer_schema(
      location => '<location of file(s), including stage>'
    , file_format => '<Snowflake File Format object to use when reading the file>'
  )
)
```


## Inferring schemas directly - CSV

Let's quickly see the output from this table function using our example CSV file:

In [None]:
-- Infer the schema of the staged CSV file(s), read with the "FF_CSV" file format
select *
from table(
  infer_schema(
      location => '@"STG__DATA"/csv'
    , file_format => '"FF_CSV"'
  )
)

## Inferring schemas directly - JSON

And now let's see the output from this table function using our example JSON file:

In [None]:
-- Infer the schema of the staged JSON file(s), read with the "FF_JSON" file format
select *
from table(
  infer_schema(
      location => '@"STG__DATA"/json'
    , file_format => '"FF_JSON"'
  )
)

## Inferring schemas directly - Parquet

Finally, let's see the output from this table function using our example Parquet file:

In [None]:
-- Infer the schema of the staged Parquet file(s), read with the "FF_PARQUET" file format
select *
from table(
  infer_schema(
      location => '@"STG__DATA"/parquet'
    , file_format => '"FF_PARQUET"'
  )
)

In [None]:
# Summary section: one Streamlit column per file format, each showing the SQL
# that was executed alongside notes on how reliable the inferred metadata is.
# The sql__inferring_schemas_directly__* names are defined outside this cell
# (presumably they are the results of the three infer_schema SQL cells).
st.header("Inferring schemas directly - Summary")

st.markdown("So we have three very similar queries that output similar results, however there are some important differences:")


def _render_format_summary(column, title, sql_cell, notes):
  """Render one file format's summary inside a Streamlit column.

  Args:
    column: Streamlit column container to draw into.
    title: Sub-header text naming the file format.
    sql_cell: Object exposing a ``query_executed`` attribute, which is
      displayed as a SQL code block.
    notes: Markdown text describing the quality of the inferred metadata.
  """
  with column:
    st.subheader(title)
    # Plain attribute access rather than calling the __getattribute__ dunder
    # directly: it is the idiomatic form and still honours __getattr__.
    st.code(sql_cell.query_executed, "sql")
    st.markdown(notes)


isd_col_csv, isd_col_json, isd_col_parquet = st.columns(3, gap="large")

_render_format_summary(
  isd_col_csv,
  "CSV",
  sql__inferring_schemas_directly__csv,
  """
    Metadata inferred from CSV data is the least reliable
      - Number accuracy is estimated
      - Strings may incorrectly be inferred as other types
  """,
)
_render_format_summary(
  isd_col_json,
  "JSON",
  sql__inferring_schemas_directly__json,
  """
    Metadata inferred from JSON data is still not optimal
      - Number accuracy is estimated
  """,
)
_render_format_summary(
  isd_col_parquet,
  "Parquet",
  sql__inferring_schemas_directly__parquet,
  """
    Metadata inferred from Parquet files is exact
      - Metadata is stored within the file format
  """,
)
