In [None]:
from snowflake.snowpark.context import get_active_session
# Snowpark session handle; later cells use it to create DataFrames and save temporary tables.
session = get_active_session()

# Growth Accounting


In [None]:
-- Order revenue per customer per calendar year (TPC-H sample ORDERS table).
SELECT
    o_custkey                       AS id,
    DATE_TRUNC('year', o_orderdate) AS order_year,
    SUM(o_totalprice)               AS total
FROM SNOWFLAKE_SAMPLE_DATA.TPCH_SF1.ORDERS
GROUP BY 1, 2
ORDER BY id, order_year

In [None]:
import pandas as pd

In [None]:
df = cell3.to_pandas()

# Step 1: go wide (one column per ORDER_YEAR) so every ID has a cell for every
# year; combinations with no revenue are filled with 0.
orders_wide = df.pivot_table(
    index='ID',
    columns='ORDER_YEAR',
    values='TOTAL',
    fill_value=0,
)

# Step 2: melt back to long form, giving one row per (ID, ORDER_YEAR).
result = orders_wide.reset_index().melt(
    id_vars='ID',
    var_name='ORDER_YEAR',
    value_name='TOTAL',
)

# Step 3: store the result as temporary table "df" so SQL cells can query it.
df = session.create_dataframe(result)
df.write.mode("overwrite").save_as_table("df", table_type="temporary")

In [None]:
-- Quick look at the first rows of the temporary table "df".
SELECT *
FROM df
ORDER BY id, order_year
LIMIT 10

In [None]:
-- Growth accounting: label every (id, order_year) row by how spend moved
-- relative to the previous row for the same id, and compute the net change.
WITH windowed AS (
    SELECT
        *,
        -- spend accumulated for this id, window ordered by order_year
        SUM(total) OVER (PARTITION BY id ORDER BY order_year ASC) AS lifetime_spend,
        -- spend of the preceding row for this id; 0 when there is none
        COALESCE(LAG(total) OVER (PARTITION BY id ORDER BY order_year ASC), 0) AS previous_year_total
    FROM df
)

SELECT
    *,
    CASE
        WHEN total = previous_year_total AND total > 0 THEN 'retained'
        WHEN total > 0 AND previous_year_total = 0 AND lifetime_spend = total THEN 'new'
        WHEN total = 0 AND previous_year_total > 0 THEN 'churned'
        WHEN total > previous_year_total AND previous_year_total > 0 THEN 'expanded'
        WHEN total < previous_year_total AND previous_year_total > 0 THEN 'contracted'
        WHEN total > 0 AND previous_year_total = 0 AND lifetime_spend > total THEN 'resurrected'
        ELSE 'irrelevant'
    END AS category,
    -- net_change reuses the category alias computed just above
    CASE
        WHEN category = 'retained' THEN 0
        WHEN category = 'new' THEN total
        WHEN category = 'churned' THEN (-1 * previous_year_total)
        WHEN category = 'expanded' THEN total - previous_year_total
        WHEN category = 'contracted' THEN (-1 * (previous_year_total - total))
        WHEN category = 'resurrected' THEN total
        ELSE 0
    END AS net_change
FROM windowed
ORDER BY id, order_year

In [None]:
-- Roll the rows produced by cell6 up to one row per calendar year and category.
-- GROUP BY ALL is kept on purpose: the output alias order_year shares its name
-- with an input column, so grouping by name would be ambiguous.
SELECT
    DATE_PART(year, order_year) AS order_year,
    category,
    ROUND(SUM(total))           AS total,
    ROUND(SUM(net_change))      AS net_change
FROM {{ cell6 }}
GROUP BY ALL

In [None]:
import streamlit as st

# Net change per order year, colored by category.
# Ideas for later:
#   - color-code each category from a dictionary (may need matplotlib)
#   - switch to altair for finer control over the Y-axis ticks
st.bar_chart(
    data=cell4,
    x='ORDER_YEAR',
    y='NET_CHANGE',
    color='CATEGORY',
    height=750,
)

In [None]:
# Offer the rows produced by cell6 as a CSV download.
df = cell6.to_pandas()
# index=False keeps the DataFrame index out of the file; with the default the
# export starts with an extra unnamed column holding only row numbers.
button_csv = df.to_csv(index=False).encode("utf-8")
st.download_button(label="Download", data=button_csv, file_name="growth_accounting.csv", mime="text/csv")

# Forecasting


In [None]:
-- Total order value per day, oldest first (input series for the forecast cells).
SELECT
    DATE_TRUNC('day', o_orderdate) AS order_date,
    SUM(o_totalprice)              AS total
FROM SNOWFLAKE_SAMPLE_DATA.TPCH_SF1.ORDERS
GROUP BY ALL
ORDER BY order_date ASC

In [None]:
from prophet import Prophet
from prophet.plot import plot_plotly, plot_components_plotly

In [None]:
# Pull the daily totals into pandas and rename the columns to ds / y,
# the names the chart and model cells below refer to.
df = cell14.to_pandas().rename(columns={'ORDER_DATE': 'ds', 'TOTAL': 'y'})
print(df.head())

In [None]:
# Plot the daily totals (y) against the order date (ds).
st.line_chart(data=df, x='ds', y='y')

Waiting on the role permissions needed to write UDFs so that the Prophet library can run properly. Until then, the code cell below will return:
```<class 'Exception'> Failed with error [Errno 1] Operation not permitted: '/usr/lib/python_udf/d212b0f949a4a60cf75395f561f7016ea978bad39b2e60eee12ece87d118e861/lib/python3.9/site-packages/prophet/stan_model/prophet_model.bin'```

In [None]:
# Fit a Prophet model on the ds / y frame. Failures are reported instead of
# raised so the rest of the notebook can still be run.
m = Prophet()
try:
    m.fit(df)
except Exception as err:
    # Print the class of the error that was actually caught; the previous
    # print(Exception, err) always showed the base class <class 'Exception'>.
    print(type(err), err)

In [None]:
# Extend the model's frame by 365 daily periods, predict over it, and plot.
future = m.make_future_dataframe(periods=365, freq="D")
forecast = m.predict(future)
fig1 = m.plot(forecast)
# Optional component plot, currently disabled:
#fig2 = m.plot_components(forecast)

# Customer Segmentation

In [None]:
-- Preview ten rows of the uploaded SP500_COMPANY_LIST table.
SELECT *
FROM ADHOC_ANALYSIS.USER_UPLOADS.SP500_COMPANY_LIST
LIMIT 10

In [None]:
import requests

def get_wiki_extract(title, timeout=10):
    """Fetch the plain-text intro extract of a Wikipedia page.

    Args:
        title: Page title, sent as the API's ``titles`` parameter.
        timeout: Seconds to wait for the HTTP response. Without a timeout a
            single stalled request would block the caller indefinitely.

    Returns:
        The intro extract as a string; "No extract available" when the
        response contains no page or no extract; "Error: <status>" when the
        HTTP status is not 200.

    Raises:
        requests.RequestException: on connection failures or timeouts.
        ValueError: if the response body is not valid JSON.
    """
    # Base URL for Wikipedia's API
    url = "https://en.wikipedia.org/w/api.php"

    # Parameters for the API request
    params = {
        "action": "query",
        "format": "json",
        "titles": title,
        "prop": "extracts",
        "exintro": True,  # Only get the intro section
        "explaintext": True,  # Get plain text instead of HTML
    }

    response = requests.get(url, params=params, timeout=timeout)

    if response.status_code != 200:
        return f"Error: {response.status_code}"

    # A response may lack "query"/"pages" (e.g. for an empty title); treat that
    # the same as a page without an extract instead of raising KeyError/IndexError.
    pages = response.json().get("query", {}).get("pages", {})
    # Only one title is requested, so the first page is the only page.
    page = next(iter(pages.values()), {})
    return page.get("extract", "No extract available")

In [None]:
# Look up a Wikipedia intro for every company name and collect
# (name, description) pairs in csv_list.
df = cell16.to_pandas()
company_names = df['NAME'].tolist()

print("extracting descriptions")

csv_list = []
for company in company_names:
    # Best effort: any failure for one company yields a placeholder
    # description rather than stopping the loop.
    try:
        description = get_wiki_extract(company.replace(" ", "_"))
    except Exception:
        description = "None available"

    csv_list.append((company, description))

print("finished extracting descriptions")

In [None]:
# Store the (name, description) pairs as temporary table "prospects" so SQL
# cells can query them. The column names are lowercase, which is why the SQL
# cells reference them in double quotes.
df = session.create_dataframe(
    pd.DataFrame(csv_list, columns=['name', 'description'])
)
df.write.mode("overwrite").save_as_table("prospects", table_type="temporary")

In [None]:
-- Preview the collected company descriptions.
SELECT
    "name",
    "description"
FROM prospects
LIMIT 10

In [None]:
-- Classify each prospect's description into excellent / average / poor using
-- Cortex CLASSIFY_TEXT and keep only the returned label.
-- NOTE(review): the output alias hair_type looks like a leftover from another
-- example; confirm nothing downstream depends on it before renaming.
SELECT
    "name",
    "description",
    SNOWFLAKE.CORTEX.CLASSIFY_TEXT(
        "description",
        ['excellent', 'average', 'poor'],
        {
            'task_description': 'Return the likelihood that this company would be interested in purchasing manufacturing equipment based on this description.'
        }
    ):label::STRING AS hair_type
FROM prospects
-- other classification ideas: industry, main product, region