# Demo Setup

In [1]:
import connect
import requests
from automates.program_analysis.JSON2GroMEt.json2gromet import json_to_gromet
from automates.gromet.query import query


The following assumes that you have already generated the json (e.g., by running the notebook script `py_src_to_grometFN_JSON.ipynb`)

We first set up access to the MIRA Epidemiology Domain Knowledge Graph (DKG) web service.

In [2]:
MIRA_DKG_URL = 'http://34.230.33.149:8771'

def get_mira_dkg_term(term, attribs):
    res = requests.get(MIRA_DKG_URL + '/api/search', params={'q': term})
    term = [entity for entity in res.json() if entity['id'].startswith('askemo')][0]
    res = {attrib: term.get(attrib) for attrib in attribs if term.get(attrib) is not None}
    return res

# Load CHIME SIR gromet representation

In [3]:
gromet_fn_module = json_to_gromet("gromet/CHIME_SIR_while_loop--Gromet-FN-auto.json")

The following uses the simple query to collect all named output ports of a GrometFNModule.
`nops` will hold a `List` of `Tuples`, where each tuple has the following format:
```
(<name_of_output_port>,    # named output port == a variable in source code
 <literal_value>,          # IF a literal value has been assigned, otherwise None
 <source_code_reference>)  # metadatum for the source code location of the assignment
```

In [4]:
nops = query.collect_named_output_ports(gromet_fn_module)
print(nops[0])

('inv_contact_rate', None, {'code_file_reference_uid': '09a64930-e9e4-8ae5-f825-bdf7c1e729f9',
 'col_begin': 4,
 'col_end': 50,
 'line_begin': 3,
 'line_end': 3,
 'metadata_type': 'source_code_reference',
 'provenance': {'method': 'skema_code2fn_program_analysis',
                'timestamp': '2022-11-30 16:18:33.012848'}})


# Load all the source, document, and other files we need

In [49]:
from IPython.display import IFrame

####
# HERE IS CONTENT FOR PART 2: FORMULA-TO-MODEL-MATCHING
####
# CHIME SVIIR SOURCE
CHIME_SVIIvR_model_source = connect.read_text_from_file("model/SVIIvR/CHIME_SVIIvR.py")

# Formula LaTeX source
CHIME_SVIIvR_FORMULAS_PDF_DOC = "model/SVIIvR/CHIME_SVIIvR_model_equations.pdf"
#formula_pdf_file = "model/SV"
CHIME_SVIIvR_FORMULAS_LATEX = connect.read_text_from_file("model/SVIIvR/formula.tex_idx")


####
# HERE IS CONTENT FOR PART 3: TEXT-TO-MODEL-MATCHING
####
CHIME_SIR_model_source = connect.read_text_from_file("model/SIR/CHIME_SIR_while_loop.py")
CHIME_SIR_DESCRIPTION_PDF_DOC = "model/SIR/CHIME-online-manual-T2021-01-19.pdf";
CHIME_SIR_DESCRIPTION_TEXT = connect.read_text_from_file(connect.index_text("model/SIR/description.txt"))

###
# HERE IS CONTENT FOR PART 4: BUCKY/DATA
###
BUCKY_model_source = connect.read_text_from_file("./model/Bucky/bucky_sample.py")



# Load Ontology 

Next, we choose a set of terms and attributes to pull into a local ontology that we can use to connect to the model. In addition, for some of the terms, we define custom ranges in which their values for this model can be adjusted.

In [5]:
# Terms we want to find in MIRA and specific attributes we want to add to our local ontology
terms = ['population', 'doubling time', 'recovery time', 'infectious time']
attribs = ['description', 'synonyms', 'xrefs', 'suggested_unit', 'suggested_data_type',
           'physical_min', 'physical_max', 'typical_min', 'typical_max']

# The local ontology if filled up from the MIRA DKG
LOCAL_ONTOLOGY = {
    term: get_mira_dkg_term(term, attribs) for term in terms
    }

# We can also set further local / use-case specific constraints as needed
LOCAL_ONTOLOGY['population']['typical_min'] = 1000
LOCAL_ONTOLOGY['population']['typical_max'] = 40_000_000

LOCAL_ONTOLOGY

{'population': {'description': 'The number of people who live in an area being modeled.',
  'synonyms': [],
  'xrefs': [{'id': 'ido:0000509', 'type': 'skos:exactMatch'}],
  'suggested_unit': 'person',
  'suggested_data_type': 'int',
  'physical_min': 0.0,
  'typical_min': 1000,
  'typical_max': 40000000},
 'doubling time': {'description': 'The length of time that an infectious disease requires to double in incidence.',
  'synonyms': [{'value': 'doubling rate',
    'type': 'oboInOwl:hasRelatedSynonym'}],
  'xrefs': [{'id': 'cemo:epidemic_doubling_time', 'type': 'skos:exactMatch'}],
  'suggested_unit': 'day',
  'suggested_data_type': 'float',
  'physical_min': 0.0},
 'recovery time': {'description': 'The length of time an infected individual needs to recover after being infected.',
  'synonyms': [{'value': 'mean recovery time',
    'type': 'oboInOwl:hasExactSynonym'}],
  'xrefs': [],
  'suggested_unit': 'day',
  'suggested_data_type': 'float',
  'physical_min': 0.0},
 'infectious time': 

# DEMO: Model-to-Resource Matching

# Demo Part 1: Ontology Matching

### 1a. Ontology to Gromet matching

In [6]:
targets = ['population', 'infectious time']
terms = list(LOCAL_ONTOLOGY.keys())

parameters = set()
var_dict = {}
for nop in nops:
    if nop[1] is not None:
        parameters.add(nop[0])
        var_dict[nop[0]] = nop

discoveredParameterConnections = connect.match_gromet_targets(targets, list(parameters), var_dict, terms)
discoveredParameterConnections

[('population', {'s_n': 'grometSubObject'}, 1000.0, 81),
 ('infectious time', {'infections_days': 'grometSubObject'}, 14.0, 68)]

### 1b. Ontology to code matching

In [7]:
code = "model/SIR/CHIME_SIR_while_loop.py"
targets = ['population', 'infectious time']
discoveredParameterConnections = []
try:
    discoveredParameterConnections = connect.match_code_targets(targets, code, terms)
except OpenAIError as err:
    print("OpenAI connection error:", err)
    print("Using hard-coded connections")
    discoveredParameterConnections = [("infectious time", {"name": "grometSubObject"}, 14.0, 67),("population", {"name": "grometSubObject"}, 1000, 80)]

discoveredParameterConnections

Extracted variables:  [(3, 'inv_contact_rate', '1.0'), (11, 'growth_rate', '0'), (13, 'growth_rate', '2.0'), (34, 'index', '0'), (35, 'p_idx', '0'), (41, 'd_idx', '0'), (64, 'i_day', '17.0'), (65, 'n_days', '20'), (66, 'N_p', '3'), (67, 'N_t', '121'), (68, 'infections_days', '14.0'), (69, 'relative_contact_rate', '0.05'), (70, 'gamma', '1.0'), (81, 's_n', '1000'), (82, 'i_n', '1'), (83, 'r_n', '1'), (85, 'p_idx', '0')]


[('population', {'s_n': 'grometSubObject'}, 1000.0, 81),
 ('infectious time', {'infections_days': 'grometSubObject'}, 14.0, 68)]

# Demo Part 2: Formula-to-Model Matching

## We need the CHIME SVIIvR model...

In [34]:
print(CHIME_SVIIvR_model_source)


import sys
from csv import DictWriter, QUOTE_NONNUMERIC

def get_beta(intrinsic_growth_rate, gamma, susceptible, relative_contact_rate):
    """
    Calculates a rate of exposure given an intrinsic growth rate for COVID-19
    :param intrinsic_growth_rate: Rate of spread of COVID-19 cases
    :param gamma: The expected recovery rate from COVID-19 for infected individuals
    :param susceptible: Current amount of individuals that are susceptible
    :param relative_contact_rate: The relative contact rate amongst individuals in the population
    :return: beta: The rate of exposure of individuals to persons infected with COVID-19
    """
    inv_contact_rate = 1.0 - relative_contact_rate  # The inverse rate of contact between individuals in the population ## get_beta_icr_exp
    updated_growth_rate = intrinsic_growth_rate + gamma  # The intrinsic growth rate adjusted for the recovery rate from infection ## get_beta_ugr_exp
    beta = updated_growth_rate / susceptible * inv_contact_rate  

## And a document with some descriptive formulas... 

In [35]:
IFrame(CHIME_SVIIvR_FORMULAS_PDF_DOC, width=600, height=300)


## We used a tool to extract the source LaTeX from the PDF...

In [36]:
print(CHIME_SVIIvR_FORMULAS_LATEX)


1	S^\prime &=& - \beta S I - \beta S I_{v} - v_{r} S
2	V^\prime &=& v_{r} S - v_{s} V I - v_{s} V I_{v}
3	I^\prime &=& \beta S I + \beta S I_{v} - \gamma I
4	I_{v}^\prime &=& v_{s} V I + v_{s} V I_{v} - \gamma I_{v}
5	R^\prime &=& \gamma I + \gamma I_v



## Finally, we can find connections between the model and its descriptive formulas

In [33]:
# REMINDER: CHANGE THIS SO THE API TAKES IN THE ACTUAL VALUES, NOT THE PATHS
connect.formula_code_connection(CHIME_SVIIvR_model_source, formula)

OSError: [Errno 63] File name too long: 'import sys\nfrom csv import DictWriter, QUOTE_NONNUMERIC\n\ndef get_beta(intrinsic_growth_rate, gamma, susceptible, relative_contact_rate):\n    """\n    Calculates a rate of exposure given an intrinsic growth rate for COVID-19\n    :param intrinsic_growth_rate: Rate of spread of COVID-19 cases\n    :param gamma: The expected recovery rate from COVID-19 for infected individuals\n    :param susceptible: Current amount of individuals that are susceptible\n    :param relative_contact_rate: The relative contact rate amongst individuals in the population\n    :return: beta: The rate of exposure of individuals to persons infected with COVID-19\n    """\n    inv_contact_rate = 1.0 - relative_contact_rate  # The inverse rate of contact between individuals in the population ## get_beta_icr_exp\n    updated_growth_rate = intrinsic_growth_rate + gamma  # The intrinsic growth rate adjusted for the recovery rate from infection ## get_beta_ugr_exp\n    beta = updated_growth_rate / susceptible * inv_contact_rate  ## get_beta_beta_exp\n\n    return beta\n\n\ndef get_growth_rate(doubling_time):\n    """\n    Calculate the expected growth rate of COVID-19 infections given a doubling time\n    :param doubling_time: The time required for the amount of COVID-19 cases to double\n    :return: growth_rate: Rate of spread of COVID-19 cases.\n    """\n    ## ggr_cond\n    if doubling_time == 0:  ## ggr_cond_b0_cond\n        growth_rate = 0  ## ggr_cond_b0_exp\n    else:\n        growth_rate = 2.0 ** (1.0 / doubling_time) - 1.0  ## ggr_cond_b1_exp\n\n    return growth_rate\n\n\ndef sir(s, v, i, i_v, r, vaccination_rate, beta, gamma_unvaccinated, gamma_vaccinated, vaccine_efficacy, n):\n    """\n    The SIR model, one time step\n    :param s: Current amount of individuals that are susceptible\n    :param v: Current amount of individuals that are vaccinated\n    :param i: Current amount of individuals that are infectious\n    :param i_v: Current amount of vaccinated individuals that are infectious\n    :param r: Current amount of individuals that are recovered\n    :param beta: The rate of exposure of individuals to persons infected with COVID-19\n    :param gamma_unvaccinated: Rate of recovery for infected unvaccinated individuals\n    :param gamma_vaccinated: Rate of recovery for infected vaccinated individuals\n    :param vaccination_rate: The rate of vaccination of susceptible individuals\n    :param vaccine_efficacy: The efficacy of the vaccine\n    :param n: Total population size\n    :return:\n    """\n    s_n = (\n                      -beta * s * i - beta * s * i_v - vaccination_rate * s) + s  # Update to the amount of individuals that are susceptible ## sir_s_n_exp\n    v_n = (vaccination_rate * s - beta * (1 - vaccine_efficacy) * v * i - beta * (\n                1 - vaccine_efficacy) * v * i_v) + v  # Update to the amount of individuals that are susceptible ## sir_v_n_exp\n    i_n = (\n                      beta * s * i + beta * s * i_v - gamma_unvaccinated * i) + i  # Update to the amount of individuals that are infectious ## sir_i_n_exp\n    i_v_n = (beta * (1 - vaccine_efficacy) * v * i + beta * (\n                1 - vaccine_efficacy) * v * i_v - gamma_vaccinated * i_v) + i_v  # Update to the amount of individuals that are infectious ## sir_i_v_n_exp\n    r_n = gamma_vaccinated * i_v + gamma_unvaccinated * i + r  # Update to the amount of individuals that are recovered ## sir_r_n_exp\n\n    scale = n / (\n                s_n + v_n + i_n + i_v_n + r_n)  # A scaling factor to compute updated disease variables ## sir_scale_exp\n\n    s = s_n * scale  ## sir_s_exp\n    v = v_n * scale  ## sir_v_exp\n    i = i_n * scale  ## sir_i_exp\n    i_v = i_v_n * scale  ## sir_i_v_exp\n    r = r_n * scale  ## sir_r_exp\n    return s, v, i, i_v, r\n\n\ndef sim_sir(s, v, i, i_v, r, vaccination_rate, gamma_unvaccinated, gamma_vaccinated, vaccine_efficacy, i_day,\n            ### original inputs\n            N_p, betas, days,  ### changes to original CHIME sim_sir to simplify policy bookkeeping\n            d_a, s_a, v_a, i_a, i_v_a, r_a, e_a,\n            ### changes to original CHIME sim_sir simulation bookkeeping - here, bookkeeping represented as lists that are passed in as arguments\n            ):\n    n = s + v + i + i_v + r  ## simsir_n_exp\n    d = i_day  ## simsir_d_exp\n\n    ### total_days from original CHIME sim_sir was used to determine the size of the\n    ### the state bookkeeping across the simulation.\n    ### Here, the array size is for this bookkeeping is determined outside of sim_sir\n    ### and the arrays are passed in as arguments.\n\n    index = 0  ## simsir_idx_exp\n    for p_idx in range(N_p):  ## simsir_loop_1\n        beta = betas[p_idx]  ## simsir_loop_1_beta_exp\n        n_days = days[p_idx]  ## simsir_loop_1_N_d_exp\n        for d_idx in range(n_days):  ## simsir_loop_1_1\n            d_a[index] = d  ## simsir_loop_1_1_T_exp\n            s_a[index] = s  ## simsir_loop_1_1_S_exp\n            v_a[index] = v  ## simsir_loop_1_1_V_exp\n            i_a[index] = i  ## simsir_loop_1_1_I_exp\n            i_v_a[index] = i_v  ## simsir_loop_1_1_I_V_exp\n            r_a[index] = r  ## simsir_loop_1_1_R_exp\n            e_a[\n                index] = i + i_v + r  # updated "ever" infected (= i + i_v + r)  ### In CHIME sir.py, this is performed at end as sum of two numpy arrays; here perform iteratively\n\n            index += 1  ## simsir_loop_1_1_idx_exp\n\n            s, v, i, i_v, r = sir(s, v, i, i_v, r, vaccination_rate, beta, gamma_unvaccinated, gamma_vaccinated,\n                                  vaccine_efficacy, n)  ## simsir_loop_1_1_call_sir_exp\n\n            d += 1  ## simsir_loop_1_1_d_exp\n\n    # Record the last update (since sir() is called at the tail of the inner loop above)\n    d_a[index] = d  ## simsir_T_exp\n    s_a[index] = s  ## simsir_S_exp\n    v_a[index] = v  ## simsir_V_exp\n    i_a[index] = i  ## simsir_I_exp\n    i_v_a[index] = i_v  ## simsir_I_exp\n    r_a[index] = r  ## simsir_R_exp\n\n    return s, v, i, i_v, r, d_a, s_a, v_a, i_a, i_v_a, r_a, e_a  ### return\n\n\ndef main():\n    """\n    implements generic CHIME configuration without hospitalization calculation\n    initializes parameters and population, calculates policy, and runs dynamics\n    :return:\n    """\n    ###\n\n    # initial parameters\n    i_day = 17.0  ## main_i_day_exp\n    n_days = [14, 90]  ## main_n_days_exp\n    N_p = 2  ## main_N_p_exp\n    N_t = sum(n_days) + 1  ## main_N_t_exp\n    infectious_days_unvaccinated = 14  ## main_inf_days_u_exp\n    infectious_days_vaccinated = 10  ## main_inf_days_v_exp\n    relative_contact_rate = [0.0, 0.45]  ## main_rcr_exp\n    gamma_unvaccinated = 1.0 / infectious_days_unvaccinated  ## main_gamma_u_exp\n    gamma_vaccinated = 1.0 / infectious_days_vaccinated  ## main_gamma_v_exp\n\n    # Vaccination parameters\n    vaccination_rate = 0.02  ## main_vaccination_rate_exp\n    vaccine_efficacy = 0.85  ## main_vaccine_efficacy_exp\n\n    # initialize lists for policy and simulation state bookkeeping\n    policys_betas = [0.0] * N_p  ## TODO size      # main_pbetas_seq\n    policy_days = [0] * N_p  ## main_pdays_seq\n    d_a = [0.0] * N_t  ## main_T_seq\n    s_a = [0.0] * N_t  ## main_S_seq\n    v_a = [0.0] * N_t  ## main_V_seq\n    i_a = [0.0] * N_t  ## main_I_seq\n    i_v_a = [0.0] * N_t  ## main_I_V_seq\n    r_a = [0.0] * N_t  ## main_R_seq\n    e_a = [0.0] * N_t  # "ever" infected (= I + R) ## main_E_seq\n\n    # initial population\n    s_n = 1000  ## main_s_n_exp\n    v_n = 0  ## main_v_n_exp\n    i_n = 1  ## main_i_n_exp\n    i_v_n = 0  ## main_i_v_n_exp\n    r_n = 0  ## main_r_n_exp\n\n    # calculate beta under policy\n    for p_idx in range(N_p):  ## main_loop_1\n        doubling_time = 2\n\n        growth_rate = get_growth_rate(doubling_time)  ## main_loop_1_gr_exp\n        beta = get_beta(growth_rate, gamma_unvaccinated, s_n,  ## main_loop_1_beta_exp\n                        relative_contact_rate[p_idx])\n        policys_betas[p_idx] = beta  ## main_loop_1_pbetas_exp\n        policy_days[p_idx] = n_days[p_idx]  ## main_loop_1_pdays_exp\n\n    # simulate dynamics (corresponding roughly to run_projection() )\n    s_n, v_n, i_n, i_v_n, r_n, d_a, s_a, v_a, i_a, i_v_a, r_a, e_a \\\n        = sim_sir(s_n, v_n, i_n, i_v_n, r_n, vaccination_rate, gamma_unvaccinated, gamma_vaccinated, vaccine_efficacy,\n                  i_day,  ## main_call_simsir_exp\n                  N_p, policys_betas, policy_days,\n                  d_a, s_a, v_a, i_a, i_v_a, r_a, e_a)\n\n    return d_a, s_a, v_a, i_a, i_v_a, r_a, e_a  # return simulated dynamics\n\n\nif __name__ == \'__main__\':\n    outfile = sys.argv[1]\n\n    d_a, s_a, v_a, i_a, i_v_a, r_a, e_a = main()\n\n    out_data = list()\n    keys = [\'d\', \'s\', \'v\', \'i\', \'i_v\', \'r\', \'e\']\n    data_tuples = zip(d_a, s_a, v_a, i_a, i_v_a, r_a, e_a)\n    for dat in data_tuples:\n        out_data.append(\n            dict(zip(keys, dat))\n        )\n\n    with open(outfile, \'w\') as csvfile:\n        writer = DictWriter(csvfile, fieldnames=keys, quoting=QUOTE_NONNUMERIC)\n        writer.writeheader()\n        writer.writerows(out_data)'

# Demo Part 3: Text-to-Model Matching

## We need the CHIME SIR model...


In [41]:
print(CHIME_SIR_model_source)

def get_beta(intrinsic_growth_rate, gamma,           
             susceptible, relative_contact_rate):    
    inv_contact_rate = 1.0 - relative_contact_rate  
    updated_growth_rate = intrinsic_growth_rate + gamma  
    beta = updated_growth_rate / susceptible * inv_contact_rate 
 
    return beta  

def get_growth_rate(doubling_time): 
    if doubling_time == 0:  
        growth_rate = 0  
    else:
        growth_rate = 2.0 ** (1.0 / doubling_time) - 1.0 
    return growth_rate 

def sir(s, i, r, beta, gamma, n): 
    s_n = (-beta * s * i) + s  
    i_n = (beta * s * i - gamma * i) + i 
    r_n = gamma * i + r  

    scale = n / (s_n + i_n + r_n) 

    s = s_n * scale 
    i = i_n * scale 
    r = r_n * scale 
    return s, i, r  

def sim_sir(s, i, r, gamma, i_day, 
            N_p, betas, days, 
            d_a, s_a, i_a, r_a, e_a  
            ):
    n = s + i + r 
    d = i_day 
    index = 0  
    p_idx = 0 

    while p_idx < N_p:  
        beta = betas[p_idx]  
        n_da

## And a PDF that describes the SIR model...

In [47]:
IFrame(CHIME_SIR_DESCRIPTION_PDF_DOC, width=600, height=300)


## We use a tool to extract the textual description of the SIR model from a few pages of the doc...

In [48]:
print(CHIME_SIR_DESCRIPTION_TEXT)

0	Discrete-time SIR modeling of infections/recovery
1	The model consists of individuals who are either Susceptible (S), Infected (I), or Recovered (R).
2	The epidemic proceeds via a growth and decline process. This is the core model of infectious disease spread and has been in use in epidemiology for many years.
3	The dynamics are given by the following 3 equations.
4	St+1 = St−βStIt
5	It+1 =It +βStIt−γIt
6	Rt+1 = Rt + γIt
7	To project the expected impact to Penn Medicine, we estimate the terms of the model.
8	To do this, we use a combination of estimates from other locations, informed estimates based on logical reasoning, and best guesses from the American Hospital Association.
9	Parameters
10	The model's parameters, β and γ , determine the severity of the epidemic. β can be interpreted as the effective contact rate: β=τ×c
11	which is the transmissibility τ multiplied by the average number of people exposed c. The transmissibility is the basic virulence of the pathogen. The number of 

## Finally, we can find connections between the model and descriptions from the text

In [13]:
# REMINDER: CHANGE THE API HERE!!!!
connect.code_text_connection(code, text)

Best description for python function get_growth_rate is in lines 25-25:
	24	To estimate β directly, we'd need to know transmissibility and social contact rates. Since we don't know these things, we can extract it from known doubling times. The AHA says to expect a doubling time Td of 7-10 days. That means an early-phase rate of growth can be computed by using the doubling time formula:
>>	25	g = 21/Td −1
	26	Since the rate of new infections in the SIR model is g = βS − γ and we've already computed γ , β becomes
---------------------------------------
Best description for python function get_beta is in lines 11-12:
	10	The model's parameters, β and γ , determine the severity of the epidemic. β can be interpreted as the effective contact rate: β=τ×c
>>	11	which is the transmissibility τ multiplied by the average number of people exposed c. The transmissibility is the basic virulence of the pathogen. The number of people exposed c is the parameter that can be changed through social distan

# Demo Part 4: Data-to-Model Matching

## The Bucky model is large. Imagine the user has chosen some interesting functions...

In [50]:
print(BUCKY_model_source)

def estimate_cfr(
    g_data,
    base_CFR,
    case_to_death_time,
    Rh_gamma_k,
    S_age_dist,
    days_back=7,
):
    """Estimate CFR from recent case data."""

    mean = case_to_death_time  # params["H_TIME"] + params["I_TO_H_TIME"] #+ params["D_REPORT_TIME"]
    adm2_mean = xp.sum(S_age_dist * mean[..., None], axis=0)
    k = Rh_gamma_k

    rolling_case_hist = g_data.csse_data.incident_cases
    rolling_death_hist = g_data.csse_data.incident_deaths

    t_max = rolling_case_hist.shape[0]
    x = xp.arange(0.0, t_max)

    # adm0
    adm0_inc_cases = xp.sum(rolling_case_hist, axis=1)
    adm0_inc_deaths = xp.sum(rolling_death_hist, axis=1)

    adm0_theta = xp.sum(adm2_mean * g_data.Nj / g_data.N) / k

    w = 1.0 / (xp.special.gamma(k) * adm0_theta**k) * x ** (k - 1) * xp.exp(-x / adm0_theta)
    w = w / (1.0 - w)
    w = w / xp.sum(w)
    w = w[::-1]

    # n_loc = rolling_case_hist.shape[1]
    cfr = xp.empty((days_back,))
    for i in range(days_back):
        d = i + 1
  

## And maintains some interesting relevant datasets...

In [15]:
# REMINDER: Let's change this so that in the preamble part up above, we load each of these datasets into a DF.
# We can then list the names of the dataframes, and show the schema plus the first two rows of each one.

dataset_dir =  "./model/Bucky/data_sample/"
connect.print_list(dataset_dir)

ls  ./model/Bucky/data_sample/
	 covid_deaths_usafacts.csv
	 nychealth.csv
	 covid_tracking.csv
	 usafacts_hist.csv
	 covid_confirmed_usafacts.csv


## Finally, we can find connections between the functions and columns in these datasets

In [53]:
# REMINDER: Fix this so it uses improved variable names.
# Ideally, the output would be a bit nicer but not absolutely crucial

connect.code_dataset_connection(code,dataset_dir)

OpenAI connection error: This model's maximum context length is 4097 tokens, however you requested 4754 tokens (4498 in your prompt; 256 for the completion). Please reduce your prompt; or completion length.
