# Important: Run this code cell each time you start a new session!

In [None]:
# `os` ships with Python's standard library, so it needs no pip install
# (attempting one only prints an error in this cell).
import os
import numpy as np
import pandas as pd

In [None]:
# Download some files of a sepsis dataset
sepsis_folder = "sepsis_dataset"
if not os.path.exists(sepsis_folder):
    os.mkdir(sepsis_folder)
patient_list = range(1, 11)
patient_list = [f'p{str(s).zfill(6)}.psv' for s in patient_list]
for f in patient_list:
  !wget -nc https://physionet.org/files/challenge-2019/1.0.0/training/training_setA/{f}
  os.rename(f, os.path.join(sepsis_folder, f))

In [None]:
# Convert the sepsis dataset to a single csv
def load_single_file(file_path):
    """Load one pipe-separated patient file into a tidy DataFrame.

    Args:
        file_path: Path to a ``|``-separated file whose columns include
            ``Age``, ``Gender``, ``SepsisLabel``, ``HR``, ``O2Sat``, ``SBP``,
            ``DBP`` and ``Resp``.

    Returns:
        A DataFrame with the columns ``PatientID``, ``Age``, ``Sex``,
        ``HasSepsis``, ``Hour``, ``HR``, ``O2Sat``, ``SBP``, ``DBP`` and
        ``Resp``. ``PatientID`` is the file name without its extension and
        ``Hour`` is taken from the DataFrame's index.

    Raises:
        KeyError: If one of the expected columns is missing from the file.
    """
    df = pd.read_csv(file_path, sep="|")
    # basename/splitext work for any extension length and for every path
    # separator the platform accepts, unlike split(os.sep)[-1][:-4].
    df['PatientID'] = os.path.splitext(os.path.basename(file_path))[0]
    df['Hour'] = df.index
    keep_cols = ['PatientID', 'Age', 'Gender', 'SepsisLabel', 'Hour',
                 'HR', 'O2Sat', 'SBP', 'DBP', 'Resp']
    # rename() returns a new frame, so no in-place edit of a column subset is needed.
    return df[keep_cols].rename(columns={'Gender': 'Sex', 'SepsisLabel': 'HasSepsis'})

def create_final_table(patient_list):
    """Combine the per-patient files into one table and save it as ``sepsis.csv``.

    Args:
        patient_list: Iterable of file names located inside ``sepsis_folder``;
            each one is loaded with ``load_single_file``.
    """
    frames = [load_single_file(os.path.join(sepsis_folder, f)) for f in patient_list]
    # Concatenate once at the end: growing a DataFrame inside the loop copies
    # all earlier rows on every iteration, and concatenating onto an empty
    # frame is deprecated in recent pandas versions.
    # pd.concat rejects an empty list, so fall back to an empty frame.
    final_df = pd.concat(frames) if frames else pd.DataFrame()
    final_df.to_csv('sepsis.csv', index=False)
# Build sepsis.csv from the patient files listed in patient_list.
create_final_table(patient_list)

In [None]:
import os, datetime, json, locale, pathlib, urllib, requests, werkzeug, nbformat, google, yaml, warnings
def colab2pdf():
    """Render the current Colab notebook to a PDF with Quarto and download it.

    Steps: look up the notebook's name, save a copy of the notebook under
    /content/pdfs/<timestamp>_<name>/, install Quarto and TinyTeX if Quarto is
    not present, render the copy to PDF, and hand the PDF to the browser.
    Only works inside Google Colab (uses ``google.colab`` and Colab
    environment variables).
    """
    locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
    # Ask the Jupyter server for its sessions and use the first session's name
    # as the notebook file name (URL-unquoted, then sanitized for the file system).
    # NOTE(review): uses urllib.parse although only `urllib` is imported above;
    # presumably another import loads the submodule - confirm.
    NAME = pathlib.Path(werkzeug.utils.secure_filename(urllib.parse.unquote(requests.get(f"http://{os.environ['COLAB_JUPYTER_IP']}:{os.environ['KMP_TARGET_PORT']}/api/sessions").json()[0]["name"])))
    # Working directory for this export: /content/pdfs/<YYYYmmdd_HHMMSS>_<notebook name>.
    TEMP = pathlib.Path("/content/pdfs") / f"{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}_{NAME.stem}"; TEMP.mkdir(parents=True, exist_ok=True)
    # Fetch the live notebook from the Colab front end and keep every cell whose
    # source does not contain the marker "--Colab2PDF".
    NB = [cell for cell in nbformat.reads(json.dumps(google.colab._message.blocking_request("get_ipynb", timeout_sec=30)["ipynb"]), as_version=4).cells if "--Colab2PDF" not in cell.source]
    warnings.filterwarnings('ignore', category=nbformat.validator.MissingIDFieldWarning)
    # Write the filtered cells to a notebook copy; if no cells remain, write a single "#" code cell instead.
    with (TEMP / f"{NAME.stem}.ipynb").open("w", encoding="utf-8") as nb_copy: nbformat.write(nbformat.v4.new_notebook(cells=NB or [nbformat.v4.new_code_cell("#")]), nb_copy)
    # Install Quarto (and TinyTeX for LaTeX) only when /usr/local/bin/quarto is missing.
    if not pathlib.Path("/usr/local/bin/quarto").exists():
        !wget -q "https://quarto.org/download/latest/quarto-linux-amd64.deb" -P {TEMP} && dpkg -i {TEMP}/quarto-linux-amd64.deb > /dev/null && quarto install tinytex --update-path --quiet
    # Quarto metadata file: LaTeX snippets that redefine the code/verbatim
    # environments with line breaking enabled (breaklines).
    with (TEMP / "config.yml").open("w", encoding="utf-8") as file: yaml.dump({'include-in-header': [{"text": r"\usepackage{fvextra}\DefineVerbatimEnvironment{Highlighting}{Verbatim}{breaksymbolleft={},showspaces=false,showtabs=false,breaklines,breakanywhere,commandchars=\\\{\}}"}],'include-before-body': [{"text": r"\DefineVerbatimEnvironment{verbatim}{Verbatim}{breaksymbolleft={},showspaces=false,showtabs=false,breaklines}"}]}, file)
    # Render the notebook copy to PDF with 1-inch margins on all sides.
    !quarto render {TEMP}/{NAME.stem}.ipynb --metadata-file={TEMP}/config.yml --to pdf -M latex-auto-install -M margin-top=1in -M margin-bottom=1in -M margin-left=1in -M margin-right=1in --quiet
    # Send the rendered PDF to the user's browser as a download.
    google.colab.files.download(str(TEMP / f"{NAME.stem}.pdf"))

# Instructions

Please complete all of the exercises below. Across this module, some of the exercises are expected to produce very specific outputs, while others may have a variety of reasonable answers.

# Exercise 1: Examining a NumPy Array

**(Part 1)** Create a NumPy array with the following values:

<table>
  <tr>
    <td>3</td>
    <td>1</td>
    <td>7</td>
  </tr>

  <tr>
    <td>6</td>
    <td>10</td>
    <td>4</td>
  </tr>
</table>

In [None]:
# Write your code here

**(Part 2)** What are the dimensions of this array? In other words, how many rows and columns does it have?

Write your answer here: ???

**(Part 3)** Write code that will print out the number of rows in the array.

In [None]:
# Write your code here

**(Part 4)** Write code that will index into the array and retrieve the value `7`.

In [None]:
# Write your code here

**(Part 5)** Write code that will index into the array and retrieve the first row (`[3, 1, 7]`).

In [None]:
# Write your code here

**(Part 6)** Write code that will index into the array and retrieve the first two columns (`[[3, 1], [6, 10]]`).

In [None]:
# Write your code here

**(Part 7)** Write code that will produce the sum along each column of the array and multiply it by 4.

In [None]:
# Write your code here

# Exercise 2: Creating a NumPy Array

In this exercise, you will be asked to create the following array using multiple methods:

<table>
  <tr>
    <td>0</td>
    <td>1</td>
    <td>2</td>
    <td>3</td>
    <td>4</td>
    <td>5</td>
  </tr>

  <tr>
    <td>0</td>
    <td>5</td>
    <td>10</td>
    <td>15</td>
    <td>20</td>
    <td>25</td>
  </tr>
</table>

Notice that the bottom row is equal to the top row multiplied by 5.


**(Part 1)** Write code that will create the array using `np.array()` with a list as the input.

In [None]:
# Write your code here

**(Part 2)** Write code that will create the array by combining the two arrays provided below.

In [None]:
# Starter arrays: the top and bottom rows of the target array.
row1 = np.array([0, 1, 2, 3, 4, 5])
row2 = np.array([0, 5, 10, 15, 20, 25])
# Write your code here

**(Part 3)** Write code that will create the array using a similar approach to what you used in **(Part 2)**, but create `row1` and `row2` using `np.arange()` and basic arithmetic. In other words, create `row1` and `row2` without needing to type each individual list element.

In [None]:
# Write your code here

**(Part 4)** Write code that will fill in the provided empty array of zeroes with the values of the target array using at least one `for` loop.

*Hint:* You could either use a single `for` loop that fills in one column per iteration, or use nested `for` loops with an `if` statement to fill in one value at a time.

In [None]:
# Placeholder with the target's shape (2 rows, 6 columns), initially all zeros.
arr = np.zeros((2, 6))
# Write your code here

# Exercise 3: Examining a Pandas DataFrame

This exercise will involve a `DataFrame` similar to the one we used in class.

*Important:* Note that the patients in this DataFrame are represented across multiple rows corresponding to different hours when measurements were taken.

In [None]:
# Load the combined table that the setup cells saved as sepsis.csv.
df = pd.read_csv("sepsis.csv")
# A bare expression on the last line of a cell makes the notebook display it.
df

**(Part 1)** Write code that will show the number of patients in the dataset.

In [None]:
# Write your code here

**(Part 2)** Write code that will show the number of female (`0`) and male (`1`) patients in the dataset.

In [None]:
# Write your code here

**(Part 3)** Write code that will print out the ID of the oldest patient.

In [None]:
# Write your code here

**(Part 4)** Write code that prints out who experienced sepsis and at which hours it was reported. In other words, retrieve the `PatientID` and `Hour` values for all rows for which `'HasSepsis' == 1`.

In [None]:
# Write your code here

**(Part 5)** Write code that will calculate the average age of all the patients.

In [None]:
# Write your code here

# Prepare Submission

To get full credit for this assignment, you should submit your assignment in two formats so that we can easily grade and debug your code:
1. **.ipynb:** First, confirm that your code can run from start to finish without any errors. To check this, go to "Runtime" > "Run all" in the Google Colab menu. If everything looks good, you can export your file by going to "File" > "Download" > "Download .ipynb".
2. **.pdf:** Run the function called `colab2pdf()` below. This will automatically convert your notebook to a PDF. Note that while "File" > "Print" > "Save as PDF" also works, it requires you to manually expand all of the cells and may cut off some images.

In [None]:
# Convert this notebook to a PDF and download it.
colab2pdf()