# Notebook for analyzing reservations and their usage

In [1]:
import os, sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.ticker as mticker
import re
import hashlib
import time
import datetime as dt
from pandas.tseries.offsets import BDay
from matplotlib.lines import Line2D
import multiprocessing as mp
import calendar
from dateutil.relativedelta import relativedelta

# IPython magic: have the inline matplotlib backend emit figures in the
# 'retina' format (higher-resolution inline plots).
%config InlineBackend.figure_format = 'retina'

# Turn off pandas' chained-assignment check so that writing into dataframe
# copies does not emit warnings (the option's default value is 'warn').
pd.set_option("mode.chained_assignment", None)

# Directory (relative to the notebook's working directory) holding the input CSVs.
source_dir = "data/"

# Load the reservations table. low_memory=False tells pandas not to parse the
# file in internal chunks, which avoids mixed-type inference within a column.
res_eval = pd.read_csv(os.path.join(source_dir, "res_eval.csv"), low_memory=False)

# Report the table size and its column names (sorted, one per line, tab-indented).
# Single-argument print(...) calls produce the same text under Python 2 and
# Python 3; the trailing " \n" reproduces the space and blank line that the
# original two-item print statement emitted.
print("Table/dataframe: res_eval (Length: %d)" % len(res_eval))
print("\t" + "\n\t".join(sorted(res_eval.columns.tolist())) + " \n")

Table/dataframe: res_eval (Length: 674)
	actual_end
	actual_node_hours
	actual_start
	approved
	approved_auto
	approved_later
	classic_node_hours
	deleted_during_res_window
	duration_d
	found_experiments_classic
	found_experiments_portal
	has_cancel
	has_delete
	has_submit
	matching_expts
	nodes
	percent_util
	pid_idx
	portal_node_hours
	requested_node_hours
	total_node_hours
	type 



## Details to consider here:

- Each row in `res_eval` is a reservation (there are 674 here).
- Most of these reservations are approved -- recorded in the `approved` column. They are auto-approved (`approved_auto`) or manually approved by CloudLab staff (`approved_later`).
- If a reservation is cancelled (typically by the user who submitted the reservation), it will be recorded in `has_cancel`. Then `actual_end` will be updated and set to the moment of cancellation.
- The hardware type used is recorded in `type`, and the number of nodes is recorded in `nodes`.
- `matching_expts` -- list of IDs of experiments where the hardware of the requested type is used within the requested time interval by members of the project under which the reservation is submitted.
    - Short numeric IDs (e.g., 376013) -- IDs of experiments created using the classic interface.
    - Long alpha-numeric IDs (e.g., 84db441a-0d0c-11e7-ac8d-90e2ba22fee4) -- IDs of experiments created using CloudLab portal.
- `requested_node_hours` -- the number of node-hours requested; `actual_node_hours` -- the number of node-hours in all of the experiments listed in `matching_expts` above.

#### See examples below.

In [2]:
# Display the reservation at row position 200 as a one-column table
# (one line per field of the record).
res_eval.iloc[200].to_frame()

Unnamed: 0,200
actual_end,2018-04-28 18:01:32
actual_node_hours,30.7667
actual_start,2018-04-28 17:00:00
approved,True
approved_auto,False
approved_later,True
classic_node_hours,16.4089
deleted_during_res_window,True
duration_d,1.625
found_experiments_classic,True


In [3]:
# Display the reservation at row position 10 as a one-column table
# (one line per field of the record).
res_eval.iloc[10].to_frame()

Unnamed: 0,10
actual_end,2018-01-11 11:55:29
actual_node_hours,4746.24
actual_start,2018-01-07 13:00:00
approved,True
approved_auto,False
approved_later,True
classic_node_hours,0
deleted_during_res_window,True
duration_d,7.375
found_experiments_classic,False
