In [1]:
from pyspark import SparkContext
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, udf, split, row_number
from pyspark.sql.window import Window
from pyspark.sql.types import DateType, StringType, IntegerType
from pyspark.ml.regression import LinearRegression, DecisionTreeRegressor, IsotonicRegression, RandomForestRegressor
from pyspark.ml.linalg import Vectors, DenseVector, VectorUDT
from pyspark.ml.feature import OneHotEncoder, StringIndexer, VectorAssembler
from pyspark.ml.evaluation import RegressionEvaluator

In [2]:
from datetime import datetime

In [3]:
sc = SparkContext()

In [4]:
sqlc = SparkSession(sc)

In [5]:
reading = sqlc.read.csv('Admissions 2015-16.csv', header=True)

In [59]:
reading.show(5)

+------------+--------+---------------+-----------+-------------+--------------+--------+------------+
|Removal Date|Hospital|      Specialty|  Procedure|       Doctor|Patient Number|Priority|Waiting Days|
+------------+--------+---------------+-----------+-------------+--------------+--------+------------+
|  03/08/2015|     Mel|General Surgery|Lap Banding|Joseph Miller|     111365825|       C|        3396|
|  06/07/2015|     Mel|General Surgery|Lap Banding|Joseph Miller|     109970143|       C|        3356|
|  03/08/2015|     Mel|General Surgery|Lap Banding|Joseph Miller|     106770523|       C|        3244|
|  19/08/2015|     Mel|General Surgery|Lap Banding|Joseph Miller|     111176864|       C|        3229|
|  09/08/2015|     Mel|General Surgery|Lap Banding|Joseph Miller|     107085813|       C|        3190|
+------------+--------+---------------+-----------+-------------+--------------+--------+------------+
only showing top 5 rows



In [6]:
change_to_month_func = udf(lambda record: int(datetime.strftime(datetime.strptime(record, '%d/%m/%Y'), '%Y%m')), IntegerType())

In [7]:
reading_mod = reading.withColumn('Date', change_to_month_func(col('Removal Date'))).drop('Removal Date'
                                        ).withColumnRenamed('count(Removal Date)', 'patients_removed')

In [128]:
grouped = reading_mod.groupby('Date').agg({'Date': 'count'})

In [131]:
grouped.orderBy('Date').show(100)

+------+-----------+
|  Date|count(Date)|
+------+-----------+
|201506|         86|
|201507|       1217|
|201508|       1230|
|201509|       1305|
|201510|       1191|
|201511|       1384|
|201512|       1143|
|201601|       1094|
|201602|       1443|
|201603|       1364|
|201604|       1808|
|201605|       2200|
|201606|       2006|
+------+-----------+



In [9]:
change_to_date_func = udf(lambda record: datetime.strptime(str(record), '%Y%m'), DateType())

In [148]:
grouped_with_date = grouped.withColumn('Date', change_to_date_func(col('Date')))

In [149]:
window_row = Window().orderBy('Date')

In [151]:
grouped_new = grouped_with_date.withColumn('id', row_number().over(window_row))

In [152]:
grouped_new.show(100)

+----------+-----------+---+
|      Date|count(Date)| id|
+----------+-----------+---+
|2015-06-01|         86|  1|
|2015-07-01|       1217|  2|
|2015-08-01|       1230|  3|
|2015-09-01|       1305|  4|
|2015-10-01|       1191|  5|
|2015-11-01|       1384|  6|
|2015-12-01|       1143|  7|
|2016-01-01|       1094|  8|
|2016-02-01|       1443|  9|
|2016-03-01|       1364| 10|
|2016-04-01|       1808| 11|
|2016-05-01|       2200| 12|
|2016-06-01|       2006| 13|
+----------+-----------+---+



In [10]:
to_vector = udf(lambda record: Vectors.dense(record), VectorUDT())

In [154]:
grouped_new_1 = grouped_new.withColumn('id', to_vector(col('id')))

In [243]:
testing_df = grouped_new_1.where(col('Date') > datetime(2016,4,2))

In [244]:
training_df = grouped_new_1.filter(col('Date') < datetime(2016,6,2))

In [230]:
grouped_new_1.show(100)

+----------+-----------+------+
|      Date|count(Date)|    id|
+----------+-----------+------+
|2015-06-01|         86| [1.0]|
|2015-07-01|       1217| [2.0]|
|2015-08-01|       1230| [3.0]|
|2015-09-01|       1305| [4.0]|
|2015-10-01|       1191| [5.0]|
|2015-11-01|       1384| [6.0]|
|2015-12-01|       1143| [7.0]|
|2016-01-01|       1094| [8.0]|
|2016-02-01|       1443| [9.0]|
|2016-03-01|       1364|[10.0]|
|2016-04-01|       1808|[11.0]|
|2016-05-01|       2200|[12.0]|
|2016-06-01|       2006|[13.0]|
+----------+-----------+------+



In [249]:
lr = LinearRegression(maxIter=100, regParam=0.01, elasticNetParam = 1.0, labelCol="count(Date)", featuresCol="id")

In [250]:
model_lr = lr.fit(training_df.select('count(Date)', 'id'))

In [251]:
model_lr.coefficients

DenseVector([105.1511])

In [252]:
transformed_lr = model_lr.transform(testing_df)

In [253]:
transformed_lr.show(10)

+----------+-----------+------+------------------+
|      Date|count(Date)|    id|        prediction|
+----------+-----------+------+------------------+
|2016-05-01|       2200|[12.0]|1869.6783989763017|
|2016-06-01|       2006|[13.0]|1974.8294633869466|
+----------+-----------+------+------------------+



In [163]:
dt = DecisionTreeRegressor(maxDepth=3, labelCol="count(Date)", featuresCol="id")

In [254]:
model_dt = dt.fit(training_df.select('count(Date)', 'id'))

In [255]:
transformed_dt = model_dt.transform(testing_df)

In [256]:
transformed_dt.show(10)

+----------+-----------+------+----------+
|      Date|count(Date)|    id|prediction|
+----------+-----------+------+----------+
|2016-05-01|       2200|[12.0]|    2103.0|
|2016-06-01|       2006|[13.0]|    2103.0|
+----------+-----------+------+----------+



In [186]:
iso = IsotonicRegression(labelCol="count(Date)", featuresCol="id")

In [190]:
model_iso = iso.fit(training_df.select("count(Date)", "id"))

In [191]:
transformed_iso = model_iso.transform(testing_df)

In [192]:
transformed_iso.show(10)

+----------+-----------+------+----------+
|      Date|count(Date)|    id|prediction|
+----------+-----------+------+----------+
|2016-05-01|       2200|[12.0]|    1808.0|
|2016-06-01|       2006|[13.0]|    1808.0|
+----------+-----------+------+----------+



In [217]:
rf = RandomForestRegressor(numTrees=3, maxDepth=2,labelCol="count(Date)", featuresCol="id")
model_rf = rf.fit(training_df.select("count(Date)", "id"))
transformed_rd = model_rf.transform(testing_df)
transformed_rd.show(10)

+----------+-----------+------+----------+
|      Date|count(Date)|    id|prediction|
+----------+-----------+------+----------+
|2016-05-01|       2200|[12.0]|    1808.0|
|2016-06-01|       2006|[13.0]|    1808.0|
+----------+-----------+------+----------+



In [167]:
grouped_doctor = reading_mod.groupby('Date', 'Doctor').agg({'Doctor': 'count'})

In [168]:
grouped_doctor.show(10)

+------+----------------+-------------+
|  Date|          Doctor|count(Doctor)|
+------+----------------+-------------+
|201606|  Nathan Russell|           19|
|201604|  Carol Buckland|           34|
|201604|Connor Churchill|           11|
|201511|       Owen Gill|            7|
|201606|   Neil Rampling|           10|
|201509|  Jason Paterson|            6|
|201605|   Wendy Ellison|           32|
|201606|    Alan Coleman|            4|
|201602|  Brandon Fraser|            1|
|201605|   Sophie Miller|            2|
+------+----------------+-------------+
only showing top 10 rows



In [169]:
grouped_doctor_with_date = grouped_doctor.withColumn('Date', change_to_date_func(col('Date')))

In [170]:
window_row_doctor = Window().partitionBy('Doctor').orderBy('Date')

In [171]:
grouped_doctor_with_date_new = grouped_doctor_with_date.withColumn('id', row_number().over(window_row_doctor))

In [172]:
grouped_doctor_with_date_new.show(30)

+----------+-----------------+-------------+---+
|      Date|           Doctor|count(Doctor)| id|
+----------+-----------------+-------------+---+
|2015-06-01|       Sam Slater|            1|  1|
|2015-07-01|       Sam Slater|            1|  2|
|2015-08-01|       Sam Slater|            1|  3|
|2015-09-01|       Sam Slater|            7|  4|
|2015-10-01|       Sam Slater|            2|  5|
|2015-11-01|       Sam Slater|            5|  6|
|2015-12-01|       Sam Slater|            2|  7|
|2016-01-01|       Sam Slater|            1|  8|
|2016-02-01|       Sam Slater|            2|  9|
|2016-03-01|       Sam Slater|            1| 10|
|2016-04-01|       Sam Slater|            3| 11|
|2016-05-01|       Sam Slater|            4| 12|
|2016-06-01|       Sam Slater|            4| 13|
|2015-06-01|Alexander Skinner|            7|  1|
|2015-07-01|Alexander Skinner|           22|  2|
|2015-08-01|Alexander Skinner|           20|  3|
|2015-09-01|Alexander Skinner|           11|  4|
|2015-10-01|Alexande

In [173]:
to_vectors = udf(lambda col_a, col_b: Vectors.sparse(col_a, col_b))

In [174]:
strindexer = StringIndexer(inputCol="Doctor", outputCol="Doctor_idx")
model_strindexer = strindexer.fit(grouped_doctor_with_date_new)
grouped_doctor_with_date_new_indexed = model_strindexer.transform(grouped_doctor_with_date_new)

In [175]:
#encoder = OneHotEncoder(inputCol="Doctor_idx", outputCol="Doctor_coded")
#grouped_doctor_with_date_new_encoded = encoder.transform(grouped_doctor_with_date_new_indexed)

In [176]:
grouped_doctor_with_date_new_indexed.show(2)

+----------+----------+-------------+---+----------+
|      Date|    Doctor|count(Doctor)| id|Doctor_idx|
+----------+----------+-------------+---+----------+
|2015-06-01|Sam Slater|            1|  1|       9.0|
|2015-07-01|Sam Slater|            1|  2|       9.0|
+----------+----------+-------------+---+----------+
only showing top 2 rows



In [177]:
assembler = VectorAssembler(inputCols=["id", "Doctor_idx"], outputCol="features")
grouped_doctor_with_date_new_1 = assembler.transform(grouped_doctor_with_date_new_indexed)

In [178]:
grouped_doctor_with_date_new_1.show(10)

+----------+----------+-------------+---+----------+----------+
|      Date|    Doctor|count(Doctor)| id|Doctor_idx|  features|
+----------+----------+-------------+---+----------+----------+
|2015-06-01|Sam Slater|            1|  1|       9.0| [1.0,9.0]|
|2015-07-01|Sam Slater|            1|  2|       9.0| [2.0,9.0]|
|2015-08-01|Sam Slater|            1|  3|       9.0| [3.0,9.0]|
|2015-09-01|Sam Slater|            7|  4|       9.0| [4.0,9.0]|
|2015-10-01|Sam Slater|            2|  5|       9.0| [5.0,9.0]|
|2015-11-01|Sam Slater|            5|  6|       9.0| [6.0,9.0]|
|2015-12-01|Sam Slater|            2|  7|       9.0| [7.0,9.0]|
|2016-01-01|Sam Slater|            1|  8|       9.0| [8.0,9.0]|
|2016-02-01|Sam Slater|            2|  9|       9.0| [9.0,9.0]|
|2016-03-01|Sam Slater|            1| 10|       9.0|[10.0,9.0]|
+----------+----------+-------------+---+----------+----------+
only showing top 10 rows



In [257]:
training_df = grouped_doctor_with_date_new_1

In [258]:
testing_df = grouped_doctor_with_date_new_1.where(col('Date') > datetime(2016,4,2))

In [267]:
lrd = LinearRegression(maxIter=1000, regParam=0.01, elasticNetParam=0.0, labelCol="count(Doctor)", featuresCol="features")
model_lrd = lrd.fit(training_df.select("features", "count(Doctor)"))
transformed_lrd = model_lrd.transform(testing_df)

In [266]:
transformed_lrd.show(100)

+----------+------------------+-------------+---+----------+-----------+-------------------+
|      Date|            Doctor|count(Doctor)| id|Doctor_idx|   features|         prediction|
+----------+------------------+-------------+---+----------+-----------+-------------------+
|2016-05-01|        Sam Slater|            4| 12|       9.0| [12.0,9.0]| 22.265878226736465|
|2016-06-01|        Sam Slater|            4| 13|       9.0| [13.0,9.0]| 23.016920983918006|
|2016-05-01| Alexander Skinner|           38| 12|      11.0|[12.0,11.0]|  22.06068399092705|
|2016-06-01| Alexander Skinner|           32| 13|      11.0|[13.0,11.0]| 22.811726748108587|
|2016-05-01|    Sally Morrison|           13| 11|      65.0|[11.0,65.0]|  15.76939686689127|
|2016-06-01|    Sally Morrison|           22| 12|      65.0|[12.0,65.0]| 16.520439624072807|
|2016-06-01|    Brandon Fraser|            1| 11|      85.0|[11.0,85.0]| 13.717454508797108|
|2016-05-01|       Joseph Ince|            1|  1|     177.0|[1.0,177.0

In [274]:
regev = RegressionEvaluator(labelCol="count(Doctor)")
regev.evaluate(transformed_lrd.select("count(Doctor)", "prediction"), {regev.metricName: "mae"})

10.278008729851091

In [208]:
isod = IsotonicRegression(labelCol="count(Doctor)", featuresCol="features")

In [209]:
model_isod = isod.fit(training_df.select("features", "count(Doctor)"))

In [210]:
transformed_isod = model_isod.transform(testing_df)

In [211]:
transformed_isod.show(100)

+----------+------------------+-------------+---+----------+-----------+------------------+
|      Date|            Doctor|count(Doctor)| id|Doctor_idx|   features|        prediction|
+----------+------------------+-------------+---+----------+-----------+------------------+
|2016-05-01|        Sam Slater|            4| 12|       9.0| [12.0,9.0]|              56.0|
|2016-06-01|        Sam Slater|            4| 13|       9.0| [13.0,9.0]|              56.0|
|2016-05-01| Alexander Skinner|           38| 12|      11.0|[12.0,11.0]|              56.0|
|2016-06-01| Alexander Skinner|           32| 13|      11.0|[13.0,11.0]|              56.0|
|2016-05-01|    Sally Morrison|           13| 11|      65.0|[11.0,65.0]|              37.0|
|2016-06-01|    Sally Morrison|           22| 12|      65.0|[12.0,65.0]|              56.0|
|2016-06-01|    Brandon Fraser|            1| 11|      85.0|[11.0,85.0]|              37.0|
|2016-05-01|       Joseph Ince|            1|  1|     177.0|[1.0,177.0]|        

In [22]:
grouped_specialty = reading_mod.groupby('Date', 'Specialty').agg({'Specialty': 'count'})
grouped_specialty_with_date = grouped_specialty.withColumn('Date', change_to_date_func(col('Date')))
window_row_specialty = Window().partitionBy('Specialty').orderBy('Date')
grouped_specialty_with_date_new = grouped_specialty_with_date.withColumn('id', 
                                                                         row_number().over(window_row_specialty))
strindexer = StringIndexer(inputCol="Specialty", outputCol="Specialty_idx")
model_strindexer = strindexer.fit(grouped_specialty_with_date_new)
grouped_specialty_with_date_new_indexed = model_strindexer.transform(grouped_specialty_with_date_new)
assembler = VectorAssembler(inputCols=["id", "Specialty_idx"], outputCol="features")
grouped_specialty_with_date_new_1 = assembler.transform(grouped_specialty_with_date_new_indexed)
training_specialty_df = grouped_specialty_with_date_new_1
testing_specialty_df = grouped_specialty_with_date_new_1.where(col('Date') > datetime(2016,4,2))
lrs = LinearRegression(maxIter=1000, regParam=0.01, elasticNetParam=0.0, labelCol="count(Specialty)", 
                       featuresCol="features")
model_lrs = lrs.fit(training_specialty_df.select("features", "count(Specialty)"))
transformed_lrs = model_lrs.transform(testing_specialty_df)
regev_s = RegressionEvaluator(labelCol="count(Specialty)")
regev_s.evaluate(transformed_lrs.select("count(Specialty)", "prediction"), {regev_s.metricName: "mae"})

92.19588622977538

In [23]:
transformed_lrs.show(100)

+----------+--------------------+----------------+---+-------------+-----------+------------------+
|      Date|           Specialty|count(Specialty)| id|Specialty_idx|   features|        prediction|
+----------+--------------------+----------------+---+-------------+-----------+------------------+
|2016-05-01|Cardiothoracic Su...|              20| 12|         10.0|[12.0,10.0]|  95.6616681050213|
|2016-06-01|Cardiothoracic Su...|              20| 13|         10.0|[13.0,10.0]|101.96177808629815|
|2016-05-01|          Obstetrics|               1|  7|         15.0| [7.0,15.0]|10.875152351507381|
|2016-06-01|          Obstetrics|               1|  8|         15.0| [8.0,15.0]| 17.17526233278423|
|2016-05-01|        Neurosurgery|              46| 12|          0.0| [12.0,0.0]| 202.2335997992807|
|2016-06-01|        Neurosurgery|              33| 13|          0.0| [13.0,0.0]|208.53370978055756|
|2016-05-01| Orthopaedic Surgery|             302| 12|          3.0| [12.0,3.0]| 170.2620202910029|


In [18]:
isos = IsotonicRegression(labelCol="count(Specialty)", featuresCol="features")
model_isos = isos.fit(training_specialty_df.select("features", "count(Specialty)"))
transformed_isos = model_isos.transform(testing_specialty_df)
regev_s = RegressionEvaluator(labelCol="count(Specialty)")
regev_s.evaluate(transformed_isos.select("count(Specialty)", "prediction"), {regev_s.metricName: "mae"})

116.3355022170811

In [19]:
transformed_isos.show(100)

+----------+--------------------+----------------+---+-------------+-----------+------------------+
|      Date|           Specialty|count(Specialty)| id|Specialty_idx|   features|        prediction|
+----------+--------------------+----------------+---+-------------+-----------+------------------+
|2016-05-01|Cardiothoracic Su...|              20| 12|         10.0|[12.0,10.0]| 189.3333333333333|
|2016-06-01|Cardiothoracic Su...|              20| 13|         10.0|[13.0,10.0]|             244.0|
|2016-05-01|          Obstetrics|               1|  7|         15.0| [7.0,15.0]| 77.28421052631546|
|2016-06-01|          Obstetrics|               1|  8|         15.0| [8.0,15.0]| 77.28421052631546|
|2016-05-01|        Neurosurgery|              46| 12|          0.0| [12.0,0.0]| 189.3333333333333|
|2016-06-01|        Neurosurgery|              33| 13|          0.0| [13.0,0.0]|             244.0|
|2016-05-01| Orthopaedic Surgery|             302| 12|          3.0| [12.0,3.0]| 189.3333333333333|


In [34]:
grouped_hospital = reading_mod.groupby('Date', 'Hospital').agg({'Hospital': 'count'})
grouped_hospital_with_date = grouped_hospital.withColumn('Date', change_to_date_func(col('Date')))
window_row_hospital = Window().partitionBy('Hospital').orderBy('Date')
grouped_hospital_with_date_new = grouped_hospital_with_date.withColumn('id', 
                                                                         row_number().over(window_row_hospital))
strindexer = StringIndexer(inputCol="Hospital", outputCol="Hospital_idx")
model_strindexer = strindexer.fit(grouped_hospital_with_date_new)
grouped_hospital_with_date_new_indexed = model_strindexer.transform(grouped_hospital_with_date_new)
assembler = VectorAssembler(inputCols=["id", "Hospital_idx"], outputCol="features")
grouped_hospital_with_date_new_1 = assembler.transform(grouped_hospital_with_date_new_indexed)
training_hospital_df = grouped_hospital_with_date_new_1
testing_hospital_df = grouped_hospital_with_date_new_1.where(col('Date') > datetime(2016,4,2))
lrh = LinearRegression(maxIter=1000, regParam=0.0, elasticNetParam=1.0, labelCol="count(Hospital)", 
                       featuresCol="features")
model_lrh = lrh.fit(training_hospital_df.select("features", "count(Hospital)"))
transformed_lrh = model_lrh.transform(testing_hospital_df)
regev_h = RegressionEvaluator(labelCol="count(Hospital)")
regev_h.evaluate(transformed_lrh.select("count(Hospital)", "prediction"), {regev_s.metricName: "mae"})

185.14539055426448

In [35]:
transformed_lrh.show(100)

+----------+--------+---------------+---+------------+----------+-----------------+
|      Date|Hospital|count(Hospital)| id|Hospital_idx|  features|       prediction|
+----------+--------+---------------+---+------------+----------+-----------------+
|2016-05-01|     Hob|            247| 12|         3.0|[12.0,3.0]|266.4807692307692|
|2016-06-01|     Hob|            237| 13|         3.0|[13.0,3.0]|292.7692307692307|
|2016-05-01|     Syd|            927| 12|         0.0|[12.0,0.0]|668.3653846153845|
|2016-06-01|     Syd|            790| 13|         0.0|[13.0,0.0]| 694.653846153846|
|2016-05-01|    Bris|            227| 12|         2.0|[12.0,2.0]|400.4423076923076|
|2016-06-01|    Bris|            205| 13|         2.0|[13.0,2.0]|426.7307692307691|
|2016-05-01|     Mel|            799| 12|         1.0|[12.0,1.0]|534.4038461538461|
|2016-06-01|     Mel|            774| 13|         1.0|[13.0,1.0]|560.6923076923075|
+----------+--------+---------------+---+------------+----------+-----------

In [29]:
isoh = IsotonicRegression(labelCol="count(Hospital)", featuresCol="features")
model_isoh = isoh.fit(training_hospital_df.select("features", "count(Hospital)"))
transformed_isoh = model_isoh.transform(testing_hospital_df)
regev_h = RegressionEvaluator(labelCol="count(Hospital)")
regev_h.evaluate(transformed_isoh.select("count(Hospital)", "prediction"), {regev_s.metricName: "mae"})

304.19571208352033

In [31]:
transformed_isoh.show(100)

+----------+--------+---------------+---+------------+----------+------------------+
|      Date|Hospital|count(Hospital)| id|Hospital_idx|  features|        prediction|
+----------+--------+---------------+---+------------+----------+------------------+
|2016-05-01|     Hob|            247| 12|         3.0|[12.0,3.0]|482.75000000000006|
|2016-06-01|     Hob|            237| 13|         3.0|[13.0,3.0]|             542.0|
|2016-05-01|     Syd|            927| 12|         0.0|[12.0,0.0]|482.75000000000006|
|2016-06-01|     Syd|            790| 13|         0.0|[13.0,0.0]|             542.0|
|2016-05-01|    Bris|            227| 12|         2.0|[12.0,2.0]|482.75000000000006|
|2016-06-01|    Bris|            205| 13|         2.0|[13.0,2.0]|             542.0|
|2016-05-01|     Mel|            799| 12|         1.0|[12.0,1.0]|482.75000000000006|
|2016-06-01|     Mel|            774| 13|         1.0|[13.0,1.0]|             542.0|
+----------+--------+---------------+---+------------+----------+

In [41]:
dth = DecisionTreeRegressor(maxDepth=3, labelCol="count(Hospital)", featuresCol="features")
model_dth = dth.fit(training_hospital_df.select('count(Hospital)', 'features'))
transformed_dth = model_dth.transform(testing_hospital_df)
regev_dth = RegressionEvaluator(labelCol="count(Hospital)")
regev_dth.evaluate(transformed_dth.select("count(Hospital)", "prediction"), {regev_dth.metricName: "mae"})

32.625

In [39]:
transformed_dth.show(100)

+----------+--------+---------------+---+------------+----------+----------+
|      Date|Hospital|count(Hospital)| id|Hospital_idx|  features|prediction|
+----------+--------+---------------+---+------------+----------+----------+
|2016-05-01|     Hob|            247| 12|         3.0|[12.0,3.0]|     229.0|
|2016-06-01|     Hob|            237| 13|         3.0|[13.0,3.0]|     229.0|
|2016-05-01|     Syd|            927| 12|         0.0|[12.0,0.0]|     822.5|
|2016-06-01|     Syd|            790| 13|         0.0|[13.0,0.0]|     822.5|
|2016-05-01|    Bris|            227| 12|         2.0|[12.0,2.0]|     229.0|
|2016-06-01|    Bris|            205| 13|         2.0|[13.0,2.0]|     229.0|
|2016-05-01|     Mel|            799| 12|         1.0|[12.0,1.0]|     822.5|
|2016-06-01|     Mel|            774| 13|         1.0|[13.0,1.0]|     822.5|
+----------+--------+---------------+---+------------+----------+----------+

