# A Step-by-Step Guide to HoloFusion example part 4/4 
   # (our approach with multiple weights per source)


# Setup
First, we import all the modules from HoloFusion that we will use.

In [1]:
from holofusion import HoloFusion, HoloFusionSession
from time import time as t

##   Initialization
In this part, we create the HoloFusion and Session objects that we will use for this example.

In [2]:
        # Build the fusion engine. The flags are passed exactly as before:
        # majority_vote=0, training_data=1, multiple_weights=1 -- presumably
        # 0/1 act as off/on switches selecting the "multiple weights per source"
        # approach named in the title; confirm against the HoloFusion constructor.
        holo_obj = HoloFusion(majority_vote=0, training_data=1, multiple_weights=1)
        # Session bound to the engine above; the later cells call its
        # ingest/feature/inference/accuracy methods.
        session = HoloFusionSession("Session", holo_obj)
        # Timing log shared by every later cell and closed in the last one.
        # Opened line-buffered (buffering=1) so each timing line is flushed as
        # soon as it is written: with the default block buffering, timings were
        # lost whenever a later cell failed before reaching fx.close().
        fx = open('execution_time.txt', 'w', 1)
        

  cursor.execute('SELECT @@tx_isolation')


## Read Input from file
Test data and training data will be read using the Session's ingestor.
After ingestion, the test data is loaded into MySQL tables, along with entries in a metadata table.

In [3]:
        # Wall-clock duration (seconds) of each pipeline stage; the later cells
        # keep appending to this list.
        list_time = []

        # Stage 1: ingest the test CSV through the session and time the call.
        ingest_started = t()
        session.ingest_dataset("data/clean_flight/flight-data-test.csv")
        ingest_elapsed = t() - ingest_started
        list_time.append(ingest_elapsed)
        # The same line goes to the timing log and to the cell output.
        ingest_msg = 'ingest csv time: ' + str(ingest_elapsed) + '\n'
        fx.write(ingest_msg)
        print(ingest_msg)

        # Stage 2: add the training data and time the call.
        training_started = t()
        session.adding_training_data("data/clean_flight/flight-data_training.csv")
        training_elapsed = t() - training_started
        list_time.append(training_elapsed)
        # Note: the log label and the printed label differ in the original.
        fx.write('adding training data: ' + str(training_elapsed) + '\n')
        print('adding training time: ' + str(training_elapsed) + '\n')

        # Show the two tables produced so far, each preceded by its caption.
        for caption, table_name in (('Clean table', "C_clean_flat"),
                                    ('Don\'t know table', "C_dk_flat")):
            print(caption)
            holo_obj.dataengine.get_table_to_dataframe(table_name, session.dataset).show()

ingest csv time: 8.22992897034

['Source', 'Flight_Num', 'Scheduled_Dept', 'Actual_Dept', 'Scheduled_Arrival', 'Actual_Arrival']
adding training time: 24.7288529873

Clean table
+-------------+--------------------+--------------+------------+
|       source|            rv_index|       rv_attr|assigned_val|
+-------------+--------------------+--------------+------------+
|        gofox|2011-12-01-AA-649...|Scheduled_Dept|        1:30|
|  foxbusiness|2011-12-01-AA-649...|Scheduled_Dept|        1:30|
| allegiantair|2011-12-01-AA-649...|Scheduled_Dept|        1:30|
|       boston|2011-12-01-AA-649...|Scheduled_Dept|        1:30|
|  travelocity|2011-12-01-AA-649...|Scheduled_Dept|        1:30|
|       orbitz|2011-12-01-AA-649...|Scheduled_Dept|        1:30|
|      weather|2011-12-01-AA-649...|Scheduled_Dept|        1:30|
|mytripandmore|2011-12-01-AA-649...|Scheduled_Dept|        1:30|
|           aa|2011-12-01-AA-306...|Scheduled_Dept|        8:20|
|  flightaware|2011-12-01-AA-306...|Schedu

## Featurization

In [4]:
        # Time only the featurization call; displaying the table afterwards is
        # not included in the measurement.
        feature_started = t()
        session.feature()
        feature_elapsed = t() - feature_started
        list_time.append(feature_elapsed)
        fx.write('creating feature table time: ' + str(feature_elapsed) + '\n')

        # Display the "Feature" table, then report the elapsed time.
        print('Feature table')
        holo_obj.dataengine.get_table_to_dataframe("Feature", session.dataset).show()
        print(' feature table time: ' + str(feature_elapsed) + '\n')

['Source', 'Flight_Num', 'Scheduled_Dept', 'Actual_Dept', 'Scheduled_Arrival', 'Actual_Arrival']
adding weight_id to feature table...
creating weight table
creating feature table with weights ids
adding weight_id to feature table is finished
Feature table
+---------+-------------------+--------------------+-----------------+------------+---------+-----+
|var_index|          Source_id|            rv_index|          rv_attr|assigned_val|weight_id|fixed|
+---------+-------------------+--------------------+-----------------+------------+---------+-----+
|        1|                 aa|2011-12-01-AA-100...|   Actual_Arrival|        2:57|       76|    0|
|        2|     flightexplorer|2011-12-01-AA-100...|   Actual_Arrival|        2:51|      125|    0|
|        3|    airtravelcenter|2011-12-01-AA-100...|   Actual_Arrival|        5:00|      126|    0|
|        4|         myrateplan|2011-12-01-AA-100...|   Actual_Arrival|        5:00|      127|    0|
|        5|        helloflight|2011-12-01-AA

#  Learning
In the learning phase, our approach obtains the weights from numbskull (one unique weight per source and attribute).


In [5]:
        # Time only the inference call; displaying the table afterwards is not
        # included in the measurement.
        inference_started = t()
        session.inference()
        inference_elapsed = t() - inference_started
        list_time.append(inference_elapsed)
        # The same line goes to the timing log now and to the cell output below.
        inference_msg = 'inference time: ' + str(inference_elapsed) + '\n'
        fx.write(inference_msg)

        # Display the "Probabilities" table, then report the elapsed time.
        print('Probabilities table')
        holo_obj.dataengine.get_table_to_dataframe("Probabilities", session.dataset).show()
        print(inference_msg)
        

numbskull is starting
wrapper is starting
wrapper is finished
numbskull is finished
adding weight is finished is finished
Probabilities table
+--------------------+-----------------+------------+--------------------+
|            rv_index|          rv_attr|assigned_val|         probability|
+--------------------+-----------------+------------+--------------------+
|2011-12-01-AA-100...|   Actual_Arrival|        2:51| 0.04010800235520191|
|2011-12-01-AA-100...|   Actual_Arrival|        2:57|   0.805590762220706|
|2011-12-01-AA-100...|   Actual_Arrival|        5:00| 0.15430123542409196|
|2011-12-01-AA-100...|      Actual_Dept|        2:07|0.015244990658602254|
|2011-12-01-AA-100...|      Actual_Dept|        5:00|  0.9847550093413977|
|2011-12-01-AA-100...|Scheduled_Arrival|        3:00|                 1.0|
|2011-12-01-AA-100...|   Scheduled_Dept|        1:55|                 1.0|
|2011-12-01-AA-101...|   Actual_Arrival|        5:00|   0.342388312163603|
|2011-12-01-AA-101...|   Actual_A

## Final Table and Accuracy Calculations

In [6]:
        # Time the accuracy computation against the ground-truth CSV.
        accuracy_started = t()
        session.accuracy("data/clean_flight/flight-data_truth.csv")
        accuracy_elapsed = t() - accuracy_started
        list_time.append(accuracy_elapsed)

        # The same line goes to the timing log and to the cell output.
        accuracy_msg = 'time to calculate accuracy: ' + str(accuracy_elapsed) + '\n'
        fx.write(accuracy_msg)
        print(accuracy_msg)

        # Last stage: close the timing log opened in the initialization cell.
        fx.close()

['Flight_Num', 'Scheduled_Dept', 'Actual_Dept', 'Scheduled_Arrival', 'Actual_Arrival']
show only authors
+--------------------+-----------------+------------+
|            rv_index|          rv_attr|assigned_val|
+--------------------+-----------------+------------+
|2011-12-01-AA-100...|   Actual_Arrival|        2:57|
|2011-12-01-AA-100...|      Actual_Dept|        5:00|
|2011-12-01-AA-100...|Scheduled_Arrival|        3:00|
|2011-12-01-AA-100...|   Scheduled_Dept|        1:55|
|2011-12-01-AA-101...|   Actual_Arrival|        8:36|
|2011-12-01-AA-101...|      Actual_Dept|        5:00|
|2011-12-01-AA-101...|Scheduled_Arrival|        8:40|
|2011-12-01-AA-101...|   Scheduled_Dept|        7:15|
|2011-12-01-AA-103...|   Actual_Arrival|        3:14|
|2011-12-01-AA-103...|      Actual_Dept|        5:00|
|2011-12-01-AA-103...|Scheduled_Arrival|        3:20|
|2011-12-01-AA-103...|   Scheduled_Dept|        1:25|
|2011-12-01-AA-104...|   Actual_Arrival|        1:17|
|2011-12-01-AA-104...|      Act