In [None]:
import unittest
import source.check_data as chd
import pandas as pd
import numpy as np
from pathlib import Path
import os
import random

## Make test data: 

In [None]:
# Seed Python's `random` module so the pseudo time stamps drawn below are the same on every run.
# NOTE(review): the hand-written per-bin subject lists further down presumably rely on the
# values this particular seed produces — confirm before changing it.
random.seed(43)

### Draw random floats between 0 and 8 to simulate time stamps measured in hours, then sort them in ascending order:

In [None]:
# Draw ten pseudo time stamps (in hours) uniformly from [0, 8].
# The comprehension calls random.uniform in the same order as an explicit loop, so the
# seeded sequence is unchanged, and it leaves no counter or scratch variable behind in
# the kernel namespace.
random_floats = [random.uniform(0, 8) for _ in range(10)]
random_floats

In [None]:
# Check which NumPy dtype the list of Python floats is converted to.
np.asarray(random_floats).dtype

In [None]:
# Sort ascending in place (the same list object is kept), then display the result.
random_floats[:] = sorted(random_floats)
random_floats

### Put everything into a dataframe:

In [None]:
# One row per pseudo time stamp, held in a single column.
test_data = pd.DataFrame(random_floats, columns=['pseudo_time_stamp'])
test_data

### Categorize the data into one-hour intervals by adding a column of time-stamp bins:

In [None]:
# Assign every time stamp to a right-closed one-hour bin: (0, 1] -> 1, (1, 2] -> 2, ...,
# (7, 8] -> 8; the final open-ended bin (8, inf] -> 9 catches anything above eight hours.
test_data["time_stamps_bins"] = pd.cut(
    test_data["pseudo_time_stamp"],
    bins=[*range(9), np.inf],
    labels=[*range(1, 10)],
)

In [None]:
# Display the frame to inspect the bin label that was assigned to each time stamp.
test_data

### Engineer the number of different subject IDs per time bin:

In [None]:
# Hand-written subject IDs for each occupied time bin, paired with the number of
# distinct IDs (categories) that bin contains.

# First bin: one category.
pseudo_subjects_1, b1 = ['Sub_01', 'Sub_01'], 1
# Second bin: one category.
pseudo_subjects_2, b2 = ['Sub_02'], 1
# Fourth bin: three different categories.
pseudo_subjects_4, b4 = ['Sub_02', 'Sub_03', 'Sub_02', 'Sub_01'], 3
# Sixth bin: two categories.
pseudo_subjects_6, b6 = ['Sub_04', 'Sub_01'], 2
# Seventh bin: one category.
pseudo_subjects_7, b7 = ['Sub_02'], 1

In [None]:
# Expected number of distinct subjects per occupied bin, aligned position by position
# with the bin labels in time_bin_list.
cat_num_list = [b1, b2, b4, b6, b7]
time_bin_list = [1, 2, 4, 6, 7]
# Only the last expression of a cell is rendered, so show both lists together.
cat_num_list, time_bin_list

In [None]:
# Cross-check every hand-written count against the subject list it describes.
# For each bin the computed unique count and the expected count are printed as a pair,
# and the assert stops the run on a mismatch instead of relying on reading the output.
# All five occupied bins are covered, including the seventh.
bin_checks = [
    (1, pseudo_subjects_1, b1),
    (2, pseudo_subjects_2, b2),
    (4, pseudo_subjects_4, b4),
    (6, pseudo_subjects_6, b6),
    (7, pseudo_subjects_7, b7),
]
for bin_label, subjects, expected_count in bin_checks:
    unique_count = len(set(subjects))
    print(unique_count)
    print(expected_count)
    assert unique_count == expected_count, (
        f"bin {bin_label}: {unique_count} unique subjects, expected {expected_count}"
    )

In [None]:
# Flatten the per-bin subject lists into a single list, in ascending bin order.
pseudo_subjects = [
    *pseudo_subjects_1,
    *pseudo_subjects_2,
    *pseudo_subjects_4,
    *pseudo_subjects_6,
    *pseudo_subjects_7,
]

In [None]:
# Attach the subject labels as a new column; pandas requires the list to have exactly
# one entry per row of test_data, and the entries are matched to rows by position.
test_data['subject'] = pseudo_subjects
test_data

## Engineer desired output based on the test-data:

In [None]:
# Expected result: one row per occupied time bin with its number of distinct subjects.
desired_output = pd.DataFrame(
    dict(
        time_bin=time_bin_list,
        number_unique_subjects=cat_num_list,
    )
)
desired_output

### Visually compare output to desired output:

In [None]:
# Run the function under test on the engineered frame so that its rendered result
# can be compared by eye with desired_output.
chd.count_unique_subjects_per_hour(test_data, time_bin_list)

## Implement the unit tests:

In [None]:
class TestCheckData(unittest.TestCase):
    """Unit tests for ``chd.count_unique_subjects_per_hour``.

    The tests rely on three notebook-level fixtures: ``test_data`` (the input
    frame), ``time_bin_list`` (the bins passed to the function) and
    ``desired_output`` (the hand-engineered expected result).
    """

    def setUp(self):
        # Give every test its own copy so that a test, or the function under
        # test, mutating its input cannot affect other tests or the notebook state.
        self.data = test_data.copy()

    def test_inputExists(self):
        """The input fixture is defined."""
        self.assertIsNotNone(self.data)

    def test_inputType(self):
        """The input fixture is a pandas DataFrame."""
        self.assertIsInstance(self.data, pd.DataFrame)

    def test_functReturnsSomething(self):
        """The function returns a value other than None."""
        self.assertIsNotNone(chd.count_unique_subjects_per_hour(self.data, time_bin_list))

    def test_outputType(self):
        """The function returns a pandas DataFrame."""
        self.assertIsInstance(
            chd.count_unique_subjects_per_hour(self.data, time_bin_list),
            pd.DataFrame,
        )

    def test_outputDimensions(self):
        """The output has the same (rows, columns) shape as the desired output."""
        df_output = chd.count_unique_subjects_per_hour(self.data, time_bin_list)
        # Comparing the tuples directly makes a failure report both shapes
        # instead of the uninformative "True != False".
        self.assertEqual(df_output.shape, desired_output.shape)

    def test_outputEqualsDesiredOutput(self):
        """The output matches the desired output according to ``DataFrame.equals``."""
        df_output = chd.count_unique_subjects_per_hour(self.data, time_bin_list)
        # Same pass/fail criterion as before; the message shows both frames on failure.
        self.assertTrue(
            df_output.equals(desired_output),
            msg=f"Output differs from desired output.\nGot:\n{df_output}\nExpected:\n{desired_output}",
        )

        
    
# The usual `if __name__ == "__main__": unittest.main()` guard is not used here.
# By default unittest.main() reads sys.argv and calls sys.exit() when it finishes;
# argv=[''] supplies only a placeholder program name, and exit=False keeps the run
# from exiting the process, which is what a notebook cell needs.

unittest.main(argv=[''], verbosity=2, exit=False)
    