# Bottom-Up Cube (BUC) Algorithm

In [3]:
import pandas as pd
import numpy as np
import time
import matplotlib.pyplot as plt
from itertools import combinations
from memory_profiler import profile

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [4]:
# Load the electric-vehicle dataset; the relative path is resolved against
# the current working directory.
data = pd.read_csv('Electric_Vehicle_Data.csv')
# Preview the first five rows (shown as the cell output when this is the
# last expression of a notebook cell).
data.head(5)

Unnamed: 0,VIN (1-10),County,City,State,Postal Code,Model Year,Make,Model,Electric Vehicle Type,Clean Alternative Fuel Vehicle (CAFV) Eligibility,Electric Range,Base MSRP,Legislative District,DOL Vehicle ID,Vehicle Location,Electric Utility,2020 Census Tract
0,WAUTPBFF4H,King,Seattle,WA,98126.0,2017,AUDI,A3,Plug-in Hybrid Electric Vehicle (PHEV),Not eligible due to low battery range,16,0,34.0,235085336,POINT (-122.374105 47.54468),CITY OF SEATTLE - (WA)|CITY OF TACOMA - (WA),53033010000.0
1,WAUUPBFF2J,Thurston,Olympia,WA,98502.0,2018,AUDI,A3,Plug-in Hybrid Electric Vehicle (PHEV),Not eligible due to low battery range,16,0,22.0,237896795,POINT (-122.943445 47.059252),PUGET SOUND ENERGY INC,53067010000.0
2,5YJSA1E22H,Thurston,Lacey,WA,98516.0,2017,TESLA,MODEL S,Battery Electric Vehicle (BEV),Clean Alternative Fuel Vehicle Eligible,210,0,22.0,154498865,POINT (-122.78083 47.083975),PUGET SOUND ENERGY INC,53067010000.0
3,1C4JJXP62M,Thurston,Tenino,WA,98589.0,2021,JEEP,WRANGLER,Plug-in Hybrid Electric Vehicle (PHEV),Not eligible due to low battery range,25,0,20.0,154525493,POINT (-122.85403 46.856085),PUGET SOUND ENERGY INC,53067010000.0
4,5YJ3E1EC9L,Yakima,Yakima,WA,98902.0,2020,TESLA,MODEL 3,Battery Electric Vehicle (BEV),Clean Alternative Fuel Vehicle Eligible,308,0,14.0,225996361,POINT (-120.524012 46.5973939),PACIFICORP,53077000000.0


## In-Memory Implementation

In [10]:
class BUC:
    """In-memory Bottom-Up Cube (BUC) computation over integer-coded rows.

    Each input row is a sequence with at least ``numDims`` entries; the
    entries used as dimensions must be non-negative integers, because they
    are used directly as indices by the counting sort in ``Partition``.

    Records appended to ``output`` have the form ``path + [count]``, where
    ``path`` holds the dimension values fixed so far (in the order they were
    fixed) and ``count`` is the number of rows in that cell.  The apex cell
    (no dimension fixed) is recorded as ``['ALL']``.

    NOTE(review): a record does not say *which* dimensions its values belong
    to, so e.g. ``[0, 2]`` can stand for "dim 0 == 0" or "dim 1 == 0".
    Fixing that needs a different record format and is left for a follow-up.
    NOTE(review): a partition that shrinks to a single row is recorded once
    and not expanded into its finer-grained cells (all of which would also
    have count 1).
    """

    def __init__(self, numDims, minsup):
        """Store the number of dimensions and the minimum support threshold.

        Args:
            numDims: number of leading columns of each row to cube over.
            minsup: minimum row count a partition needs to be explored.
        """
        self.numDims = numDims
        self.minsup = minsup
        self.data = []
        self.output = []

    def aggregate(self, input):
        """Load ``input`` (an iterable of rows) into ``self.data`` as lists."""
        self.data = [list(row) for row in input]

    def Cardinality(self, data, dimension):
        """Return ``max value + 1`` of column ``dimension`` in ``data``.

        Returns 0 when there are no dimensions or no rows, so that callers
        can size a counting array without special-casing empty input.
        """
        if self.numDims == 0 or not data:
            return 0
        return max(row[dimension] for row in data) + 1

    def Partition(self, input, d, C, dataCount):
        """Stable counting sort of ``input`` on column ``d``.

        Args:
            input: list of rows; ``row[d]`` must be an int in ``range(C)``.
            d: index of the column to sort/partition on.
            C: number of distinct bucket slots (see ``Cardinality``).
            dataCount: output list; it is overwritten *in place* with the
                number of rows per value of column ``d``.  May be ``None``
                if the caller does not need the counts.

        Returns:
            A new list with the rows of ``input`` ordered by ``row[d]``;
            rows with equal keys keep their relative order.
        """
        freq = [0] * C
        for row in input:
            freq[row[d]] += 1

        # Mutate the caller's list instead of rebinding the local name;
        # rebinding would leave the caller with its original zeros.
        if dataCount is not None:
            dataCount[:] = freq

        # Exclusive prefix sums: offsets[v] is the first slot for value v.
        offsets = [0] * C
        running = 0
        for value, n in enumerate(freq):
            offsets[value] = running
            running += n

        sorted_data = [None] * len(input)
        for row in input:
            key = row[d]
            sorted_data[offsets[key]] = row
            offsets[key] += 1

        return sorted_data

    def run(self, input_data, dim=0, path=None):
        """Recursively cube ``input_data`` and append records to ``output``.

        Args:
            input_data: rows belonging to the current cell.
            dim: first dimension index still allowed to be fixed.
            path: values of the dimensions fixed so far (``None`` for the
                top-level call).
        """
        if path is None:
            path = []

        # A single-row partition is recorded once and not refined further.
        if len(input_data) == 1:
            self.output.append(path + [1])
            return

        # Record the aggregate of the current (non-apex) cell; the apex is
        # recorded as ['ALL'] after all its sub-cells, see below.
        if path:
            self.output.append(path + [len(input_data)])

        for d in range(dim, self.numDims):
            C = self.Cardinality(input_data, d)
            dataCount = [0] * C
            input_data = self.Partition(input_data, d, C, dataCount)

            # dataCount holds per-value frequencies, so partitions are laid
            # out back to back in the sorted data.
            start_idx = 0
            for count in dataCount:
                # Skip empty buckets explicitly so that minsup <= 0 never
                # indexes a row that belongs to a different value.
                if count > 0 and count >= self.minsup:
                    new_path = path + [input_data[start_idx][d]]
                    self.run(
                        input_data[start_idx:start_idx + count],
                        d + 1,
                        new_path,
                    )
                start_idx += count

        if not path:
            self.output.append(path + ['ALL'])

    def print_result(self):
        """Run the algorithm on ``self.data`` and print every record.

        The output list is reset first so that calling this method more
        than once does not accumulate duplicate records.
        """
        self.output = []
        self.run(self.data)
        for record in self.output:
            print(record)

    def get_result(self):
        """Return the list of records produced so far."""
        return self.output

['ALL']
