In [3]:
class Histogram:
    """
    Interface for dividing points into groups and counting them.

    The groups are defined by a sequence of boundary values such that
    a value x is within a group if

    Group 0:
    x >= 0 and x < boundary[0]

    The rest of the groups (n >= 1):
    x >= boundary[n-1] and x < boundary[n]

    The boundary values are monotonically increasing in value.
    The returned vector contains the count of x values in each group.
    """
    def group_by(self, boundary):  # virtual method
        """
        Count the x values per group; derived classes must override this.

        Parameters:
            boundary: monotonically increasing group edges (see class docstring).

        Raises:
            NotImplementedError: always; the base class provides no implementation.
        """
        # The message names the method exactly as it is spelled in Python
        # (`group_by`), so the error points at the method to override.
        raise NotImplementedError("group_by method must be implemented in derived classes.")

Write a class that derives from the Histogram class and implements the virtual method defined by it. 

Call the derived class DataSet. The derived class should have a constructor as follows:

        DataSet::DataSet(const double* xvalues, const size_t count);

In [8]:
from bisect import bisect_right


class DataSet(Histogram):
    """Histogram implementation backed by an in-memory list of x values."""

    # Constructor
    def __init__(self, xvalues):
        """Store a sorted copy of ``xvalues`` (an iterable of comparable numbers)."""
        self.data = sorted(xvalues)

    def group_by(self, boundary):
        """
        Count how many stored values fall into each group.

        Group 0 holds every x with:
        x >= 0 and x < boundary[0]

        Group n (n >= 1) holds every x with:
        x >= boundary[n-1] and x < boundary[n]

        Parameters:
            boundary: sequence of group edges. It must be monotonically
                increasing; the binary search below relies on that ordering.

        Returns:
            A list with ``len(boundary)`` counts, one per group. Values that are
            >= boundary[-1], or that are negative and below boundary[0], belong
            to no group and are not counted. An empty ``boundary`` yields ``[]``.
        """
        n_groups = len(boundary)
        group = [0] * n_groups
        for x in self.data:
            # Index of the first edge strictly greater than x, i.e. the only
            # group whose half-open interval [boundary[i-1], boundary[i]) can hold x.
            i = bisect_right(boundary, x)
            if i == n_groups:
                # x is at or beyond the last edge (or there are no edges at all).
                continue
            if i == 0 and x < 0:
                # Group 0 has an implicit lower bound of 0.
                continue
            group[i] += 1
        return group

# Exercise DataSet.group_by on a small sample of evenly spaced values
data = DataSet([1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0])
boundary = [2.0, 3.0, 4.0, 5.0]

groups = data.group_by(boundary)

# One line of output per group: "Group <index>: <count>"
print(*(f"Group {i}: {count}" for i, count in enumerate(groups)), sep="\n")

Group 0: 1
Group 1: 2
Group 2: 2
Group 3: 2


In [14]:
# Print, for every group, its number, its count of x values and the
# half-open interval it covers (group 0 starts at 0).
print(
    *(
        f"Group {i}: {count} [{0 if i == 0 else boundary[i-1]} <= x < {boundary[i]}]"
        for i, count in enumerate(groups)
    ),
    sep="\n",
)

Group 0: 1 [0 <= x < 2.0]
Group 1: 2 [2.0 <= x < 3.0]
Group 2: 2 [3.0 <= x < 4.0]
Group 3: 2 [4.0 <= x < 5.0]


In [17]:
from bisect import bisect_right


class Histogram:
    """
    Interface for dividing points into groups and counting them.

    The groups are defined by a sequence of boundary values such that
    a value x is within a group if

    Group 0:
    x >= 0 and x < boundary[0]

    The rest of the groups (n >= 1):
    x >= boundary[n-1] and x < boundary[n]

    The boundary values are monotonically increasing in value.
    The returned vector contains the count of x values in each group.
    """
    def group_by(self, boundary):  # virtual method
        """
        Count the x values per group; derived classes must override this.

        Raises:
            NotImplementedError: always; the base class provides no implementation.
        """
        # The message names the method exactly as it is spelled in Python.
        raise NotImplementedError("group_by method must be implemented in derived classes.")


# Notes:
# - boundary is a list of group edges, monotonically increasing in value.
# - xvalues is a list of numbers.
# - group_by returns a list (called "group") holding the count of x values
#   in each group.


class DataSet(Histogram):
    """Histogram implementation backed by an in-memory list of x values."""

    # Constructor
    def __init__(self, xvalues):
        """Store a sorted copy of ``xvalues`` (an iterable of comparable numbers)."""
        self.xvalues = sorted(xvalues)

    def group_by(self, boundary):
        """
        Count how many stored values fall into each group.

        Group 0 holds every x with:
        x >= 0 and x < boundary[0]

        Group n (n >= 1) holds every x with:
        x >= boundary[n-1] and x < boundary[n]

        Parameters:
            boundary: sequence of group edges. It must be monotonically
                increasing; the binary search below relies on that ordering.

        Returns:
            A list with ``len(boundary)`` counts, one per group. Values that are
            >= boundary[-1], or that are negative and below boundary[0], belong
            to no group and are not counted. An empty ``boundary`` yields ``[]``.
        """
        n_groups = len(boundary)
        # Initializing by creating a list of zeros with the same length as boundary
        group = [0] * n_groups
        for x in self.xvalues:
            # Index of the first edge strictly greater than x, i.e. the only
            # group whose half-open interval [boundary[i-1], boundary[i]) can hold x.
            i = bisect_right(boundary, x)
            if i == n_groups:
                # x is at or beyond the last edge (or there are no edges at all).
                continue
            if i == 0 and x < 0:
                # Group 0 has an implicit lower bound of 0.
                continue
            group[i] += 1
        return group

# Exercise DataSet.group_by on odd integers with integer boundaries
xvalues = DataSet([1, 3, 5, 7, 9, 11, 13, 15])
boundary = [3,7,11,15]

groups = xvalues.group_by(boundary)

# One line of output per group: "Group <index>: <count>"
print(*(f"Group {i}: {count}" for i, count in enumerate(groups)), sep="\n")

# Optional, currently disabled: also show the interval each group covers.
# print("\n".join([f"Group {i}: {count} [{boundary[i-1] if i > 0 else 0} <= x < {boundary[i]}]" for i, count in enumerate(groups)]))

Group 0: 1
Group 1: 2
Group 2: 2
Group 3: 2


In [22]:
# Binomial distribution from scipy.stats
from scipy.stats import binom

# P(X = 3) for X ~ Binomial(n=3, p=0.5), i.e. three successes in three fair trials
binom.pmf(3, n=3, p=0.5)

0.125

In [23]:
from scipy.stats import poisson

# P(X = 20) for X ~ Poisson(mu=10)
poisson.pmf(20, mu=10)

0.0018660813139987742

In [28]:
a = [0, 1, 2, 3]
b = [4, 5, 6, 3]

# Elements present in both lists; converting to sets drops order and duplicates
set(a) & set(b)


{3}

In [29]:
# Print the integers 1 through 10, leaving out every multiple of 3
for i in range(1, 11):
    if i % 3 != 0:
        print(i)

1
2
4
5
7
8
10
