In [35]:
from collections import Counter
from numbers import Number
from typing import Union

class FeatureColumn:
    """A single homogeneous column of feature values.

    The column is classified on construction as ``"Quantitative"`` (every
    value is a ``numbers.Number``) or ``"Categorical"`` (every value is a
    ``str``); the result is stored in ``feature_type``.
    """

    def __init__(self, values: Union[list[str], list[Number]]):
        """Store *values* and infer the feature type.

        Args:
            values: Non-empty collection of values, all numbers or all strings.

        Raises:
            ValueError: If *values* is empty.
            AttributeError: If *values* mixes numbers and strings (or holds
                anything else).
        """
        # Copy so that later mutation of the caller's list cannot make the
        # stored values disagree with the inferred feature_type.
        values = list(values)
        # Explicit raise instead of `assert`: asserts vanish under `python -O`.
        if not values:
            raise ValueError("FeatureColumn requires at least one value")
        if all(isinstance(value, Number) for value in values):
            self.feature_type = "Quantitative"
        elif all(isinstance(value, str) for value in values):
            self.feature_type = "Categorical"
        else:
            raise AttributeError("Values are neither consistently numbers not strings")
        self.values = values

    def __len__(self):
        """Return the number of values in the column."""
        return len(self.values)

    def __repr__(self):
        """Return a string showing the feature type and the stored values."""
        return f"FeatureColumn(type={self.feature_type}, values={self.values})"

    def __add__(self, other: "FeatureColumn") -> "FeatureColumn":
        """Return the element-wise sum of two quantitative columns.

        Raises:
            TypeError: If either column is categorical.
            ValueError: If the columns have different lengths.
        """
        if not isinstance(other, FeatureColumn):
            return NotImplemented
        if other.feature_type == "Categorical" or self.feature_type == "Categorical":
            raise TypeError("Can sum only quantitative features")
        # zip() would silently drop the tail of the longer column.
        if len(self.values) != len(other.values):
            raise ValueError("Can sum only features of equal length")

        res_values = [el1 + el2 for el1, el2 in zip(self.values, other.values)]
        return FeatureColumn(res_values)

    @property
    def unique(self):
        """Sorted list of the distinct values in the column."""
        return sorted(set(self.values))

    def mean(self):
        """Return the arithmetic mean of a quantitative column.

        Raises:
            TypeError: If the column is categorical.
        """
        if self.feature_type == "Categorical":
            raise TypeError("Cannot compute mean for categorical feature")
        return sum(self.values) / len(self.values)

    def mode(self):
        """Return the most frequent value of a categorical column.

        When several values share the highest count, the one that appears
        first in the column is returned.

        Raises:
            TypeError: If the column is quantitative.
        """
        # The type label assigned in __init__ is "Quantitative"; the previous
        # comparison against "Numerical" could never be true.
        if self.feature_type == "Quantitative":
            raise TypeError("Cannot compute mode for numerical feature")
        # Counter counts in one pass and keeps first-seen order, so ties are
        # resolved deterministically.
        return Counter(self.values).most_common(1)[0][0]

    def median(self):
        """Return the median of a quantitative column.

        For an even number of values the two middle values are averaged.

        Raises:
            TypeError: If the column is categorical.
        """
        if self.feature_type == "Categorical":
            raise TypeError("Cannot compute median for categorical feature")
        srt = sorted(self.values)
        n = len(srt)
        mid = n // 2
        if n % 2 == 1:
            return srt[mid]
        return (srt[mid - 1] + srt[mid]) / 2

    @classmethod
    def from_file(cls, file_path: str) -> "FeatureColumn":
        """Build a column from a text file holding one value per line.

        If every line parses as a float the column is quantitative; otherwise
        the raw lines are kept as strings and the column is categorical.

        Args:
            file_path: Path of the file to read.

        Raises:
            ValueError: If the file contains no lines.
        """
        with open(file_path) as file:
            lines = file.read().splitlines()
        try:
            values = [float(line) for line in lines]
        except ValueError:
            # Not all lines are numeric: treat the file as categorical data.
            values = lines
        return cls(values)

In [2]:
quant_feat = FeatureColumn([1.2, 3.5, 6.9, 9.9])

In [3]:
quant_feat.values

[1.2, 3.5, 6.9, 9.9]

In [4]:
quant_feat.feature_type

'Quantitative'

In [5]:
quant_feat.mean()

5.375

In [36]:
cat_feat = FeatureColumn(["blue", "orange", "blue", "green"])

In [37]:
cat_feat.feature_type

'Categorical'

In [38]:
cat_feat.mode()

'blue'

In [39]:
cat_feat.values.count("blue")

2

In [41]:
cat_feat.unique

['blue', 'green', 'orange']

In [10]:
mix_feat = FeatureColumn([1.2, 3.5, "Hello", 9.9])

AttributeError: Values are neither consistently numbers not strings

In [11]:
len(quant_feat)

4

In [12]:
print(quant_feat)

FeatureColumn(type=Quantitative, values=[1.2, 3.5, 6.9, 9.9])


In [13]:
int_feat = FeatureColumn([1, 7, 9, 6, 8, 0])

In [14]:
int_feat.median()

6.5

In [15]:
int_feat_2 = FeatureColumn([10, 10, 10, 10, 0, 0])

In [16]:
int_feat + int_feat_2

FeatureColumn(type=Quantitative, values=[11, 17, 19, 16, 8, 0])

In [22]:
FeatureColumn.from_file("../datasets/feature-example.txt")

FeatureColumn(type=Quantitative, values=[1.0, 2.0, 10.0, 9.3, 8.0, 10.0])