# How to implement datascience code?

In [1]:
from random import randint

def six_sided():
    return randint(1,6)

def roll_dice():
    return six_sided()+six_sided()

- Hard to debug
- Hard to refactor

## Abstraction with `__repr__`

In [2]:
import random

from abc import ABC, abstractmethod

class Distribution(ABC):
    @abstractmethod
    def sample(self):
        pass


We have defined an abstract class defining interfaces for probability distributions in general.

In [3]:
class Die(Distribution):
    def __init__(self, sides):
        self.sides = sides # Attribute

    # This one makes debugging much easier
    # A function cannot do this
    def __repr__(self):
        return f"Die(sides={self.sides})"

    def sample(self):
        return random.randint(1, self.sides)

In [4]:
print(Die(6))
six_sided = Die(6)
print(six_sided==six_sided)

# It shows False although they are conceptually identical 
print(six_sided==Die(6))

Die(sides=6)
True
False


## Abstraction with `__eq__`

In [5]:
import random

class Die(Distribution):
    def __init__(self, sides):
        self.sides = sides # Attribute

    # This one makes debugging much easier
    # A function cannot do this
    def __repr__(self):
        return f"Die(sides={self.sides})"

#    def __eq__(self, other):
#        return self.sides==other.sides

    def __eq__(self, other):
        if isinstance(other, Die):
            return self.sides==other.sides
        return False

    def sample(self):
        return random.randint(1, self.sides)


In [6]:
print(Die(6))
six_sided = Die(6)
print(six_sided==six_sided)

# It shows False although they are conceptually identical 
print(six_sided==Die(6))

Die(sides=6)
True
True


## Dataclass

In [7]:
import random
from dataclasses import dataclass

@dataclass
class Die(Distribution):
    def __init__(self, sides:int)->None:
        self.sides = sides # Attribute
    
    def sample(self):
        return random.randint(1, self.sides)

In [8]:
print(Die(6))
six_sided = Die(6)
print(six_sided==six_sided)

# It shows False although they are conceptually identical 
print(six_sided==Die(6))

Die()
True
True


## Dataclass with Frozen

In [9]:
import random
import dataclasses
from dataclasses import dataclass

@dataclass(frozen=True)
class Die(Distribution):
    sides: int # Specify a data type, type annotations or type hints

    def sample(self)->int:
        return random.randint(1, self.sides)

In [10]:
die = Die(6)
print(die.sample())
d10 = dataclasses.replace(die, sides=10)
print(d10)

# We can use frozen dataclass as a dictionary key
sample_dict = {die: "abs"} 
print(sample_dict)

4
Die(sides=10)
{Die(sides=6): 'abs'}


## Static Typing

Being able to find type mismatches without running code is called static typing. 

In [31]:
from typing import Generic, TypeVar

class Distribution(ABC):
    @abstractmethod
    def sample(self):
        pass