# Introduction to Data Model
### By Ankush Chander

In [None]:
# !pip install spacy
# !python -m spacy download en_core_web_sm

# Motivation:
How to write user-friendly, easy-to-read, Pythonic objects.

# What is the data model?

The Python data model refers to the way data is represented and manipulated in the Python programming language. It defines the interfaces and protocols that classes can implement to take advantage of Python's features such as iteration, comparison, and customization of behavior.


By understanding and implementing these protocols in your classes, you can create objects that seamlessly integrate with Python's built-in functions and operators, making your code more readable, expressive, and consistent with the language's conventions.

The Python data model consists of:
1. Objects, values and types
2. The standard type hierarchy
3. **Special methods**
4. Coroutines

# Standard-types vs user-defined types
The principal built-in types are:

1.   numerics :  int, float, complex
2.   sequences: list, tuple, range, str, bytes (set and frozenset are unordered set types, not sequences)
3.   mappings: dictionaries
4.   classes
5.   instances
6.   exceptions


User-defined types: types created with the `class` keyword



# Special methods/dunder methods
Special methods are methods whose names start and end with double underscores (hence also called dunder methods). These methods define how objects behave in various contexts, such as when they are created, compared, printed, or used in arithmetic operations.


Note: The first thing to know about special methods is that they are meant to be called by the Python interpreter, and not by you. You don’t write my_object.\_\_repr\_\_(). You write repr(my_object) and, if my_object is an instance of a user-defined class, then Python calls the \_\_repr\_\_ instance method you implemented.

# Make your objects easy to debug

In [12]:
class TechTalk:
    """Plain record of a tech talk.

    Defines only ``__init__``; it has no ``__repr__`` or ``__str__`` of its
    own, so printing an instance shows the default object representation.
    """

    def __init__(self, speaker, venue, topic):
        """Remember who is speaking, where, and about what."""
        self.speaker, self.venue, self.topic = speaker, venue, topic

# TechTalk defines neither __repr__ nor __str__, so print() falls back to the
# default "<module.Class object at 0x...>" text, which tells us nothing about the talk.
talk2  = TechTalk("Ankush", "online", "Data model")
print(talk2)

called init
<__main__.TechTalk object at 0x7fbd9e3e8f70>


In [14]:
class BetterTechTalk:
    """A tech talk that is easy to inspect while debugging.

    ``repr()`` gives an unambiguous, constructor-like form for developers and
    ``str()`` / ``print()`` gives a short sentence for readers.
    """

    def __init__(self, speaker, venue, topic):
        """Store the speaker, the venue and the topic of the talk."""
        self.speaker = speaker
        self.venue = venue
        self.topic = topic

    def __repr__(self):
        """Return a constructor-style representation of the talk.

        Every field is formatted with ``!r`` so strings are quoted (and escaped)
        consistently; the result can be pasted back into Python to rebuild an
        equal talk, even when a value itself contains a quote character.
        """
        return (
            f"{type(self).__name__}"
            f"({self.speaker!r}, {self.venue!r}, {self.topic!r})"
        )

    def __str__(self):
        """Return the reader-friendly one-line description used by print()."""
        return f"Talk by {self.speaker} on {self.topic}"

# print() goes through str(), so BetterTechTalk.__str__ supplies the text shown here.
talk2  = BetterTechTalk("Vimal", venue="online", topic="Embeddings")
print(talk2)
# Uncomment to see the developer-facing text produced by __repr__ instead:
# print(repr(talk2))

Talk by Vimal on Embeddings


# Emulate sequence behaviour in your objects

### properties/methods supported by a list

In [15]:
# Operations available to sequences
from random import choice, shuffle


a = [1, 2, 4, 5, 6, 9] # a built-in list, used as the reference sequence


# indexing: fetch the element at position 0
print(f"indexing => a[0]: {a[0]}")
# prints: indexing => a[0]: 1


# slicing: build a new list from positions 0, 1 and 2
print(f"slicing=> a[0:3]: {a[0:3]}")
# prints: slicing=> a[0:3]: [1, 2, 4]


# iteration (the break stops after the first element, so only one line is printed)
for i in a:
    print(i*2)
    break

# list comprehension
squares = [i*i for i in a]
print(f"squares: {squares}")
# prints: squares: [1, 4, 16, 25, 36, 81]


# choose a random element; no seed is set, so the value differs from run to run
print(f"random selection: {choice(a)}")
# prints e.g.: random selection: 2

# shuffle the list
print(f"before shuffling: {a}")
# prints: before shuffling: [1, 2, 4, 5, 6, 9]

# shuffle() reorders `a` in place, so `a` stays shuffled from here on
shuffle(a)
print(f"shuffled list: {a}")
# prints e.g.: shuffled list: [9, 1, 4, 5, 2, 6]  (order is random)

# sort: sorted() returns a new list and leaves `a` untouched; the identity key
# is only there to show where a custom sort key would go
print(f"sorted list: {sorted(a, key=lambda x:x)}")
# prints: sorted list: [1, 2, 4, 5, 6, 9]

indexing => a[0]: 1
slicing=> a[0:3]: [1, 2, 4]
2
squares: [1, 4, 16, 25, 36, 81]
random selection: 6
before shuffling: [1, 2, 4, 5, 6, 9]
shuffled list: [1, 5, 9, 4, 2, 6]
sorted list: [1, 2, 4, 5, 6, 9]


### not so good class

In [6]:
# user defined type
import collections
Card = collections.namedtuple('Card', ['rank', 'suit'])


# not so pythonic object
class NotGoodCardDeck:
    """Deck of cards that exposes its features through custom method names.

    The methods below are intentionally empty placeholders: they only show the
    kind of ad-hoc API a class ends up with when it ignores special methods.
    """

    suits = 'spades diamonds clubs hearts'.split()
    ranks = list(map(str, range(2, 11))) + list('JQKA')

    def __init__(self):
        """Create one card per (suit, rank) pair, grouped suit by suit."""
        cards = []
        for suit in self.suits:
            cards.extend(Card(rank, suit) for rank in self.ranks)
        self._cards = cards

    def shuffle(self):
        """Placeholder with no behaviour; returns None."""
        return None

    def pick_random_card(self):
        """Placeholder with no behaviour; returns None."""
        return None

    def pick_cards_by_suit(self, suite):
        """Placeholder with no behaviour; ``suite`` is ignored and None is returned."""
        return None

# With this design every capability is reached through a class-specific method
# name that the user has to look up first.
deck = NotGoodCardDeck()

# shuffle (the method is an empty stub, so nothing actually happens)
deck.shuffle()

# pick random card (also an empty stub; evaluates to None)
deck.pick_random_card()



In [16]:
import collections
Card = collections.namedtuple('Card', ['rank', 'suit'])


# pythonic object
class CardDeck:
    """Deck of 52 cards that behaves like a mutable sequence.

    It relies on ``__len__``, ``__getitem__`` and ``__setitem__`` instead of
    custom method names, delegating all three to the internal card list.
    """

    suits = 'spades diamonds clubs hearts'.split()
    ranks = list(map(str, range(2, 11))) + list('JQKA')

    def __init__(self):
        """Create one card per (suit, rank) pair, grouped suit by suit."""
        cards = []
        for suit in self.suits:
            cards.extend(Card(rank, suit) for rank in self.ranks)
        self._cards = cards

    def __len__(self):
        """Number of cards currently held; used by len(deck)."""
        cards = self._cards
        return len(cards)

    def __getitem__(self, index):
        """Return deck[index]; a slice yields a plain list of cards."""
        cards = self._cards
        return cards[index]

    def __setitem__(self, position, card):
        """Replace the card stored at ``position``; used by deck[position] = card."""
        cards = self._cards
        cards[position] = card


In [17]:
deck = CardDeck()

# CardDeck only defines __len__, __getitem__ and __setitem__, yet all of the
# operations below work. `choice` and `shuffle` are the functions imported from
# `random` in an earlier cell.

# indexing: fetch the card at position 0
print(f"indexing => deck[0]: {deck[0]}")
# prints: indexing => deck[0]: Card(rank='2', suit='spades')


# slicing: the slice is passed to __getitem__, which returns a plain list of cards
print(f"slicing=> deck[0:3]: {deck[0:3]}")
# prints: slicing=> deck[0:3]: [Card(rank='2', suit='spades'), Card(rank='3', suit='spades'), Card(rank='4', suit='spades')]


# iteration: there is no __iter__, so Python falls back to __getitem__ with 0, 1, 2, ...
for card in deck: # only the first card is printed because of the break
    print(card)
    # prints: Card(rank='2', suit='spades')
    break

# check for membership: there is no __contains__, so `in` scans the deck by iterating
print(f"Card('Q', 'hearts') in deck: {Card('Q', 'hearts') in deck}")
# True
print(f"Card('Q', 'beasts') in deck: {Card('Q', 'beasts') in deck}")
# False

# reverse the deck: reversed() needs __len__ and __getitem__
for card in reversed(deck): # only the last card of the deck is printed
    print(card)
    # prints: Card(rank='A', suit='hearts')
    break


# choose a random element: choice() needs __len__ and __getitem__
print(f"random selection: {choice(deck)}")
# prints a random card; no seed is set, so it differs from run to run

# shuffle the deck: shuffle() swaps items in place, which is why __setitem__ is needed
print(f"before shuffling: {deck[0:3]}")
shuffle(deck)
print(f"shuffled list: {deck[0:3]}")

# sorting: value of each suit, used to order cards that share the same rank
suit_values = dict(spades=3, hearts=2, diamonds=1, clubs=0)
def spades_high(card):
    """Sort key for a card: rank first, suit as the tie-breaker.

    The rank's position in ``CardDeck.ranks`` is scaled by the number of
    entries in ``suit_values`` and the card's own suit value is added on top,
    so a higher rank always outweighs any suit.
    """
    rank_position = CardDeck.ranks.index(card.rank)
    suit_weight = suit_values[card.suit]
    return rank_position * len(suit_values) + suit_weight

# sorted() returns a new list ordered by spades_high; [-1:] keeps only the
# highest-valued card, still wrapped in a list.
print(f"sorted list: {sorted(deck, key=spades_high)[-1:]}")
# prints: sorted list: [Card(rank='A', suit='spades')]

indexing => deck[0]: Card(rank='2', suit='spades')
slicing=> deck[0:3]: [Card(rank='2', suit='spades'), Card(rank='3', suit='spades'), Card(rank='4', suit='spades')]
Card(rank='2', suit='spades')
Card('Q', 'hearts') in deck: True
Card('Q', 'beasts') in deck: False
Card(rank='A', suit='hearts')
random selection: Card(rank='6', suit='diamonds')
before shuffling: [Card(rank='2', suit='spades'), Card(rank='3', suit='spades'), Card(rank='4', suit='spades')]
shuffled list: [Card(rank='6', suit='clubs'), Card(rank='K', suit='spades'), Card(rank='5', suit='diamonds')]
sorted list: [Card(rank='A', suit='spades')]


## Takeaway
By implementing the special methods \_\_len\_\_ and \_\_getitem\_\_, our CardDeck *behaves like a standard Python sequence*, allowing it to benefit from:
 1.  core language features **(e.g., indexing, iteration, slicing, membership)**
 2.  standard library functions, as shown by the examples using **random.choice, reversed, and sorted.**

# Emulate Numeric types

In [9]:
# Behaviour of built-in numbers that a numeric-like class may want to emulate.
# Only the value of the last expression in the cell is displayed as its output.
a =1
b =2


# addition
a+b # evaluates to 3

# subtraction
a-b # evaluates to -1

# multiplication
a*b # evaluates to 2
# absolute value of a number
abs(-10) # evaluates to 10

# check truthiness of a number
bool(a) # False when a == 0, True otherwise (True here, since a is 1)


True

![](https://www.oreilly.com/api/v2/epubs/9781491946237/files/assets/flup_0101.png)

In [23]:
# Implement a vector class
from math import hypot
class Vector:
    """Two-dimensional vector supporting ``+``, ``*``, ``abs()`` and ``bool()``."""

    def __init__(self, x, y):
        """Store the two components of the vector."""
        self.x = x
        self.y = y

    def __str__(self):
        """Reader-friendly form in unit-vector notation, e.g. ``3i + 4j``."""
        return f"{self.x}i + {self.y}j"

    def __repr__(self):
        """Constructor-style form, e.g. ``Vector(3, 4)``, that rebuilds the vector."""
        return f"Vector({self.x!r}, {self.y!r})"

    def __add__(self, other):
        """Vector addition via ``+``: add the components pairwise.

        Returns ``NotImplemented`` when ``other`` has no ``x``/``y`` components,
        so Python reports an unsupported operand (``TypeError``) instead of
        failing with an unrelated ``AttributeError``.
        """
        if not (hasattr(other, "x") and hasattr(other, "y")):
            return NotImplemented
        return Vector(self.x + other.x, self.y + other.y)

    def __mul__(self, scalar):
        """Scalar multiplication via ``vector * scalar``.

        Multiplying by another ``Vector`` is rejected with ``NotImplemented``;
        without this guard the component products would recurse into this
        method and silently build a vector whose components are vectors.
        """
        if isinstance(scalar, Vector):
            return NotImplemented
        return Vector(scalar * self.x, scalar * self.y)

    def __rmul__(self, scalar):
        """Scalar multiplication via ``scalar * vector``; same result as ``__mul__``."""
        return self.__mul__(scalar)

    def __abs__(self):
        """Magnitude (Euclidean length) of the vector, returned by ``abs()``."""
        return hypot(self.x, self.y)

    def __bool__(self):
        """Truth value: False only when both components are zero.

        The magnitude is zero exactly when both components are zero, so the
        components are tested directly and the square root is skipped.
        """
        return bool(self.x or self.y)

a = Vector(3,4)
b = Vector(5,6)
# print() uses __str__; repr() uses __repr__
print(a)
print(repr(a))
# Add two vector objects using the "+" operator (__add__)
print(a + b)
# Scalar multiplication using the "*" operator: __mul__ handles vector * scalar,
# __rmul__ handles scalar * vector
print(a*5)
print(5*a)
# Calculate the magnitude of the vector using the abs() function (__abs__)
print(abs(a))
# Calculate the truth value using the bool() function (__bool__)
print(bool(a))
print(bool(Vector(0,0)))

3i + 4j
Vector(3, 4)
8i + 10j
15i + 20j
15i + 20j
5.0
True
False


# Example: spaCy Doc object


In [25]:
import spacy
from random import choice, shuffle


# Load the small English pipeline (see the commented install commands in the first cell).
nlp = spacy.load("en_core_web_sm")

# Calling the pipeline on a text returns the `doc` object used below.
doc = nlp("The iceberg is called the Python data model, and it describes the API that you can use to make your own objects play well with the most idiomatic language features. You can think of the data model as a description of Python as a framework.")


# access tokens using indexing
print(f"indexing: {doc[2]}")

# access phrases using slicing
print(f"slicing: {doc[5:8]}")

# filter words using a list comprehension over the tokens of the doc
verbs = [token for token in doc if token.pos_=="VERB"]
print(f"verbs:{verbs}")


# walk the doc in reverse; the break stops after the first (i.e. last) token
for token in reversed(doc):
  print(token)
  break

# pick a random word from the text; as the last expression of the cell its
# value is displayed as the cell output, and it differs from run to run
choice(doc)
# returns a random token from the text




indexing: is
slicing: Python data model
verbs:[called, describes, use, make, play, think]
.


use

# Zen of python

In [32]:
# Zen of python
import this


# What we covered:
1. What is the Python data model?
2. Special methods (dunder methods)
3. Make your objects easy to debug using \_\_str\_\_ and \_\_repr\_\_
4. Make your objects behave like sequences.
5. Built-in function support (make functions like bool, abs work with your objects)
6. Operator overloading (provide operators like "+", "*" to your objects)
7. How spaCy leverages the data model to make the library intuitive for users
8. Zen of Python

# References
0. [Stackoverflow - Difference between str and repr](https://stackoverflow.com/questions/1436703/difference-between-str-and-repr)
1. [Python reference - Data model](https://docs.python.org/3/reference/datamodel.html#special-method-names)
2. [Chapter 1. The Python Data Model](https://www.oreilly.com/library/view/fluent-python/9781491946237/ch01.html)
3. [Book:  Fluent python by Luciano Ramalho](https://www.goodreads.com/book/show/22800567-fluent-python)
4. [SPACY v3: Design concepts explained (behind the scenes)](https://youtu.be/BWhh3r6W-qE?si=I8Lohl4QB4GJtiUb)
