# SD212: Graph mining

# Python basics

This notebook presents some Python basics:
* lists, sets, dictionaries
* numpy arrays
* files (for loading / saving data)

Recall that you can use:
* `tab` for completion
* `?` for inline help

In [1]:
# example
sorted?

## Import

In [2]:
import numpy as np

## List

In [3]:
names = ['Alice', 'Bernard', 'Carole', 'David']

In [4]:
names[-2:]

['Carole', 'David']

In [5]:
names.append('Elodie')

In [6]:
# += extends the list in place with every element of the right-hand list
# (append, by contrast, adds a single item)
names += ['Ferdinand', 'Gabrielle']

In [7]:
names

['Alice', 'Bernard', 'Carole', 'David', 'Elodie', 'Ferdinand', 'Gabrielle']

In [8]:
# list comprehension with a filter: keep the names that do not contain a lowercase 'i'
names_ = [name for name in names if 'i' not in name]

In [9]:
names_

['Bernard', 'Carole']

## Set

In [10]:
set(names)

{'Alice', 'Bernard', 'Carole', 'David', 'Elodie', 'Ferdinand', 'Gabrielle'}

In [11]:
type(set(names))

set

In [12]:
# <= between two sets is the subset test: True when every element on the left is also on the right
set(names_) <= set(names)

True

In [13]:
set(names) - set(names_)

{'Alice', 'David', 'Elodie', 'Ferdinand', 'Gabrielle'}

In [14]:
# deliberately add a name that is already present:
# the list keeps the duplicate, a set built from it does not
names += ['Alice']

In [15]:
names

['Alice',
 'Bernard',
 'Carole',
 'David',
 'Elodie',
 'Ferdinand',
 'Gabrielle',
 'Alice']

In [16]:
set(names)

{'Alice', 'Bernard', 'Carole', 'David', 'Elodie', 'Ferdinand', 'Gabrielle'}

In [17]:
len(names)

8

In [18]:
len(set(names))

7

In [29]:
names_set = set(names)

In [30]:
names_set_ = {'Hector', 'Irina'}

In [31]:
# In-place set operators (each one mutates A and leaves B untouched):
#   A &= B : keep only the elements of A that are also in B (intersection)
#   A |= B : add every element of B to A (union)
#   A -= B : remove from A every element that is in B (difference)
#   A ^= B : keep the elements that are in exactly one of A and B (symmetric difference)
# The plain operators (&, |, -, ^) compute the same results but return a new set.
names_set &= names_set_

In [32]:
names_set

set()

In [33]:
names_set |= names_set_

In [34]:
names_set_.add('Alice')

In [35]:
names_set & names_set_

{'Hector', 'Irina'}

## Dictionary

In [36]:
len(names)

8

In [37]:
names

['Alice',
 'Bernard',
 'Carole',
 'David',
 'Elodie',
 'Ferdinand',
 'Gabrielle',
 'Alice']

In [38]:
# dict comprehension mapping each name to its number of characters;
# the duplicate 'Alice' in names collapses into a single key
length = {name: len(name) for name in names}

In [39]:
len(length)

7

In [40]:
list(length.keys())

['Alice', 'Bernard', 'Carole', 'David', 'Elodie', 'Ferdinand', 'Gabrielle']

In [41]:
list(length.values())

[5, 7, 6, 5, 6, 9, 9]

In [43]:
length

{'Alice': 5,
 'Bernard': 7,
 'Carole': 6,
 'David': 5,
 'Elodie': 6,
 'Ferdinand': 9,
 'Gabrielle': 9}

In [45]:
# iterate over (key, value) pairs directly instead of looking each key up again
for name, size in length.items():
    print(name, size)

Alice 5
Bernard 7
Carole 6
David 5
Elodie 6
Ferdinand 9
Gabrielle 9


In [46]:
length_ = {name: len(name) for name in names_}

In [47]:
length.update(length_)

## Numpy

In [53]:
vector = np.array([5, 1, 2, 4])

In [54]:
np.zeros_like(vector, dtype=int)

array([0, 0, 0, 0])

In [55]:
vector[-2:]

array([2, 4])

In [56]:
np.sort(vector)

array([1, 2, 4, 5])

In [57]:
np.argsort(vector)

array([1, 2, 3, 0])

In [58]:
# negating the values makes argsort return the indices in descending order of vector
np.argsort(-vector)

array([0, 3, 2, 1])

In [59]:
# -1 lets NumPy infer the remaining dimension: 12 elements / 3 rows = 4 columns
matrix = np.arange(12).reshape(3, -1)

In [60]:
matrix.ravel()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [61]:
# axis=0 collapses the rows: one total per column
matrix.sum(axis=0)

array([12, 15, 18, 21])

In [62]:
# axis=1 collapses the columns: one total per row
matrix.sum(axis=1)

array([ 6, 22, 38])

In [63]:
matrix.dot(vector)

array([ 17,  65, 113])

In [64]:
matrix[:2]

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [65]:
# first two rows, every column except the first, selected in a single indexing step
matrix[:2, 1:]

array([[1, 2, 3],
       [5, 6, 7]])

In [66]:
indices = np.array([0, 1, 1, 2, 0])

In [67]:
vector[indices]

array([5, 1, 1, 2, 5])

In [68]:
vector_ = vector[indices]

In [69]:
np.unique(vector_)

array([1, 2, 5])

In [70]:
np.unique(vector_, return_counts=True)

(array([1, 2, 5]), array([2, 1, 2]))

In [71]:
vector = np.arange(-3, 10)

In [72]:
vector > 0

array([False, False, False, False,  True,  True,  True,  True,  True,
        True,  True,  True,  True])

In [73]:
np.sum(vector > 0)

9

In [74]:
vector[vector > 0]

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [75]:
# flat array of the positions whose entry is strictly positive
index = np.flatnonzero(vector > 0)

In [80]:
vector[index]

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [81]:
# the second argument gives a repeat count per element: value k is repeated k times,
# so 0 (repeated zero times) does not appear in the result
np.repeat(np.arange(5), np.arange(5))

array([1, 2, 2, 3, 3, 3, 4, 4, 4, 4])

In [82]:
np.repeat(np.arange(5), 2 * np.arange(5))

array([1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4])

In [105]:
# reduceat with the index pair [i, 1]:
#   - first entry: the sum of values[i:1] when i < 1; otherwise the pair is not
#     increasing and the entry is simply values[i]
#   - second entry: the last index always reduces to the end, i.e. sum(values[1:]) = 27
values = np.arange(1, 8)
for i in range(7):
    print(np.add.reduceat(values, [i, 1]))

[ 1 27]
[ 2 27]
[ 3 27]
[ 4 27]
[ 5 27]
[ 6 27]
[ 7 27]


In [106]:
# same experiment with the index pair [i, 3]:
#   - first entry: the sum of values[i:3] when i < 3, otherwise just values[i]
#   - second entry: sum(values[3:]) = 22
values = np.arange(1, 8)
for i in range(7):
    print(np.add.reduceat(values, [i, 3]))

[ 6 22]
[ 5 22]
[ 3 22]
[ 4 22]
[ 5 22]
[ 6 22]
[ 7 22]


In [107]:
np.add.reduceat(np.arange(10), [0, 1, 2, 3, 4])

array([ 0,  1,  2,  3, 39])

In [108]:
# increasing indices split the array into consecutive slices [0:4], [4:6] and [6:],
# each of which is summed
np.add.reduceat(np.arange(10), [0, 4, 6])

array([ 6,  9, 30])

In [None]:
# on a 4 x 3 matrix, reduceat works along axis 0 by default:
# rows are summed in the groups [0:2], [2:3] and [3:]
np.add.reduceat(np.arange(12).reshape(4, -1), [0, 2, 3])

In [None]:
# with axis=1 the grouping applies to columns instead: [0:2] and [2:]
np.add.reduceat(np.arange(12).reshape(4, -1), [0, 2], axis=1)

In [None]:
# Seed NumPy's legacy global generator so that this draw and the random draws
# in the following cells give the same results on every Restart & Run All.
np.random.seed(0)
# an integer argument n means "draw uniformly from range(n)"
np.random.choice(6)

In [None]:
np.random.choice([1,3,6], size=4)

In [None]:
# p assigns a sampling probability to each candidate value (here 3 is drawn with probability 0.8)
np.random.choice([1,3,6], p=[0.1, 0.8, 0.1], size=4)

In [None]:
# np.save appends the '.npy' extension when the file name does not already end with it,
# so this writes 'vector.npy'
np.save('vector', vector)

In [None]:
np.load('vector.npy')

## Files

In [None]:
# write one name per line; the file is closed automatically when the `with` block exits
with open('names.txt', 'w') as f:
    f.writelines(name + '\n' for name in names)

In [None]:
# Read the file back, one name per line.
# rstrip('\n') removes only the line terminator; slicing with row[:-1] would
# drop a real character if the last line had no trailing newline.
with open('names.txt', 'r') as f:
    names_ = [row.rstrip('\n') for row in f]

In [None]:
names_