In [1]:
import pandas as pd
import numpy as np

# Data Structures

---

## Native Data Structures

In [2]:
print("data science")

data science


In [3]:
a = 1
b = 2
c = a/b

In [4]:
a, b, c

(1, 2, 0.5)

In [5]:
print(type(a))
print(type(b))
print(type(c))

<class 'int'>
<class 'int'>
<class 'float'>


In [6]:
d = [1, 2, 3, 4]

In [7]:
print(type(d))

<class 'list'>


In [8]:
# Python is zero-indexed: the first element lives at index 0
d[0]

1

In [9]:
d[2]

3

In [10]:
d.append('cat')

In [11]:
d

[1, 2, 3, 4, 'cat']

In [12]:
d[-1]

'cat'

## List Comprehension

In [13]:
# .pop() removes and returns the last item of a list
d.pop()

'cat'

In [14]:
d

[1, 2, 3, 4]

In [15]:
#square each element in d
[elem ** 2 for elem in d]

[1, 4, 9, 16]

In [16]:
#for loops in python are slow, list comprehension tends to be faster
new_list = []

for elem in d:
    new_list.append(elem**2)
    
    
new_list

[1, 4, 9, 16]

In [17]:
new_list[2]

9

---

## Dictionaries

Dictionaries are generally harder to construct than lists.

key:value

In [18]:
e = {'a':1, 'b':2, 'dog':'best'}

In [19]:
e['b']

2

In [20]:
password_dict = {'password1':'abcc', 'password2':'dce'}

In [21]:
password_dict['password2']

'dce'

In [22]:
e['b'], e['dog']

(2, 'best')

In [23]:
e['cat']='ok'

In [24]:
e

{'a': 1, 'b': 2, 'dog': 'best', 'cat': 'ok'}

In [25]:
mylist = ['a', 'b', 'c']

# Dict comprehension: map each letter to that letter doubled ('a' -> 'aa').
mydict = {letter: letter + letter for letter in mylist}

In [26]:
mydict

{'a': 'aa', 'b': 'bb', 'c': 'cc'}

In [27]:
'cat' + 'dog'

'catdog'

In [28]:
'cat' + ' ' +'dog'

'cat dog'

# Concepts
- Functions
- Lambda
- Numpy

In [29]:
def addone(a):
    """Return the input increased by one."""
    result = a + 1
    return result

In [30]:
addone(2)

3

In [31]:
def addone(a):
    """Print a greeting, then return the input increased by one.

    The print is only a side effect; the sum is what the caller receives.
    """
    print("hello")
    incremented = a + 1
    return incremented

In [32]:
addone(2)

hello


3

In [33]:
answer = addone(2)
answer

hello


3

In [34]:
def addone(a):
    """Return the tuple ``(a + 1, a + 2, a + 3)``."""
    plus_one = a + 1
    plus_two = a + 2
    plus_three = a + 3
    return (plus_one, plus_two, plus_three)

In [35]:
b = 1
addone(b)

(2, 3, 4)

In [36]:
# A comma-separated return already builds a tuple; the parentheses are optional.
def addone(a):
    """Return ``a + 1``, ``a + 2``, ``a + 3`` and ``a`` squared as one tuple."""
    plus_one, plus_two, plus_three = a + 1, a + 2, a + 3
    squared = a**2
    return plus_one, plus_two, plus_three, squared

In [37]:
b = 1
addone(b)

(2, 3, 4, 1)

#### Lambda

In [38]:
# A lambda is an anonymous single-expression function: this one takes x and returns x + 1.
addone = lambda x: x+1

In [39]:
addone(3)

4

In [40]:
addone(3.5)

4.5

In [41]:
squaring = lambda x: x**2
squaring(5)

25

In [42]:
# A lambda can take multiple inputs; to return a tuple, the tuple must be wrapped in parentheses
squaring = lambda x, y: (x**2, y**2)
squaring(5, 6)

(25, 36)

### Numpy

In [43]:
import numpy as np #good at 'broadcasting', avoiding for loops

In [44]:
a = [1, 2, 3]
b = [4, 5, 6]
a + b # `+` on lists concatenates (b's items follow a's); it does not add element-wise

[1, 2, 3, 4, 5, 6]

In [45]:
# Element-wise addition needs NumPy arrays; on plain lists `+` would concatenate.
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])
a + b

array([5, 7, 9])

#### Zip
- Zips elements together

In [46]:
a = [1, 2, 3]
b = [4, 5, 6]

In [47]:
for elem_a, elem_b in zip(a, b):
    print(elem_a, elem_b) 

1 4
2 5
3 6


In [48]:
a = [1, 2, 3]
b = [4, 5]
for elem_a, elem_b in zip(a, b):
    print(elem_a, elem_b) #b/c 3 has nothing to match with, only zips first two elements of each list

1 4
2 5


---

In [49]:
import numpy as np
def euclidean_distance(x, y):
    """Return the Euclidean distance between two equal-length sequences.

    Computes sqrt((x_1 - y_1)^2 + ... + (x_N - y_N)^2) in pure Python.

    Parameters
    ----------
    x, y : iterable of numbers
        Coordinates of the two points.

    Returns
    -------
    float
        The distance; 0.0 when both inputs are empty.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` have different lengths. Without this check ``zip``
        would silently drop the unmatched trailing coordinates and return a
        distance for the truncated vectors.
    """
    # Materialise first so len() also works for generators and other
    # one-shot iterables that zip() alone would have accepted.
    x, y = list(x), list(y)
    if len(x) != len(y):
        raise ValueError("dimension mismatch")
    squared_diffs = [(a - b) ** 2 for a, b in zip(x, y)]
    return sum(squared_diffs) ** 0.5  # ** 0.5 takes the square root without numpy

In [50]:
x = [1, 2, 3]
y = [4, 5, 6]
euclidean_distance(x, y)

5.196152422706632

---

In [57]:
import numpy as np
def euclidean_distance(x, y):
    """Return the Euclidean distance between ``x`` and ``y`` using NumPy.

    Computes sqrt((x_1 - y_1)^2 + ... + (x_N - y_N)^2).

    Parameters
    ----------
    x, y : array_like
        Coordinates of the two points. Lists and tuples are accepted as well
        as NumPy arrays because both inputs are passed through ``np.asarray``.

    Returns
    -------
    NumPy scalar
        Square root of the summed squared differences.
    """
    # Plain Python lists do not support `-`, so convert up front; inputs that
    # are already ndarrays are used as-is.
    diff = np.asarray(x) - np.asarray(y)
    squared = np.square(diff)  # element-wise square
    return np.sqrt(squared.sum())


In [58]:
#must send in numpy array
x = np.array([1, 2, 3])
y = np.array([4, 5, 6])


euclidean_distance(x, y)

5.196152422706632

#### One Liner function for Euclidean Distance

In [59]:
def euclidean_distance(x, y):
    """One-line Euclidean distance: sqrt((x_1 - y_1)^2 + ... + (x_N - y_N)^2).

    Parameters
    ----------
    x, y : array_like
        Coordinates of the two points. Both go through ``np.asarray``, so
        lists and tuples work as well as NumPy arrays.

    Returns
    -------
    NumPy scalar
        Square root of the summed squared differences.
    """
    return np.sqrt(((np.asarray(x) - np.asarray(y)) ** 2).sum())  # one-liner

In [60]:
#must send in numpy array
x = np.array([1, 2, 3])
y = np.array([4, 5, 6])


euclidean_distance(x, y)

5.196152422706632

# Conditional Statements and For Loops

In [62]:
a = 1

if a < 0:
    print ("a is negative")
elif a == 0:
    print('a is zero')
else:
    print("a is positive")

a is positive


In [64]:
mystr ='cat'

if mystr == 'cat':
    print('match')
    

match


In [65]:
mystr = 'is there a cat in here'
if "is ther" in mystr:
    print("match")

match


In [66]:
mystr = None
if mystr:
    print("there's something")
else:
    print("there's nothing")

there's nothing


In [67]:
mystr = 0
if mystr:
    print("there's something")
else:
    print("there's nothing")


there's nothing


In [69]:
mystr = True
if mystr:
    print("there's something")
else:
    print("there's nothing")

there's something


In [70]:
mystr = False
if mystr:
    print("there's something")
else:
    print("there's nothing")

there's nothing


In [71]:
mystr = 1 == 1
if mystr:
    print("there's something")
else:
    print("there's nothing")

there's something


In [72]:
1 == 1

True

# For Loops

- Avoid explicit `for` loops where possible; they are comparatively slow in Python. List comprehensions and vectorized NumPy operations are usually more efficient (a lambda on its own does not make code faster).

In [74]:
for i in range(10): # range(10) yields 0 through 9: the start is inclusive, the stop is exclusive
    print(i)

0
1
2
3
4
5
6
7
8
9


#### Get even numbers in a range

In [75]:
for i in range(10):
    if i % 2 == 0:
        print(i)

0
2
4
6
8


#### Get odd numbers in a range

In [76]:
for i in range(10):
    if i % 2 == 1:
        print(i)

1
3
5
7
9


#### Print the even numbers in the range 0 - 9, skipping 4 with `continue` and stopping at 8 with `break`

In [77]:
for i in range(10):
    if i == 4:
        continue #skips at 4
    if i == 8:
        break
    if i % 2 == 0:
        print(i)

0
2
6


In [79]:
for i in range(11):
    if i == 4:
        continue #skips at 4
#     if i == 8:
#         break
    if i % 2 == 0:
        print(i)

0
2
6
8
10


#### (`break` only exits the innermost loop)

In [80]:
for j in range(2):
    for i in range (11):
        if i == 4:
            continue
        if i == 8: #broke at 8, but then does 0 - 6 again
            break
        if i % 2 == 0:
            print(i)

0
2
6
0
2
6


## Nesting

In [82]:
for i in range(4): #i going 0-3
    for j in range(4): #j going 0-3
        if j > i:
            break
        print(i, j)

0 0
1 0
1 1
2 0
2 1
2 2
3 0
3 1
3 2
3 3


# Dictionaries + Conditional Statements

In [83]:
for key, val in mydict.items():
    print(key, val*2)

a aaaa
b bbbb
c cccc


In [84]:
#get key:value pair
mydict.items()

dict_items([('a', 'aa'), ('b', 'bb'), ('c', 'cc')])

In [85]:
#get keys
mydict.keys()

dict_keys(['a', 'b', 'c'])

In [86]:
#get values
mydict.values()

dict_values(['aa', 'bb', 'cc'])

In [89]:
#get each letter of string
for letter in "cat":
    print(letter)

c
a
t


# Summary
- `if` statements test whether their condition is truthy or falsy
- `None` and `0` are falsy, so they behave like `False` in an `if` statement
- `for` loops need something to iterate over; for building lists they are usually slower than a list comprehension
- `continue` skips to the next iteration
- `break` exits only the innermost enclosing loop
