# Useful packages for Python
* NumPy SciPy Pandas - fast data processing and calculation
* Matplotlib Seaborn Bokeh Plotly - data visualization
* SciKit-Learn Keras TensorFlow Theano - Machine Learning and Deep Learning
* Gensim NLTK - Natural Language Processing
* Scrapy Statsmodels BeautifulSoup - Data Mining, Statistics

### how to import a package

In [2]:
import numpy as np # np is an alias for the numpy package

In [3]:
numbers = np.array([3, 4, 20, 15, 7, 19, 0]) # works fine

In [4]:
type(numbers) # numpy.ndarray

numpy.ndarray

In [6]:
numbers = np.array([3, 4, 20, 15, 7, 19, 0]) # works via the alias; numpy.array(...) would raise NameError: name 'numpy' is not defined

In [7]:
dir()

['In',
 'Out',
 '_',
 '_5',
 '__',
 '___',
 '__builtin__',
 '__builtins__',
 '__doc__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__',
 '_dh',
 '_i',
 '_i1',
 '_i2',
 '_i3',
 '_i4',
 '_i5',
 '_i6',
 '_i7',
 '_ih',
 '_ii',
 '_iii',
 '_oh',
 'exit',
 'get_ipython',
 'np',
 'numbers',
 'quit']

### import the "pyplot" submodule from the "matplotlib" package with alias "plt"

In [5]:
import matplotlib.pyplot as plt

Matplotlib is building the font cache using fc-list. This may take a moment.


# Jupyter Magics

### change jupyter theme

In [7]:
!python -V

Python 3.7.4


In [9]:
!pip install scipy



In [2]:
??help

# Common data types in Python
* float - used for real numbers
* int - used for integers
* str - used for texts 
    - Defined using single quotes OR double quotes OR triple quotes 
    - The triple quoted strings can be on multiple lines (mostly used for documentation or html)
* bool - used for truth values (True / False)
    - Useful to perform a filtering operation on a data
* list - used to store a collection of values
* dict - used to store key-value pairs
* set - dictionary without values, or a collection of keys


In [9]:
# further reading: tricky Python
# https://medium.com/@tyastropheus/tricky-python-ii-parameter-passing-for-mutable-immutable-objects-10e968cbda35

# List indexing: list[i] -> the (i+1)-th element of the list (indices start at 0)


In [10]:
fruits = ["pineapple", "apple", "lemon", "strawberry", "orange", "kiwi"]

In [13]:
fruits[1]  # apple

'apple'

In [14]:
fruits[0]  # "pineapple"

'pineapple'

In [15]:
fruits[-1] # "kiwi"

'kiwi'

In [16]:
fruits[5]  # "kiwi"

'kiwi'

In [17]:
fruits[-3] # "strawberry"

'strawberry'

### List slicing: <code>list[i:j:s]</code> - the step (s) is optional, and the j-th element is not included

In [18]:
fruits[::]    # ["pineapple", "apple", "lemon", "strawberry", "orange", "kiwi"]

['pineapple', 'apple', 'lemon', 'strawberry', 'orange', 'kiwi']

In [19]:
fruits[0:2]   # ["pineapple", "apple"]

['pineapple', 'apple']

In [20]:
fruits[-2:-1] # ["orange"]

['orange']

In [23]:
fruits[3:]    # ["strawberry", "orange", "kiwi"]

['strawberry', 'orange', 'kiwi']

In [15]:
fruits[:4]    # ["pineapple", "apple", "lemon", "strawberry"]

['pineapple', 'apple', 'lemon', 'strawberry']

In [16]:
fruits[:]     # ["pineapple", "apple", "lemon", "strawberry", "orange", "kiwi"]

['pineapple', 'apple', 'lemon', 'strawberry', 'orange', 'kiwi']

In [17]:
fruits[::-1]  # ["kiwi", "orange", "strawberry", "lemon", "apple", "pineapple"]

['kiwi', 'orange', 'strawberry', 'lemon', 'apple', 'pineapple']

In [18]:
fruits[::-2]  # ["kiwi", "strawberry", "apple"]

['kiwi', 'strawberry', 'apple']

In [19]:
fruits[::2]   # ["pineapple", "lemon", "orange"]

['pineapple', 'lemon', 'orange']

### Understanding some default values

In [20]:
fruits[0:6:1]    # the same result as the result from fruits[::]

['pineapple', 'apple', 'lemon', 'strawberry', 'orange', 'kiwi']

In [21]:
fruits[-1:-7:-1] # the same result as the result from fruits[::-1]

['kiwi', 'orange', 'strawberry', 'lemon', 'apple', 'pineapple']

# List manipulation

### Add values to a list

In [33]:
print(id(fruits))
fruits.append("peach")
fruits.append("banana")
print(fruits) # ["pineapple", "apple", "lemon", "strawberry", "orange", "kiwi", "peach", "banana"]
print(id(fruits))

4695361088
['pineapple', 'apple', 'lemon', 'strawberry', 'orange', 'kiwi', 'peach', 'peach', 'banana', 'fig', 'melon', 'peach', 'banana', 'fig', 'melon', 'peach', 'banana', 'peach', 'banana']
4695361088


In [34]:
fruits = fruits + ["fig", "melon"]
fruits # ["pineapple", "apple", "lemon", "strawberry", "orange", "kiwi", "peach", "banana", "fig", "melon"]

['pineapple',
 'apple',
 'lemon',
 'strawberry',
 'orange',
 'kiwi',
 'peach',
 'peach',
 'banana',
 'fig',
 'melon',
 'peach',
 'banana',
 'fig',
 'melon',
 'peach',
 'banana',
 'peach',
 'banana',
 'fig',
 'melon']

In [31]:
print(id(fruits))

4695361088


### Change values from a list

In [35]:
fruits[0:2] = ["grape", "mango"]
fruits # ["grape", "mango", "lemon", "strawberry", "orange", "kiwi", "peach", "banana", "fig", "melon"]

['grape',
 'mango',
 'lemon',
 'strawberry',
 'orange',
 'kiwi',
 'peach',
 'peach',
 'banana',
 'fig',
 'melon',
 'peach',
 'banana',
 'fig',
 'melon',
 'peach',
 'banana',
 'peach',
 'banana',
 'fig',
 'melon']

### Delete values from a list

In [30]:
fruits.remove("mango")
fruits # ["grape", "lemon", "strawberry", "orange", "kiwi", "peach", "banana", "fig", "melon"]

['grape',
 'lemon',
 'strawberry',
 'orange',
 'kiwi',
 'peach',
 'peach',
 'peach',
 'peach',
 'peach',
 'fig',
 'melon']

In [31]:
fruits.remove("peach")
fruits

['grape',
 'lemon',
 'strawberry',
 'orange',
 'kiwi',
 'peach',
 'peach',
 'peach',
 'peach',
 'fig',
 'melon']

# understand how list works behind the scene
### only the reference is copied - both names refer to the same list

In [1]:
numbers = [10, 42, 28, 420]
numbers_copy = numbers
numbers_copy[2] = 100

In [2]:
print(numbers)      # [10, 42, 100, 420]
print(numbers_copy) # [10, 42, 100, 420]

[10, 42, 100, 420]
[10, 42, 100, 420]


### a new list was generated with the same values

In [3]:
ratings = [4.5, 5.0, 3.5, 4.75, 4.00]
ratings_copy = ratings[:]
ratings_copy[0] = 2.0

In [5]:
print(ratings)      # [4.5, 5.0, 3.5, 4.75, 4.0]
print(ratings[:])
print(ratings_copy) # [2.0, 5.0, 3.5, 4.75, 4.0]

[4.5, 5.0, 3.5, 4.75, 4.0]
[4.5, 5.0, 3.5, 4.75, 4.0]
[2.0, 5.0, 3.5, 4.75, 4.0]


### list.copy() method - Python 3 only

In [6]:
a = [1,2,3]
b = a.copy()
b[0] = 2
print(a)
print(b)

[1, 2, 3]
[2, 2, 3]


### to copy nested lists, use copy.deepcopy

In [7]:
from copy import deepcopy
l = [[1,2,],[3,4]]
l_copy = deepcopy(l)
print(l_copy)
print(l.copy())

[[1, 2], [3, 4]]
[[1, 2], [3, 4]]


### list function also creates a new list instead of copying the reference

In [8]:
characters = ["A", "B", "C"]
characters_copy = list(characters)
characters_copy[-1] = "D"

In [9]:
print(characters)      # ["A", "B", "C"]
print(characters_copy) # ["A", "B", "D"]

['A', 'B', 'C']
['A', 'B', 'D']


# a dictionary is a collection of key-value pairs - similar to HashMap/unordered_map in Java/C++

### suppose you have (movie, rating) data and want fast access to the rating by movie names in O(1) runtime
### this is what you need to do when using lists (list.index() is an O(n) search)

In [10]:
movies = ["Ex Machina", "Mad Max: Fury Road", "1408"]
ratings = [7.7, 8.1, 6.8]
movie_choice_index = movies.index("1408")
print(ratings[movie_choice_index]) # 6.8

6.8


### dict comes to the rescue with intuitive and convenient code

In [19]:
ratings = {
   "Ex Machina": 7.7,
   "Mad Max: Fury Road": 8.1,
   "1408" : 6.8
}
print(ratings["1408"]) # 6.8

6.8


In [20]:
ratings = {}
list1 = []
list1

[]

### dictionary operations - add, modify, delete, and check existence of keys

In [21]:
ratings["2012"] = 10.0
print(ratings) # {'2012': 10.0}

{'2012': 10.0}


In [22]:
ratings["2012"] = 9.8
print(ratings) # {'2012': 9.8}

{'2012': 9.8}


In [23]:
del(ratings["2012"])
print(ratings) # {}

{}


In [24]:
print("2012" in ratings) # False - the key "2012" was just deleted

False


In [25]:
print("1408" in ratings) # False - ratings is empty at this point

False


# function definition in Python 

In [26]:
def is_prime(n):
    """Return True if ``n`` is a prime number, otherwise False.

    Uses trial division over candidates of the form 6k - 1 and 6k + 1:
    once multiples of 2 and 3 are ruled out, every remaining potential
    divisor has one of those two forms, so only they need testing up to
    the square root of ``n``.
    """
    # Small cases: nothing <= 1 is prime, while 2 and 3 are.
    if n <= 1:
        return False
    if n <= 3:
        return True

    # Discard multiples of 2 and 3 up front so the loop can step by 6.
    divisible_by_2_or_3 = n % 2 == 0 or n % 3 == 0
    if divisible_by_2_or_3:
        return False

    candidate = 5
    while candidate * candidate <= n:
        # Test the pair (6k - 1, 6k + 1) for the current k.
        for divisor in (candidate, candidate + 2):
            if n % divisor == 0:
                return False
        candidate += 6
    return True

In [27]:
# e.g., 5, 6, 7, 8, 9, 10
# only 5 and 7 need to be checked; 6, 8, 9, 10 are multiples of 2 or 3.
# 11, 12, 13, 14, 15, 16
# only 11 and 13 need to be checked; 12, 14, 15, 16 are multiples of 2 or 3.

In [28]:
is_prime(113)

True

In [29]:
is_prime()

TypeError: is_prime() missing 1 required positional argument: 'n'

# methods in Python 

### String methods

In [30]:
text = "Data Science"
print(text.upper()) # "DATA SCIENCE"
print(text.lower()) # "data science"
print(text.capitalize()) # "Data science"

DATA SCIENCE
data science
Data science


### List methods

In [31]:
numbers = [1, 4, 0, 2, 9, 9, 10]
numbers.reverse()
#numbers[::-1]
print(numbers) # [10, 9, 9, 2, 0, 4, 1]

[10, 9, 9, 2, 0, 4, 1]


In [32]:
numbers.sort()
print(numbers) # [0, 1, 2, 4, 9, 9, 10]

[0, 1, 2, 4, 9, 9, 10]


In [33]:
b = fruits.reverse()
print(b)

NameError: name 'fruits' is not defined

### Dictionary methods

In [34]:
ratings = {
   "Ex Machina": 7.7,
   "Mad Max: Fury Road": 8.1,
   "1408" : 6.8
}
print(ratings.keys()) # dict_keys(['Ex Machina', 'Mad Max: Fury Road', '1408'])
print(ratings.values()) # dict_values([7.7, 8.1, 6.8])
print(ratings.items()) # dict_items([('Ex Machina', 7.7), ('Mad Max: Fury Road', 8.1), ('1408', 6.8)])

dict_keys(['Ex Machina', 'Mad Max: Fury Road', '1408'])
dict_values([7.7, 8.1, 6.8])
dict_items([('Ex Machina', 7.7), ('Mad Max: Fury Road', 8.1), ('1408', 6.8)])


### methods with the same name can behave differently depending on the class of the object they are called on

In [36]:
numbers = [10, 30, 55, 40, 8, 30]
text = "Data Science"

print(numbers.index(8))  # 4
print(text.index("a"))  # 1

4
1


In [37]:
print(numbers.count(30)) # 2
print(text.count("i"))  # 1

2
1


### Some methods can change the objects they are called on

In [38]:
print(numbers) # [10, 30, 55, 40, 8, 30]
print(id(numbers))
numbers.append(101) # append 101 at the end of list
print(numbers) # [10, 30, 55, 40, 8, 30, 101] 
print(id(numbers))

[10, 30, 55, 40, 8, 30]
4559970048
[10, 30, 55, 40, 8, 30, 101]
4559970048


# swap values between/among variables

In [39]:
a, b = 5, 10
print(a, b) # 5 10

5 10


In [40]:
a, b = b, a
print(a, b) # 10 5

10 5


# concat list to string

In [41]:
a = ['Python', 'is', 'Art']
print(" ".join(a))
print(",".join(a))

Python is Art
Python,is,Art


# find the most frequent element in a list

In [42]:
a = [1,2,3,1,2,3,2,2,4,5,1]
print(max(set(a), key=a.count))

2


In [43]:
??max

In [44]:
set(a)

{1, 2, 3, 4, 5}

In [45]:
a.count(2)

4

### learn more about the max + key usage

In [46]:
# https://stackoverflow.com/questions/18296755/python-max-function-using-key-and-lambda-expression

In [48]:
lst = [(1,'a'), (3,'c'), (4,'e'), (-1,'z')]
print(min(lst, key=lambda x: x[0]))

(-1, 'z')


### using Counter from collections

In [49]:
from collections import Counter
a = [1,2,3,1,2,3,2,2,4,5,1]
cnt = Counter(a)
print(cnt)
print(cnt.most_common(3))

Counter({2: 4, 1: 3, 3: 2, 4: 1, 5: 1})
[(2, 4), (1, 3), (3, 2)]


### check if 2 strings consist of exactly the same characters (i.e. are anagrams)

In [50]:
Counter('groot') == Counter('troog')

True

# reverse string (or list)

In [51]:
a = 'asdfghjkl'
print(a[::-1])

lkjhgfdsa


# iterating in reverse (string or list)

In [52]:
for char in reversed(a):
    print(char)

l
k
j
h
g
f
d
s
a


# reverse integer

In [53]:
a = 123456
print(int(str(a)[::-1]))

654321


# transpose 2d array

In [54]:
original = [['a','b'],['c','d'],['e','f']]
transposed = zip(*original)
print(list(transposed))

[('a', 'c', 'e'), ('b', 'd', 'f')]


In [55]:
lst1 = ['a','b']
lst2 = ['c', 'd']
lst3 = ['e','f']
print(list(zip(lst1, lst2, lst3)))

[('a', 'c', 'e'), ('b', 'd', 'f')]


In [56]:
print(zip(lst1, lst2, lst3))

<zip object at 0x10fcbc9b0>


In [57]:
?zip

# * usage

### The single star * unpacks the sequence/collection into positional arguments, so you can do this:

In [58]:
# unpacking in function call
def sum(a, b):
    """Return the result of ``a + b``.

    Note: this two-argument helper shadows the builtin ``sum`` for the
    rest of the session.
    """
    total = a + b
    return total
values = (1, 2)
# s = sum(values)  # TypeError: the tuple is passed as a single argument, so 'b' is missing
s = sum(*values)
s

3

In [59]:
# This will unpack the tuple so that it actually executes as:
s = sum(1, 2)
s

3

In [60]:
# packing in assignment
x, *xs, y = (1, 2, 3, 4)
print(xs)

[2, 3]


# chained comparison

In [61]:
b = 6
print(4 < b < 7)
print(1 == b < 20)

True
False


# conditional function call with same arguments

In [62]:
def product(a, b):
    """Return ``a`` multiplied by ``b``."""
    result = a * b
    return result

def add(a, b):
    """Return ``a`` plus ``b``."""
    result = a + b
    return result

boo = True
print((product if boo else add)(5, 7))

35


# remove duplicates 

In [63]:
# using set - may not preserve order
l = [2, 1, 3, 4, 5, 4, 3, -99, 3568]
print(set(l))
print(list(set(l)))

{1, 2, 3, 4, 5, 3568, -99}
[1, 2, 3, 4, 5, 3568, -99]


# dedupe and keep order

In [64]:
from collections import OrderedDict
print(list(OrderedDict.fromkeys(l).keys()))

[2, 1, 3, 4, 5, -99, 3568]


# sort dictionary by its value

In [69]:
# using the built-in sorted(); key=lambda x: x[0] sorts the items by key - use x[1] to sort by value
d = {"apple":10, "orange":20, "banana": 5, "rotten tomato": 1}
print(sorted(d.items(), key=lambda x: x[0]))

[('apple', 10), ('banana', 5), ('orange', 20), ('rotten tomato', 1)]


In [70]:
print(d.keys()) 
print(d.values()) 
print(d.items()) 

dict_keys(['apple', 'orange', 'banana', 'rotten tomato'])
dict_values([10, 20, 5, 1])
dict_items([('apple', 10), ('orange', 20), ('banana', 5), ('rotten tomato', 1)])


# sort keys by value

In [95]:
print(sorted(d, key=d.get))

['rotten tomato', 'banana', 'apple', 'orange']


In [102]:
d.get('apple', 0)


10

In [98]:
d["apple"]

10

In [100]:
d.get('kiwi', 0)

0

In [101]:
d["kiwi"]

KeyError: 'kiwi'

In [103]:
list(d)

['apple', 'orange', 'banana', 'rotten tomato']

In [104]:
sorted(d) # iterating over a dict yields its keys, so sorted(d) returns the sorted list of keys

['apple', 'banana', 'orange', 'rotten tomato']

# convert list to comma separated string

In [105]:
l = ['foo', 'bar', 'baz']
print(', '.join(l))

foo, bar, baz


In [106]:
num = [1,2,3,4]
print(', '.join(map(str, num)))

1, 2, 3, 4


In [107]:
for ele in map(str, num):
    print(ele)

1
2
3
4
