# Basic types and constructions in Python

## 1. Basic types: numeric types

In [1]:
# int: an integer literal
num = 5
print(num, type(num), sep="\n")

5
<class 'int'>


In [2]:
# float: a literal with a decimal point
num = 13.4
print(num)

# scientific notation also produces a float: 1.5e2 means 1.5 * 10**2
num = 1.5e2
print(type(num))

13.4
<class 'float'>


In [3]:
# Explicit conversion between numeric types: first int(), then float()
for convert in (int, float):
    num = convert(num)
    print(num, type(num))

150 <class 'int'>
150.0 <class 'float'>


## 2. Basic types: **logical types**

In [4]:
# bool: every comparison evaluates to True or False
for outcome in (3 > 4, 3 <= 3, 6 >= 6, 6 < 5):
    print(outcome)

False
True
True
False


In [5]:
# the three logical operators applied to a False/True pair
x, y = False, True
print(x and y)  # logical and
print(x or y)   # logical or
print(not y)    # logical not

False
True
False


In [6]:
# example: `and` binds tighter than `or`, so the expression groups as (x and y) or z
x, y, z = True, False, True
result = (x and y) or z
print(result)

True


## 3. Basic types: string

In [7]:
# str: text written between quotes
example_string = "Python course in ITMO"
print(example_string, type(example_string), sep="\n")

Python course in ITMO
<class 'str'>


In [8]:
# indexing: pick a single character by its zero-based position
example_string[7]

'c'

In [9]:
# combine (concatenate) strings with the + operator
"You " + "can " + "just " + "add " + "one " + "string " + "to another "

'You can just add one string to another '

In [10]:
# slicing string: [start:stop] returns the characters from index `start`
# up to, but not including, index `stop`
example_string = "Python course in ITMO"
example_string[17:21]

'ITMO'

In [11]:
# 'in' operator: True when the left string occurs inside the right one
"3.14" in "Pi number = 3.1415926"

True

In [12]:
# iterating over a string yields its characters one at a time
example_string = "ITMO"
for letter in example_string:
    print(f"Letter {letter}")

Letter I
Letter T
Letter M
Letter O


In [13]:
# create string: str.format() substitutes the named placeholders in braces
"{num} K ought to be enough for anybody. ({author})".format(
    num="ITMO", author="Bill Gates")

'ITMO K ought to be enough for anybody. (Bill Gates)'

## 4. Basic types: None object



In [14]:
# None is the single value of NoneType
z = None
print(z, type(z), sep="\n")

None
<class 'NoneType'>


# Collections in Python

## 1. Collections: list

In [15]:
# an empty list
list0 = []

In [16]:
# a list of integers
saled_goods_count = [33450, 34010, 33990, 33200]

print(saled_goods_count, type(saled_goods_count), sep="\n")

[33450, 34010, 33990, 33200]
<class 'list'>


In [17]:
# a list of strings
collections = ['list', 'tuple', 'dict', 'set']

print(collections, type(collections), sep="\n")

['list', 'tuple', 'dict', 'set']
<class 'list'>


In [18]:
# a single list may hold values of different types
features = ['Ivan Ivanovich', 'Medium', 500000, 12, True]
print(features, type(features), sep="\n")

['Ivan Ivanovich', 'Medium', 500000, 12, True]
<class 'list'>


In [19]:
# list length: len() returns the number of elements
len(collections)

4

In [20]:
# indexes and slices: positions start at 0; negative positions count from the end
print(collections)

print(collections[0])
print(collections[-1])

['list', 'tuple', 'dict', 'set']
list
set


In [21]:
# materialise the numbers 5..9 (the stop value 10 is excluded) as a list
range_list = [*range(5, 10)]
print(range_list)

[5, 6, 7, 8, 9]


In [22]:
# elements at indexes 1 and 2 (the stop index 3 is excluded)
range_list[1:3]

[6, 7]

In [23]:
# from index 3 to the end of the list
range_list[3:]

[8, 9]

In [24]:
# from the start up to (not including) index 5 — here that is the whole list
range_list[:5]

[5, 6, 7, 8, 9]

In [25]:
# step of 2: every second element
range_list[::2]

[5, 7, 9]

In [26]:
# step of -1: the list in reverse order
range_list[::-1]

[9, 8, 7, 6, 5]

In [27]:
# iterations: a for loop visits the elements in order
collections = ['list', 'tuple', 'dict', 'set']

for collection in collections:
    print(f'Learning {collection}...')

Learning list...
Learning tuple...
Learning dict...
Learning set...


In [28]:
# enumerate() pairs every element with its zero-based position
for idx, collection in enumerate(collections):
    print(f'#{idx} {collection}')

#0 list
#1 tuple
#2 dict
#3 set


In [29]:
# add or remove elements: append() adds one item to the end of the list
collections.append('OrderedDict')

print(collections)

['list', 'tuple', 'dict', 'set', 'OrderedDict']


In [30]:
# extend() appends every item of the given iterable
collections.extend(['ponyset', 'unicorndict'])

print(collections)

['list', 'tuple', 'dict', 'set', 'OrderedDict', 'ponyset', 'unicorndict']


In [31]:
# += with a list also appends its items to the existing list
collections += [None]

print(collections)

['list', 'tuple', 'dict', 'set', 'OrderedDict', 'ponyset', 'unicorndict', None]


In [32]:
# del removes the item at the given index (index 4 held 'OrderedDict')
del collections[4]

print(collections)

['list', 'tuple', 'dict', 'set', 'ponyset', 'unicorndict', None]


In [33]:
# min, max, sum: built-in aggregates over a list of numbers
numbers = [4, 17, 19, 9, 2, 6, 10, 13]

for aggregate in (min, max, sum):
    print(aggregate(numbers))

2
19
80


## 2. Collections: tuple

In [34]:
# two equivalent ways to create an empty tuple
empty_tuple = ()
empty_tuple = tuple()

In [35]:
# a tuple of three type objects
immutables = (int, str, tuple)

In [36]:
# the trailing comma is what makes a one-element tuple
one_element_tuple = (1,)
# without the comma the parentheses only group: this is the int 1
guess_what = (1)

type(guess_what)

int

## 3. Collections: dictionary


In [37]:
# two equivalent ways to create an empty dict
empty_dict = {}
empty_dict = dict()

# a dict maps keys to values; here every value is a list of names
collections_map = {
    'mutable': ['list', 'set', 'dict'],
    'immutable': ['tuple', 'frozenset']
}

In [38]:
# look up a value by its key
print(collections_map['immutable'])

['tuple', 'frozenset']


In [39]:
# add elements: assigning to a key that is not yet present inserts it
beatles_map = {
    'Paul': 'Bass',
    'John': 'Guitar',
    'George': 'Guitar',
}

print(beatles_map)

beatles_map['Ringo'] = 'Drums'

print(beatles_map)

{'Paul': 'Bass', 'John': 'Guitar', 'George': 'Guitar'}
{'Paul': 'Bass', 'John': 'Guitar', 'George': 'Guitar', 'Ringo': 'Drums'}


In [40]:
# delete elements: del removes the key together with its value
del beatles_map['John']


print(beatles_map)

{'Paul': 'Bass', 'George': 'Guitar', 'Ringo': 'Drums'}


In [41]:
# iteration: items() yields (key, value) pairs
for key, value in beatles_map.items():
    print(f'{key} — {value}')

Paul — Bass
George — Guitar
Ringo — Drums


## 4. Collections: set

In [42]:
# set() creates an empty set
empty_set = set()
# repeated values collapse: a set keeps each element only once
number_set = {1, 2, 3, 3, 3, 3, 4, 5}

print(number_set)

{1, 2, 3, 4, 5}


In [43]:
# start from two empty sets (set() is required: {} would create a dict)
odd_set, even_set = set(), set()

odd_set = {1, 3, 5, 7}
even_set = {2, 4, 6, 8}

print(odd_set)
print(even_set)

# union: elements that belong to at least one of the sets
union_set = odd_set | even_set
print(union_set)

# intersection: elements common to both sets (none here, so the result is empty)
intersection_set = odd_set & even_set
print(intersection_set)

# a set can be iterated over, but its iteration order is not guaranteed
for element in union_set:
    print(element)

{1, 3, 5, 7}
{8, 2, 4, 6}
{1, 2, 3, 4, 5, 6, 7, 8}
set()
1
2
3
4
5
6
7
8


## Where you can find more information about Python

* https://www.coursera.org/courses?query=Python
* https://www.codecademy.com
* http://www.pythontutor.ru
* http://www.learnpythonthehardway.org
* http://snakify.org
* https://www.checkio.org

# Numpy

## Numpy: arrays

### Basics 

In [44]:
import numpy as np

In [45]:
# array: build a numpy array from a Python list
a = np.array([1, 2, 3, 4, 5])
print(a)

[1 2 3 4 5]


In [46]:
# float type array: dtype sets the element type explicitly
b = np.array([1.5, 2.6, 3.1, 4.8, 5.0], dtype=float)
print(b)

[1.5 2.6 3.1 4.8 5. ]


In [47]:
# boolean type array: dtype=bool stores True/False values
c = np.array([True, False, True], dtype=bool)
print(c)

[ True False  True]


In [48]:
# another way to create a numpy array: values from start up to (not including) stop
d = np.arange(start=10, stop=20, step=1)
print(d)

[10 11 12 13 14 15 16 17 18 19]


In [49]:
# ndim is the number of dimensions (axes) of an array; c is one-dimensional
print(c.ndim)

1


In [50]:
# operations: arithmetic on arrays of equal length is applied element by element
print(a)
print(b)
print(a + b) # sum
print(a - b) # diff
print(a*b)   # multiply

[1 2 3 4 5]
[1.5 2.6 3.1 4.8 5. ]
[ 2.5  4.6  6.1  8.8 10. ]
[-0.5 -0.6 -0.1 -0.8  0. ]
[ 1.5  5.2  9.3 19.2 25. ]


### Norms of a vector

**p-norm**

Let p ≥ 1 be a real number. The 
p-norm of vector $x = (x_{1}, \dots, x_{n}) \in \mathbb{R}^{n}$ is

$$
\left\Vert x \right\Vert_{p} = \left( \sum_{i=1}^n \left| x_{i} \right|^{p} \right)^{1 / p},~p \geq 1.
$$

For $p = 1$ we get the taxicab norm; for $p = 2$ we get the Euclidean norm.

In [51]:
from numpy.linalg import norm

**Taxicab norm or Manhattan norm**
$$
 \left\Vert x \right\Vert_{1} = \sum_{i=1}^n \left| x_{i} \right|.
$$

In [52]:
# ord=1 selects the taxicab norm: the sum of absolute values
a = np.array([1, 2, -3])
print(f'L1 norm of vector a: {norm(a, ord=1)}')

L1 norm of vector a: 6.0


**Euclidean norm**
$$
 \left\Vert x \right\Vert_{2} = \sqrt{\sum_{i=1}^n \left( x_{i} \right)^2}.
$$

In [53]:
# ord=2 selects the Euclidean norm
print('L2 norm of vector a: {norm}'.format(norm = norm(a, ord=2)))

L2 norm of vector a: 3.7416573867739413


### Distance between arrays

For two arrays $x = (x_{1}, \dots, x_{n}) \in \mathbb{R}^{n}$ and $y = (y_{1}, \dots, y_{n}) \in \mathbb{R}^{n}$, the $\ell_{1}$ and $\ell_{2}$ distances are calculated as:
$$
 \rho_{1}\left( x, y \right) = \left\Vert x - y \right\Vert_{1} = \sum_{i=1}^n \left| x_{i} - y_{i} \right|
$$

$$
 \rho_{2}\left( x, y \right) = \left\Vert x - y \right\Vert_{2} = 
 \sqrt{\sum_{i=1}^n \left( x_{i} - y_{i} \right)^2}.
$$

In [54]:
# the distance between two vectors is the norm of their difference
b = np.array([1, 246, -565])
print('L1 distance between a and b: {dist}'.format(dist = norm(a - b, ord=1)))

L1 distance between a and b: 806.0


In [55]:
# the same difference vector, measured with the Euclidean norm
print('L2 distance between a and b: {dist}'.format(dist = norm(a - b, ord=2)))

L2 distance between a and b: 612.6826258349424


### Scalar product

The scalar (dot) product in $\mathbb{R}^{n}$ of two vectors $x = (x_{1}, \dots, x_{n})$ and $y = (y_{1}, \dots, y_{n})$ is:

$$
\langle x, y \rangle = \sum_{i=1}^n x_{i} y_{i}.
$$

In [56]:
# np.dot of two 1-D arrays is their scalar product
print('Scalar dot of a and b: {dot}'.format(dot = np.dot(a, b)))

Scalar dot of a and b: 2188


In [57]:
# another way: the same product through the array's own dot() method
print('Scalar dot of a and b: {dot}'.format(dot = a.dot(b)))

Scalar dot of a and b: 2188


## Numpy: matrix

### Basics

In [58]:
import numpy as np

In [59]:
# a list of equal-length rows becomes a two-dimensional array (a matrix)
a = np.array([[1, 2, 3], [2, 5, 6], [6, 7, 4]])
print(a)

[[1 2 3]
 [2 5 6]
 [6 7 4]]


In [60]:
# more examples: np.eye(5) is the 5 x 5 identity matrix
b = np.eye(5)
print(b)

[[1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 1.]]


In [61]:
# np.ones takes a shape tuple: 7 rows by 5 columns, all filled with ones
c = np.ones((7, 5))
print(c)

[[1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]]


In [62]:
# the 12 even numbers 0..22 as a flat array
v = np.arange(0, 24, 2)
print(v)
# the same 12 values arranged as 3 rows by 4 columns
d = v.reshape((3, 4))
print(d)

[ 0  2  4  6  8 10 12 14 16 18 20 22]
[[ 0  2  4  6]
 [ 8 10 12 14]
 [16 18 20 22]]


In [63]:
# indexes: d[row, column], both zero-based; ':' selects everything along that axis
print(d)
print(d[2, 1])
print(d[1, :])
print(d[:, 3])

[[ 0  2  4  6]
 [ 8 10 12 14]
 [16 18 20 22]]
18
[ 8 10 12 14]
[ 6 14 22]


### Matrix and vectors multiplication

In [64]:
# a is the 2 x 2 identity matrix, so the product a·b equals b
a = np.array([[1, 0], [0, 1]])
b = np.array([[4, 1], [2, 2]])
# two equivalent spellings of the matrix product
r1 = np.dot(a, b)
r2 = a.dot(b)

In [65]:
# show both operands and the two (identical) products
for title, matrix in (
    ('Matrix a:\n', a),
    ('Matrix b:\n', b),
    ('Multiplication, 1 method:\n', r1),
    ('Multiplication, 2 method:\n', r2),
):
    print(title, matrix)

Matrix a:
 [[1 0]
 [0 1]]
Matrix b:
 [[4 1]
 [2 2]]
Multiplication, 1 method:
 [[4 1]
 [2 2]]
Multiplication, 2 method:
 [[4 1]
 [2 2]]


### Transposition

In [67]:
a = np.array([[1, 2], [3, 4]])
# two equivalent ways to transpose: the function and the .T attribute
b = np.transpose(a)
c = a.T

In [68]:
# show the matrix and both transposed results
for title, matrix in (
    ('Matrix a:\n', a),
    ('Transposed matrix:\n', b),
    ('Transposed matrix:\n', c),
):
    print(title, matrix)

Matrix a:
 [[1 2]
 [3 4]]
Transposed matrix:
 [[1 3]
 [2 4]]
Transposed matrix:
 [[1 3]
 [2 4]]


### Determinant

In [69]:
# np.linalg.det computes the determinant of a square matrix
a = np.array([[1, 2, 1], [1, 1, 4], [2, 3, 6]], dtype=np.float32)
det = np.linalg.det(a)

In [70]:
# show the matrix and its determinant
print('Matrix a:\n', a)
print('Determinant of a:\n', det)

Matrix a:
 [[1. 2. 1.]
 [1. 1. 4.]
 [2. 3. 6.]]
Determinant of a:
 -1.0


### Rank

In [71]:
# the third row is twice the second one, so the rank is less than 3
a = np.array([[1, 2, 3], [1, 1, 1], [2, 2, 2]])
r = np.linalg.matrix_rank(a)

In [72]:
# show the matrix and its rank
print('Matrix a:\n', a)
print('Rank of a:\n', r)

Matrix a:
 [[1 2 3]
 [1 1 1]
 [2 2 2]]
Rank of a:
 2


In [73]:
# check whether a system of vectors is linearly independent:
# stack the vectors as the rows of a matrix and compare its rank
# with the number of vectors (m.shape[0])
a = np.array([1, 2, 3])
b = np.array([1, 1, 1])
c = np.array([2, 3, 5])
m = np.array([a, b, c])

print(np.linalg.matrix_rank(m) == m.shape[0])

True


### System of linear equations

In [74]:
# solve a·x = b, i.e. the system 3*x0 + x1 = 9, x0 + 2*x1 = 8
a = np.array([[3, 1], [1, 2]])
b = np.array([9, 8])
x = np.linalg.solve(a, b)


In [75]:
# show the coefficient matrix, the right-hand side and the solution
# (b is a 1-D vector here, although its label says "Matrix")
print('Matrix a:\n', a)
print('Matrix b:\n', b)
print('Solution:\n', x)

Matrix a:
 [[3 1]
 [1 2]]
Matrix b:
 [9 8]
Solution:
 [2. 3.]


### Inverse matrix

In [76]:
# np.linalg.inv computes the inverse of a square matrix
a = np.array([[1, 2, 1], [1, 1, 4], [2, 3, 6]], dtype=np.float32)
b = np.linalg.inv(a)

In [77]:
# show the matrix, its inverse, and their product (the identity matrix)
for title, matrix in (
    ('Matrix a:\n', a),
    ('Invertible matrix b:\n', b),
    ('Identity matrix:\n', a.dot(b)),
):
    print(title, matrix)

Matrix a:
 [[1. 2. 1.]
 [1. 1. 4.]
 [2. 3. 6.]]
Invertible matrix b:
 [[ 6.  9. -7.]
 [-2. -4.  3.]
 [-1. -1.  1.]]
Identity matrix:
 [[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]


# Pandas

## Basics

In [78]:
import pandas as pd

In [79]:
# read the CSV file into a DataFrame (the path is relative to the working directory)

df = pd.read_csv('RegularSeasonCompactResults.csv')

In [80]:
# display the DataFrame (a long frame is shown truncated)
df

Unnamed: 0,Season,Daynum,Wteam,Wscore,Lteam,Lscore,Wloc,Numot
0,1985,20,1228,81,1328,64,N,0
1,1985,25,1106,77,1354,70,H,0
2,1985,25,1112,63,1223,56,H,0
3,1985,25,1165,70,1432,54,H,0
4,1985,25,1192,86,1447,74,H,0
...,...,...,...,...,...,...,...,...
145284,2016,132,1114,70,1419,50,N,0
145285,2016,132,1163,72,1272,58,N,0
145286,2016,132,1246,82,1401,77,N,1
145287,2016,132,1277,66,1345,62,N,0


In [81]:
# see the first 10 rows (head() without an argument returns 5)
df.head(10)

Unnamed: 0,Season,Daynum,Wteam,Wscore,Lteam,Lscore,Wloc,Numot
0,1985,20,1228,81,1328,64,N,0
1,1985,25,1106,77,1354,70,H,0
2,1985,25,1112,63,1223,56,H,0
3,1985,25,1165,70,1432,54,H,0
4,1985,25,1192,86,1447,74,H,0
5,1985,25,1218,79,1337,78,H,0
6,1985,25,1228,64,1226,44,N,0
7,1985,25,1242,58,1268,56,N,0
8,1985,25,1260,98,1133,80,H,0
9,1985,25,1305,97,1424,89,H,0


In [82]:
# see the last 5 rows (the counterpart of head())
df.tail(5)

Unnamed: 0,Season,Daynum,Wteam,Wscore,Lteam,Lscore,Wloc,Numot
145284,2016,132,1114,70,1419,50,N,0
145285,2016,132,1163,72,1272,58,N,0
145286,2016,132,1246,82,1401,77,N,1
145287,2016,132,1277,66,1345,62,N,0
145288,2016,132,1386,87,1433,74,N,0


In [83]:
# see the dimensions of the dataframe as a (rows, columns) tuple
df.shape

(145289, 8)

In [84]:
# extract all the column names as a plain Python list
df.columns.tolist()

['Season', 'Daynum', 'Wteam', 'Wscore', 'Lteam', 'Lscore', 'Wloc', 'Numot']

In [85]:
# see summary statistics (count, mean, std, min, quartiles, max) for each numeric column
df.describe()

Unnamed: 0,Season,Daynum,Wteam,Wscore,Lteam,Lscore,Numot
count,145289.0,145289.0,145289.0,145289.0,145289.0,145289.0,145289.0
mean,2001.574834,75.223816,1286.720646,76.600321,1282.864064,64.497009,0.044387
std,9.233342,33.287418,104.570275,12.173033,104.829234,11.380625,0.247819
min,1985.0,0.0,1101.0,34.0,1101.0,20.0,0.0
25%,1994.0,47.0,1198.0,68.0,1191.0,57.0,0.0
50%,2002.0,78.0,1284.0,76.0,1280.0,64.0,0.0
75%,2010.0,103.0,1379.0,84.0,1375.0,72.0,0.0
max,2016.0,132.0,1464.0,186.0,1464.0,150.0,6.0


In [86]:
# get the maximum value of every column
# (for the text column Wloc this is the alphabetically last value)
df.max()

Season    2016
Daynum     132
Wteam     1464
Wscore     186
Lteam     1464
Lscore     150
Wloc         N
Numot        6
dtype: object

In [87]:
# get the max value for a particular column, selected by its name
df['Wscore'].max() # or mean(), median(), min(), argmax()

186

In [88]:
# count how many distinct values each column contains
# (for the frequency of each individual value use value_counts() instead)
print(len(df['Wscore'].unique()))
print(len(df['Wteam'].unique()))
print(len(df['Season'].unique()))

130
364
32


## Sorting Values

In [89]:
# sort the dataframe in decreasing order of the losing team's score
# (ascending=False puts the highest scores first)
df.sort_values('Lscore', ascending=False).head()

Unnamed: 0,Season,Daynum,Wteam,Wscore,Lteam,Lscore,Wloc,Numot
17867,1989,92,1258,181,1109,150,H,0
16853,1989,68,1258,162,1109,144,A,0
22074,1990,96,1261,148,1258,141,H,0
24970,1991,68,1258,186,1109,140,H,0
1126,1985,65,1424,142,1429,140,A,0


## Filtering Rows

In [90]:
# find the games in which the losing team scored fewer than 10 points
# (no row satisfies the condition, so the result is empty)
df[df['Lscore'] < 10].head(10)

Unnamed: 0,Season,Daynum,Wteam,Wscore,Lteam,Lscore,Wloc,Numot


In [91]:
# find the games where the winning team scored more than 150 points and
# the losing team scored below 90; each condition needs its own parentheses
# because & binds tighter than the comparisons
df2 = df[(df['Wscore'] > 150) & (df['Lscore'] < 90)]
df2

Unnamed: 0,Season,Daynum,Wteam,Wscore,Lteam,Lscore,Wloc,Numot
12046,1988,40,1328,152,1147,84,H,0
28687,1992,54,1261,159,1319,86,H,0
52600,1998,33,1395,153,1410,87,H,0


## Grouping

In [92]:
# save the filtered frame to an Excel file (path relative to the working directory)
df2.to_excel("df2.xlsx")

In [93]:
# get the mean Wscore of each Wteam (only the first 5 teams are shown)
df.groupby('Wteam')['Wscore'].mean().head()

Wteam
1101    78.111111
1102    69.893204
1103    75.839768
1104    75.825944
1105    74.960894
Name: Wscore, dtype: float64

In [94]:
# count how many times each team won at home, on the road, or at a neutral site
df.groupby('Wteam')['Wloc'].value_counts().head(9)

Wteam  Wloc
1101   H        12
       A         3
       N         3
1102   H       204
       A        73
       N        32
1103   H       324
       A       153
       N        41
Name: Wloc, dtype: int64

## Extracting rows and columns

In [95]:
# select several columns by passing a list of names, then keep the first 5 rows
df2 = df[['Wscore', 'Lscore']].head()

In [96]:
# display the two selected columns
df2

Unnamed: 0,Wscore,Lscore
0,81,64
1,77,70
2,63,56
3,70,54
4,86,74


In [97]:
# iloc selects by position: rows 1..9 of the column at position 1 (Daynum)
df.iloc[1:10, 1]

1    25
2    25
3    25
4    25
5    25
6    25
7    25
8    25
9    25
Name: Daynum, dtype: int64

In [98]:
# a plain slice on a DataFrame selects rows: here the first three
df[0:3]

Unnamed: 0,Season,Daynum,Wteam,Wscore,Lteam,Lscore,Wloc,Numot
0,1985,20,1228,81,1328,64,N,0
1,1985,25,1106,77,1354,70,H,0
2,1985,25,1112,63,1223,56,H,0


In [99]:
# rows 100..119 and the columns from position 1 onward
# (the frame has only 8 columns, so the stop value 10 is clipped)
df.iloc[100:120,1:10]


Unnamed: 0,Daynum,Wteam,Wscore,Lteam,Lscore,Wloc,Numot
100,29,1409,86,1331,82,A,0
101,29,1444,90,1434,57,A,0
102,29,1451,89,1365,61,H,0
103,30,1139,78,1442,67,A,0
104,30,1154,78,1149,61,A,0
105,30,1181,92,1456,60,H,0
106,30,1228,81,1152,63,H,0
107,30,1229,93,1227,62,H,0
108,30,1234,89,1287,46,H,0
109,30,1259,81,1406,73,A,0


Pandas has been around for a while, and there are a lot of other good resources if you're still interested in getting the most out of this library:

* http://pandas.pydata.org/pandas-docs/stable/10min.html
* https://www.datacamp.com/community/tutorials/pandas-tutorial-dataframe-python
* http://www.gregreda.com/2013/10/26/intro-to-pandas-data-structures/
* https://www.dataquest.io/blog/pandas-python-tutorial/
* https://drive.google.com/file/d/0ByIrJAE4KMTtTUtiVExiUGVkRkE/view
* https://www.youtube.com/playlist?list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y