In [1]:
import numpy as np
mnist_filename = 'mnist_kaggle_some_rows.csv'
data = np.loadtxt(mnist_filename, delimiter=',')
print(data)

[[1. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 ...
 [2. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [5. 0. 0. ... 0. 0. 0.]]


### battledeaths dataset
* modified from the Peace Research Institute Oslo (PRIO)
* contains age-adjusted mortality rates due to war in various countires over several years(for the period 1946–2008)
* https://www.prio.org/data/1

In [20]:
import pandas as pd
file = 'PRIO Battle Deaths Dataset 3.1.xls'
xls = pd.ExcelFile(file)
print(xls.sheet_names)
print(xls)
df1 = xls.parse(0, index_col=0)
print(df1.head())

['bdonly']
<pandas.io.excel._base.ExcelFile object at 0x11b241190>
    year  bdeadlow  bdeadhig  bdeadbes  annualdata  source  bdversion  \
id                                                                      
1   1946      1000      9999      1000           2       1        3.1   
1   1952       450      3000      -999           2       1        3.1   
1   1967        25       999        82           2       1        3.1   
2   1946        25       999      -999           0       0        3.1   
2   1947        25       999      -999           0       0        3.1   

    location    sidea sidea2nd  ... epend  ependdate  ependprec gwnoa  \
id                              ...                                     
1    Bolivia  Bolivia      NaN  ...     1 1946-07-21      -99.0   145   
1    Bolivia  Bolivia      NaN  ...     1 1952-04-12      -99.0   145   
1    Bolivia  Bolivia      NaN  ...     1 1967-10-16      -99.0   145   
2   Cambodia   France      NaN  ...     0        NaT    

### SAS: Statistical Analysis Systems
* for business analytics, biostatistics
* extension: <b>.sas7bdat</b>
* toy data: http://www.principlesofeconometrics.com/poe5/poe5data.html (from Principles of Econometrics, 5th edition)

In [21]:
from sas7bdat import SAS7BDAT
import pandas as pd

df = pd.read_sas('sales.sas7dbat')
print(df.head())

with SAS7BDAT('sales.sas7bdat') as file:
    df_sas = file.to_data_frame()
    
print(df_sas.head())

#plot histogram
pd.DataFrame.hist(df_sas[['P']])
plt.ylabel('count')
plt.show()

ModuleNotFoundError: No module named 'sas7bdat'

In [4]:
#Fibonacci
def fib(n:int)-> int:
    if n<2:
        return n
    return fib(n-2)+fib(n-1)

def test():
    
    if __name__ == "__main__":
        print(fib(10))    
        print(fib(20))    

test()

55
6765


In [7]:
#using memorization (dict)
from typing import Dict
memo: Dict[int, int] = {0:0, 1:1}
def fib(n:int)->int:
    if n not in memo:
        memo[n] = fib(n-2)+fib(n-1)
    return memo[n]
test()
print(memo)

55
6765
{0: 0, 1: 1, 2: 1, 3: 2, 4: 3, 5: 5, 6: 8, 7: 13, 8: 21, 9: 34, 10: 55, 11: 89, 12: 144, 13: 233, 14: 377, 15: 610, 16: 987, 17: 1597, 18: 2584, 19: 4181, 20: 6765}


In [9]:
#using automatic memorization
from functools import lru_cache

@lru_cache(maxsize=None)
def fib(n:int) -> int:
    if n<2:
        return n
    return fib(n-2)+fib(n-1)

test()

55
6765


In [10]:
#optimal solution(iterative, time complextiy O(n))
def fib(n:int) -> int:
    if n==0:
        return 0
    last: int=0
    next: int=1
    for _ in range(1,n):
        last, next = next,last+next
    return next

test()

55
6765


In [13]:
#optimal solution using Generator
from typing import Generator
def fib(n:int) -> Generator[int, None, None]:
    yield 0
    if n==0:
        return
    last: int=0
    next: int=1
    for _ in range(1,n):
        last, next = next,last+next
        yield next
        
if __name__ == "__main__":
    for i, v in enumerate(list(fib(30))):
        print("[{}]: {}".format(i, v))
        
        
        

[0]: 0
[1]: 1
[2]: 2
[3]: 3
[4]: 5
[5]: 8
[6]: 13
[7]: 21
[8]: 34
[9]: 55
[10]: 89
[11]: 144
[12]: 233
[13]: 377
[14]: 610
[15]: 987
[16]: 1597
[17]: 2584
[18]: 4181
[19]: 6765
[20]: 10946
[21]: 17711
[22]: 28657
[23]: 46368
[24]: 75025
[25]: 121393
[26]: 196418
[27]: 317811
[28]: 514229
[29]: 832040
