In [11]:
import numpy as np
from urllib.request import urlopen # Importing our FTE dataset
import timeit

In [24]:
# Since we need a dataset to start with, grab it from FiveThirtyEight's data GitHub repo.
# For copying down: go to   bit.ly/2vPECXz   then copy the URL that shows up
# The response is used as a context manager so the HTTP connection is closed
# as soon as the body has been read, and the file is fetched over HTTPS.
with urlopen('https://raw.githubusercontent.com/fivethirtyeight/data/master/us-weather-history/KMDW.csv') as response:
    text = response.read().decode('utf-8')

with open('./KMDW.txt','w') as f:
    # Drop the single trailing newline, but only when it is really there, so a
    # download without one does not lose its final data character.
    f.write(text[:-1] if text.endswith('\n') else text)

In [None]:
# Advantages of numpy:
#     Very fast operations (the backend is implemented in C)
#     Treats 2D data in a more natural way (row, column)
#     Extremely easy dataset imports (from txt, csv, etc.)
#     Is the backend for pandas
#     Allows element-wise arithmetic and element-wise assignment

In [35]:
# Plain-Python parsing: read every line of the file into a list of strings.
with open('./KMDW.txt','r') as f:
    data = f.readlines()

# Split only the first ten lines (header included) into their comma-separated
# fields; the remaining entries of `data` stay as raw strings. The last field
# of each split row keeps its trailing newline.
data[:10] = [line.split(',') for line in data[:10]]

[['date', 'actual_mean_temp', 'actual_min_temp', 'actual_max_temp', 'average_min_temp', 'average_max_temp', 'record_min_temp', 'record_max_temp', 'record_min_temp_year', 'record_max_temp_year', 'actual_precipitation', 'average_precipitation', 'record_precipitation\n'], ['2014-7-1', '76', '68', '84', '67', '84', '49', '103', '1982', '1956', '0.04', '0.11', '1.23\n'], ['2014-7-2', '66', '58', '74', '67', '84', '48', '99', '1930', '1970', '0.12', '0.10', '2.50\n'], ['2014-7-3', '66', '57', '75', '67', '84', '50', '102', '1940', '1949', '0.05', '0.11', '1.38\n'], ['2014-7-4', '70', '60', '79', '67', '84', '49', '101', '1940', '2012', '0.00', '0.11', '0.94\n'], ['2014-7-5', '71', '63', '79', '67', '84', '46', '103', '1972', '2012', '0.01', '0.13', '1.18\n'], ['2014-7-6', '78', '66', '89', '67', '84', '49', '105', '1983', '2012', '0.00', '0.12', '3.93\n'], ['2014-7-7', '81', '72', '89', '67', '85', '48', '102', '1983', '1936', '0.48', '0.13', '1.30\n'], ['2014-7-8', '77', '70', '84', '68', '

In [64]:
# (KMDW is a weather station in Chicago)
# Column meanings in the file:
#    date,actual_mean_temp,actual_min_temp,actual_max_temp,average_min_temp,average_max_temp,
#    record_min_temp,record_max_temp,record_min_temp_year,record_max_temp_year,actual_precipitation,average_precipitation,
#    record_precipitation
#
# Everything is read as strings first (skipping the header row) because the
# date column cannot be parsed as a number. A numpy array here is 2D, so it is
# indexed with two slices -- [rows, columns]: keep every row, drop column 0
# (the date), then convert what is left to floats.
weather = np.loadtxt('./KMDW.txt', delimiter=',', skiprows=1, dtype=str)[:, 1:].astype(float)

# With the date removed, column 1 is actual_min_temp; print its standard deviation.
print(np.std(weather[:, 1]))
# Other things to try: maxima, standard deviation over a range, indexing, etc.

19.712865852726836


In [None]:
# List comprehensions
# Warm-up without comprehensions: multiply 1..99 together (i.e. 99!).
dataset = list(range(1,100))
soln = 1
for factor in dataset:
    soln = soln * factor
print(soln)

# Sum the decimal digits of that product with an explicit loop.
digitsum = 0
for digit in str(soln):
    digitsum = digitsum + int(digit)
print(digitsum)

# List comprehensions start here

# Simplest possible comprehension: copy a list element by element.
y = [1,2,3]
print([item for item in y])
# The digit sum again, this time as a one-liner.
print(sum([int(digit) for digit in str(soln)]))

# Code snippets handed to timeit as strings: the setup builds `soln`, and the
# two statements are the loop version and the comprehension version.
setup_factorial = '''dataset = list(range(1,100))
soln = 1
for i in dataset:
    soln *= i'''

traditional_method = '''digitsum = 0
for i in str(soln):
    digitsum += int(i)'''

list_comprehensions = '''sum([int(x) for x in str(soln)])'''

# Time both statements, loop version first, with the same setup and repeat count.
iterations = 10000
for timed_stmt in (traditional_method, list_comprehensions):
    print(timeit.timeit(stmt=timed_stmt, setup=setup_factorial, number=iterations))

# Comprehensions can also filter: keep only the entries whose type is exactly int.
gen_data = [1,2,3,None,5,6,7,8]
gen_data = [value for value in gen_data if type(value) is int]
print(gen_data)
print(sum(gen_data))

In [62]:
# Nested list comprehension: for each y in 0..9 the inner comprehension builds
# [0, 1, ..., y-1], so the result is a list of ten lists of growing length.
lists_inside_lists = [[x for x in range(y)] for y in range(10)]

# The same construction written as explicit nested loops. Every name used
# here is defined inside this cell, so it runs on a fresh kernel.
lists_from_loops = []
for row_len in range(10):
    row = []
    for value in range(row_len):
        row.append(value)
    lists_from_loops.append(row)

# Both spellings must produce the same nested list.
assert lists_from_loops == lists_inside_lists

print(lists_inside_lists)

933262154439441526816992388562667004907159682643816214685929638952175999932299156089414639761565182862536979208272237582511852109168640000000000000000000000
648
[1, 2, 3]
648
0.38686319113003265
0.3832515425156089
[1, 2, 3, 5, 6, 7, 8]
32
[[], [0], [0, 1], [0, 1, 2], [0, 1, 2, 3], [0, 1, 2, 3, 4], [0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5, 6], [0, 1, 2, 3, 4, 5, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7, 8]]


In [38]:
# Setup code for timeit (passed as a string): load column 1 of the raw file
# (actual_mean_temp, since the date column is still present at this point)
# as a float numpy array.
np_setup = (
    "import numpy as np\n"
    "weather = np.loadtxt('./KMDW.txt',dtype=str,delimiter=',',skiprows=1)[:,1].astype(float)"
)

# Same data, but converted to a plain Python list for the built-in max().
list_setup = np_setup + '.tolist()'

# Time the numpy method first, then the built-in on the list.
iterations = 100
for setup_code, stmt_code in ((np_setup, 'weather.max()'), (list_setup, 'max(weather)')):
    print(timeit.timeit(stmt=stmt_code, setup=setup_code, number=iterations))

0.0004495795365073718
0.001173726539946074


In [43]:
# Mean over every element of `weather`: no axis is given, so all values are
# pooled into a single number.
# NOTE(review): if `weather` is still the 2D array from the loading cell, this
# mixes temperatures, years and precipitation -- confirm that is intended.
print(np.mean(weather))

357.55624885844753


In [63]:
# str.split with an explicit single-space separator returns the pieces
# between the spaces as a list of strings.
sentence = 'this is a sentence'
sentence.split(' ')

['this', 'is', 'a', 'sentence']