In [2]:
import io
import requests
import numpy as np
import pandas as pd
from PIL import Image
from scipy import stats

In [40]:
# get an ndarray from 0 to 29
data = np.arange(30)

# test its type
print(type(data))

# change it to be 5x6
data = data.reshape(5,6)
print(data)

# transpose it
data = data.transpose()
print(data)

# get the column sums
print(data.sum(axis=0))

# row sums
print(data.sum(axis=1))

# square every element
print(data**2)

# cycle through rows
for row in data:
    print(row)
    
# cycle through elements
for row in data:
    for cell in row:
        print(cell)
        
# add rows of ones to data
data = np.vstack((data, np.ones(10).reshape(2, 5)))
print(data)

# add columsn of ones to data
data = np.hstack((data, np.ones(24).reshape(8, 3)))
print(data)

# get the cumulative sum of elements in a column
print(data.cumsum(axis=0))

# get the cumulative sum of elements in a row
print(data.cumsum(axis=1))

# other operations include: mean, cov, var, std
print(data.std(axis=0))

# sort the rows
print(np.sort(data))

# sort all values
print(np.sort(data, axis=None))

# solve a system of equations
mat = np.array([[2.0, 1.0, 1.0], [1.0, -1.0, 1.0], [-2.0, -1.0, 3.0]]).transpose()
y = np.array([[10], [20], [30]])
print(np.linalg.solve(mat, y))

<class 'numpy.ndarray'>
[[ 0  1  2  3  4  5]
 [ 6  7  8  9 10 11]
 [12 13 14 15 16 17]
 [18 19 20 21 22 23]
 [24 25 26 27 28 29]]
[[ 0  6 12 18 24]
 [ 1  7 13 19 25]
 [ 2  8 14 20 26]
 [ 3  9 15 21 27]
 [ 4 10 16 22 28]
 [ 5 11 17 23 29]]
[ 15  51  87 123 159]
[60 65 70 75 80 85]
[[  0  36 144 324 576]
 [  1  49 169 361 625]
 [  4  64 196 400 676]
 [  9  81 225 441 729]
 [ 16 100 256 484 784]
 [ 25 121 289 529 841]]
[ 0  6 12 18 24]
[ 1  7 13 19 25]
[ 2  8 14 20 26]
[ 3  9 15 21 27]
[ 4 10 16 22 28]
[ 5 11 17 23 29]
0
6
12
18
24
1
7
13
19
25
2
8
14
20
26
3
9
15
21
27
4
10
16
22
28
5
11
17
23
29
[[ 0.  6. 12. 18. 24.]
 [ 1.  7. 13. 19. 25.]
 [ 2.  8. 14. 20. 26.]
 [ 3.  9. 15. 21. 27.]
 [ 4. 10. 16. 22. 28.]
 [ 5. 11. 17. 23. 29.]
 [ 1.  1.  1.  1.  1.]
 [ 1.  1.  1.  1.  1.]]
[[ 0.  6. 12. 18. 24.  1.  1.  1.]
 [ 1.  7. 13. 19. 25.  1.  1.  1.]
 [ 2.  8. 14. 20. 26.  1.  1.  1.]
 [ 3.  9. 15. 21. 27.  1.  1.  1.]
 [ 4. 10. 16. 22. 28.  1.  1.  1.]
 [ 5. 11. 17. 23. 29.  1.  1.  1.]
 [ 

In [33]:
# download an image and convert to PIL Image object
response = requests.get("https://upload.wikimedia.org/wikipedia/commons/3/30/Googlelogo.png")
image_file = io.BytesIO(response.content)
img = Image.open(image_file)

# get the upper left pixel
rgb = img.convert("RGB")
print(rgb.getpixel((0,0)))

# show the image
img.show()

# load some data up
data = pd.read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-05-28/winemag-data-130k-v2.csv")

# export the data to hdf5 format
# import tables
# data.to_hdf("output.h5", key="our_data", mode='w')

# see the first 10 lines
data.head(10)

# see the last 10 lines
data.tail(10)

# the items method lets you iterate over each column in the dataframe
# in this example, content is a pandas Series object, and label is the column title
# get a full list of dataframe methods: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html
for label, content in data.items():
    print(f'{label}: {content[0]}...')
    
# lets get a subset of the wine data where the wines are from Argentina
argentina_wines = data.loc[data['country'] == "Argentina"]

# what is the most popular variety?
# note the type of our data set, and the type when we are getting a
# single column of data
print(type(argentina_wines))
print(type(argentina_wines['variety']))

# you can find methods of a Series here: https://pandas.pydata.org/pandas-docs/stable/reference/series.html
print(argentina_wines['variety'].describe())

# looks like Malbec is the most popular variety, but that Argentina has 67 varieties!
print(argentina_wines['variety'].value_counts())

# there are some pretty entertaining descriptions in this dataset
# how many wines are described as "earthy"? What is the most common
# variety to be described as "earthy"?
earthy_wines = data.loc[data['description'].str.contains('earthy')]
print(earthy_wines['variety'].describe())

# let's get the average price by province
print(data.groupby(['province']).mean()['price'].sort_values(ascending=False))

# let's add our own wine to the dataset
data.head()
pinot_indiana = {'country': 'US', 
                 'description': 'Sweet, containing hints of corn and soy. The aroma is of freshly cut grass.',
                 'designation': 'Farmer\'s reserve',
                 'points': 47,
                 'price': 4.5,
                 'province': 'Midwest',
                 'region_1': 'Indiana',
                 'region_2': 'Muncie',
                 'taster_name': 'Purdue Pete',
                 'taster_twitter_handle': '@PurduePete',
                 'title': 'Maize for Dayz 2016',
                 'variety': 'Hoosier Yellow',
                 'winery': 'Fine Wines of Indiana'}
our_wine = pd.DataFrame(data=pinot_indiana, index=[0])

data = data.append(our_wine, ignore_index=True)
data.tail()

(0, 0, 0)
Unnamed: 0: 0...
country: Italy...
description: Aromas include tropical fruit, broom, brimstone and dried herb. The palate isn't overly expressive, offering unripened apple, citrus and dried sage alongside brisk acidity....
designation: Vulkà Bianco...
points: 87...
price: nan...
province: Sicily & Sardinia...
region_1: Etna...
region_2: nan...
taster_name: Kerin O’Keefe...
taster_twitter_handle: @kerinokeefe...
title: Nicosia 2013 Vulkà Bianco  (Etna)...
variety: White Blend...
winery: Nicosia...
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.series.Series'>
count       3800
unique        67
top       Malbec
freq        1510
Name: variety, dtype: object
Malbec                               1510
Cabernet Sauvignon                    540
Chardonnay                            295
Torrontés                             245
Red Blend                             234
                                     ... 
Moscatel                                1
Cabernet-Malbec       

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  return concat(


Unnamed: 0.1,Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
129967,129967.0,US,Citation is given as much as a decade of bottl...,,90,75.0,Oregon,Oregon,Oregon Other,Paul Gregutt,@paulgwine,Citation 2004 Pinot Noir (Oregon),Pinot Noir,Citation
129968,129968.0,France,Well-drained gravel soil gives this wine its c...,Kritt,90,30.0,Alsace,Alsace,,Roger Voss,@vossroger,Domaine Gresser 2013 Kritt Gewurztraminer (Als...,Gewürztraminer,Domaine Gresser
129969,129969.0,France,"A dry style of Pinot Gris, this is crisp with ...",,90,32.0,Alsace,Alsace,,Roger Voss,@vossroger,Domaine Marcel Deiss 2012 Pinot Gris (Alsace),Pinot Gris,Domaine Marcel Deiss
129970,129970.0,France,"Big, rich and off-dry, this is powered by inte...",Lieu-dit Harth Cuvée Caroline,90,21.0,Alsace,Alsace,,Roger Voss,@vossroger,Domaine Schoffit 2012 Lieu-dit Harth Cuvée Car...,Gewürztraminer,Domaine Schoffit
129971,,US,"Sweet, containing hints of corn and soy. The a...",Farmer's reserve,47,4.5,Midwest,Indiana,Muncie,Purdue Pete,@PurduePete,Maize for Dayz 2016,Hoosier Yellow,Fine Wines of Indiana
