In [None]:
import csv
import json
import pandas
import plotly
import requests

## Exercise: Python, Data and You

### Unstructured

#### Image

In [None]:
# the contents of an image file is not human readable (unless you really enjoy reading hexadecimal)
with open("how_i_created_this_presentation.png", "rb") as img_file:
    img_contents = img_file.read()

print(type(img_contents))
print(img_contents)

In [None]:
# however they are encoded in a way a computer can
# Here is the PNG spec in case you are curious: http://www.libpng.org/pub/png/spec/1.2/PNG-Structure.html
from PIL import Image
with Image.open("how_i_created_this_presentation.png") as img:
    img.show()

#### Text Files

In [None]:
# Unstructured text readable, but limited in use from a programming point of view
with open("wisdom.txt") as txt_file:
    txt_data = txt_file.read()

print(type(txt_data))
print(txt_data)

### Structured

#### JSON

In [None]:
import json

# at first glance the benefits of structured data may not be clear...
with open("pikachu.json") as jsonfile:
    poke_data = jsonfile.read()

print(type(poke_data))
print(poke_data)

In [None]:
# but deserializing the data allows us to do a lot of useful things in python
pikachu = json.loads(poke_data)
pikachus_moves = []
for move in pikachu['moves']:
    pikachus_moves.append(move['move']['name'])
print(pikachus_moves)

#### CSV

In [None]:
# same goes for csv files

with open('swifty.csv', newline='') as csvfile:
    teardrops_on_my_data = csvfile.read()
print(type(teardrops_on_my_data))
print(teardrops_on_my_data)

In [13]:
import csv
bangers = []
with open('swifty.csv', newline='') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        bangers.append(row)




In [14]:
print(type(bangers))
print("Song - Danceability")
bangers = sorted(bangers, key=lambda song: song['danceability'], reverse=True)
for bop in bangers:
    print(f"{bop['name']} - {bop['danceability']}")

<class 'list'>
Song - Danceability
I Think He Knows - 0.897
Treacherous - Original Demo Recording - 0.828
Cornelia Street - 0.824
Clean - 0.815
Paper Rings - 0.811
Gorgeous - 0.8
Welcome To New York - 0.789
Hey Stephen (Taylor’s Version) - 0.788
I Wish You Would - Voice Memo - 0.781
the 1 - 0.78
The Man - 0.777
You Need To Calm Down - 0.771
Look What You Made Me Do - 0.766
How You Get The Girl - 0.765
Blank Space - 0.76
Afterglow - 0.756
Delicate - 0.75
False God - 0.739
It’s Nice To Have A Friend - 0.737
Girl At Home - 0.733
Stay Stay Stay - 0.729
Dress - 0.719
Never Grow Up - 0.715
Death By A Thousand Cuts - 0.712
Speak Now - 0.708
The Lucky One - 0.706
Treacherous - 0.704
Mine - POP Mix - 0.696
I Did Something Bad - 0.696
London Boy - 0.695
closure - 0.689
the last great american dynasty - 0.688
Red - Original Demo Recording - 0.678
hoax - 0.676
Blank Space - Voice Memo - 0.675
King Of My Heart - 0.675
Our Song - 0.668
I Forgot That You Existed - 0.664
Miss Americana & The Heartbrea

In [15]:
import pandas
df = pandas.read_csv('swifty.csv')
df.sort_values('danceability', ascending=False)
df

Unnamed: 0.1,Unnamed: 0,name,album,artist,release_date,length,popularity,danceability,acousticness,energy,instrumentalness,liveness,loudness,speechiness,valence,tempo
0,0,Tim McGraw,Taylor Swift,Taylor Swift,2006-10-24,232106,49,0.580,0.575,0.491,0.000000,0.1210,-6.462,0.0251,0.425,76.009
1,1,Picture To Burn,Taylor Swift,Taylor Swift,2006-10-24,173066,54,0.658,0.173,0.877,0.000000,0.0962,-2.098,0.0323,0.821,105.586
2,2,Teardrops On My Guitar - Radio Single Remix,Taylor Swift,Taylor Swift,2006-10-24,203040,59,0.621,0.288,0.417,0.000000,0.1190,-6.941,0.0231,0.289,99.953
3,3,A Place in this World,Taylor Swift,Taylor Swift,2006-10-24,199200,49,0.576,0.051,0.777,0.000000,0.3200,-2.881,0.0324,0.428,115.028
4,4,Cold As You,Taylor Swift,Taylor Swift,2006-10-24,239013,50,0.418,0.217,0.482,0.000000,0.1230,-5.769,0.0266,0.261,175.558
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
166,166,Mr. Perfectly Fine (Taylor’s Version) (From Th...,Fearless (Taylor's Version),Taylor Swift,2021-04-09,277591,74,0.660,0.162,0.817,0.000000,0.0667,-6.269,0.0521,0.714,135.942
167,167,We Were Happy (Taylor’s Version) (From The Vault),Fearless (Taylor's Version),Taylor Swift,2021-04-09,244236,65,0.609,0.849,0.373,0.000000,0.0779,-8.819,0.0263,0.130,106.007
168,168,That’s When (feat. Keith Urban) (Taylor’s Vers...,Fearless (Taylor's Version),Taylor Swift,2021-04-09,189495,67,0.588,0.225,0.608,0.000000,0.0920,-7.062,0.0365,0.508,90.201
169,169,Don’t You (Taylor’s Version) (From The Vault),Fearless (Taylor's Version),Taylor Swift,2021-04-09,208608,66,0.563,0.514,0.473,0.000012,0.1090,-11.548,0.0503,0.405,101.934


In [16]:
import plotly.express as px
fig = px.histogram(df, x="danceability")
fig.show()

In [17]:
fig2 = px.histogram(df, x="acousticness")
fig2.show()

## References
### Books
- [Python for Data Analysis: Data Wrangling with Pandas, NumPy, and IPython](https://www.oreilly.com/library/view/python-for-data/9781491957653/)

### Documentation
- [Reading and Writing Files](https://docs.python.org/3/tutorial/inputoutput.html#reading-and-writing-files)
- [Sorting](https://docs.python.org/3/howto/sorting.html)
- [csv](https://docs.python.org/3/library/csv.html)
- [matplotlib](https://matplotlib.org/)
- [nltk](https://www.nltk.org/install.html)
- [numpy](https://numpy.org/)
- [Pillow](https://pillow.readthedocs.io/en/stable/index.html)
- [plotly](https://plotly.com/python/)
- [requests](https://docs.python-requests.org/en/latest/)
- [worldcloud](https://amueller.github.io/word_cloud/)