In [None]:
import csv
import json
import pandas
import plotly
import requests

## Exercise: Python, Data and You

### Unstructured

#### Image

In [None]:
# The raw contents of an image file are not human readable (unless you really enjoy reading hexadecimal).
# Binary mode ("rb") reads the bytes exactly as stored, with no text decoding.
with open("how_i_created_this_presentation.png", "rb") as img_file:
    img_contents = img_file.read()

# A short prefix is enough to make the point; printing every byte of the file
# would flood the cell output and bloat the saved notebook.
print(img_contents[:256])
print(f"... {len(img_contents)} bytes in total")

In [None]:
# However, those bytes are encoded in a way a computer can decode and display.
# Here is the PNG spec in case you are curious: http://www.libpng.org/pub/png/spec/1.2/PNG-Structure.html
# `Image` is Pillow's PIL.Image module; without this import the cell fails with a
# NameError on a fresh kernel.
from PIL import Image

with Image.open("how_i_created_this_presentation.png") as img:
    img.show()

#### Text Files

In [None]:
# Unstructured text is human readable, but of limited use from a programming point of view:
# all we get back is one big string.
# The encoding is passed explicitly so the result does not depend on the platform default
# (assumes wisdom.txt is UTF-8 -- TODO confirm).
with open("wisdom.txt", encoding="utf-8") as txt_file:
    txt_data = txt_file.read()

print(type(txt_data))
print(txt_data)

### Structured

#### JSON

In [None]:
import json

# At first glance the benefits of structured data may not be clear:
# read as plain text, the file is just one long string...
# JSON is UTF-8 by specification, so decode it explicitly rather than relying on
# the platform's default encoding.
with open("pikachu.json", encoding="utf-8") as jsonfile:
    poke_data = jsonfile.read()

print(poke_data)

In [None]:
# ...but deserializing the data allows us to do a lot of useful things in Python.
# json.loads parses the JSON text held in poke_data into Python objects.
pikachu = json.loads(poke_data)
# The .keys() call implies the top-level JSON value is an object (parsed to a dict);
# its keys show which fields are available to explore.
available_keys = pikachu.keys()
print(available_keys)

In [None]:
# Walk the entries under the top-level 'moves' key, pull out each move's name,
# then print the names in sorted order.
pikachus_moves = []
for move_entry in pikachu['moves']:
    pikachus_moves.append(move_entry['move']['name'])
print(sorted(pikachus_moves))

#### CSV

In [None]:
# The same goes for CSV files: read as plain text, the file is just one long string.
# newline='' leaves line endings untranslated, as the csv docs recommend for csv files.
# The explicit encoding matches the UTF-8 default that pandas.read_csv applies to this
# same file further down, instead of depending on the platform default.
with open('swifty.csv', newline='', encoding='utf-8') as csvfile:
    teardrops_on_my_data = csvfile.read()
print(type(teardrops_on_my_data))
print(teardrops_on_my_data)

In [None]:
import csv
# csv.DictReader turns each data row into a dict keyed by the header row.
# Every value it yields is a string, so numeric columns must be converted
# before they are compared.
with open('swifty.csv', newline='', encoding='utf-8') as csvfile:
    bangers = list(csv.DictReader(csvfile))

print(type(bangers))
print("Song - Danceability")
# Sort on the numeric value: comparing the raw strings would order them character by
# character (for example '9e-05' would rank above '0.9').
# float() raises ValueError on a blank or non-numeric cell.
bangers = sorted(bangers, key=lambda song: float(song['danceability']), reverse=True)
for bop in bangers:
    print(f"{bop['name']} - {bop['danceability']}")

In [None]:
import pandas
# pandas loads the whole CSV into a DataFrame in a single call.
df = pandas.read_csv('swifty.csv')
# Last expression in the cell, so the notebook renders the sorted frame.
# sort_values returns a new frame here; df itself keeps its original order.
df.sort_values(by='danceability', ascending=False)

In [None]:
import plotly.express as px
# Histogram of the 'danceability' column of df (the DataFrame read from swifty.csv).
fig = px.histogram(df, x="danceability")
# Render the figure in the notebook output.
fig.show()

In [None]:
# Same kind of plot for the 'acousticness' column of df.
fig2 = px.histogram(df, x="acousticness")
# Render the figure in the notebook output.
fig2.show()

## References
### Books
- [Python for Data Analysis: Data Wrangling with Pandas, NumPy, and IPython](https://www.oreilly.com/library/view/python-for-data/9781491957653/)

### Documentation
- [Reading and Writing Files](https://docs.python.org/3/tutorial/inputoutput.html#reading-and-writing-files)
- [Sorting](https://docs.python.org/3/howto/sorting.html)
- [csv](https://docs.python.org/3/library/csv.html)
- [matplotlib](https://matplotlib.org/)
- [nltk](https://www.nltk.org/install.html)
- [numpy](https://numpy.org/)
- [Pillow](https://pillow.readthedocs.io/en/stable/index.html)
- [plotly](https://plotly.com/python/)
- [requests](https://docs.python-requests.org/en/latest/)
- [wordcloud](https://amueller.github.io/word_cloud/)