In [None]:
import requests

response = requests.get('https://raw.githubusercontent.com/CalebCurry/python/master/netflix_titles.csv')
# Stop on a 4xx/5xx reply rather than "parsing" an error page as CSV.
response.raise_for_status()

# splitlines() copes with '\r\n', '\n' and '\r' line endings alike and does
# not leave an empty trailing row, unlike split('\r\n').
data = response.text.splitlines()

# Deliberately naive parsing: splitting on ',' also cuts quoted fields that
# contain commas, which is what the csv module handles properly.
for row in data:
    for element in row.split(','):
        print(element)

## CSV Files

Using the `csv` module is easier than splitting the text manually as above.
Run `pwd` in a terminal to find the path to `netflix_titles.csv`, and use that path below.

In [None]:
import csv

# NOTE: machine-specific absolute path - point this at your own copy of the file.
# An explicit encoding keeps decoding independent of the platform default.
with open('/Users/paulapivat/Desktop/RCode/python/python_bootcamp/netflix_titles.csv', newline='', encoding='utf-8') as f:
    reader = csv.reader(f)
    for row in reader:
        # third column of every row
        print(row[2])


## Pandas

Another option.

In [None]:
import pandas as pd

# read_csv is given the URL directly, so no separate download step is needed
df = pd.read_csv('https://raw.githubusercontent.com/CalebCurry/python/master/netflix_titles.csv')

# last expression of the cell: the notebook renders the first rows as a table
df.head()

## csv with requests.get()

src - https://stackoverflow.com/questions/18897029/read-csv-file-from-url-into-python-3-x-csv-error-iterator-should-return-str

## csv with requests.get()

src - https://stackoverflow.com/questions/18897029/read-csv-file-from-url-into-python-3-x-csv-error-iterator-should-return-str

In [None]:
import csv
import codecs

response = requests.get('https://raw.githubusercontent.com/CalebCurry/python/master/netflix_titles.csv')
# iterate over the response body one line at a time
r = response.iter_lines()

# csv.reader needs str lines, so the lines are decoded lazily as UTF-8 first
reader = csv.reader(codecs.iterdecode(r, 'utf-8'))
for row in reader:
    # third column of every row
    print(row[2])

## Web scraping - Beautiful Soup

This is an option for working with websites when there is no API, or when the API does not give the needed options.
Another tool is html requests.

In [None]:
# we can try using requests before using beautiful soup
# requests can help us get HTML
# beautiful soup is used when working with html tags

import requests
from bs4 import BeautifulSoup

response = requests.get('https://www.crummy.com/software/BeautifulSoup/bs4/doc/')
#print(response.text)

# parse the downloaded HTML text with the 'html.parser' backend
soup = BeautifulSoup(response.text, 'html.parser')

# work with html tags

#print(soup.p) <- first paragraph
#print(soup.find_all('p'))  <- find all paragraphs
#print(soup.title.string)
#print(soup.h3)   <- specific tag

# find_all() returns an iterable of the matching tags, so it can be looped over
for p in soup.find_all('p'):
    print(p)


In [None]:
# Grab any wikipedia article, parse, grab image and display

import requests
from bs4 import BeautifulSoup

response = requests.get('https://en.wikipedia.org/wiki/Cat')
#print(response.text)

soup = BeautifulSoup(response.text, 'html.parser')

#soup.find_all('img')  <-- print all img tags

# first attempt, kept for reference: prefixing the site root gave broken links
#for img in soup.find_all('img'):
#    src = 'https://en.wikipedia.org'
#    full_img = src + img['src']
#    print(full_img)

# need to debug above as img link not working

# what link should look like: https://en.wikipedia.org/wiki/Cat#/media/File:Kittyply_edit1.jpg

# second attempt: prefix only the scheme
# (assumes each src is protocol-relative, i.e. starts with '//' - TODO confirm)
for img in soup.find_all('img'):
    # img['src'] raises KeyError for an <img> tag without a src attribute
    full_img = 'https:' + img['src']
    print(full_img)


In [None]:
# Turn web address into images in notebook using IPython.display

from IPython.display import Image

# last expression of the cell: the notebook renders the remote image, constrained to width 100
Image('https://upload.wikimedia.org/wikipedia/commons/thumb/b/bb/Kittyply_edit1.jpg/220px-Kittyply_edit1.jpg', width = 100)

In [None]:
import requests
from bs4 import BeautifulSoup
from IPython.display import Image, display

response = requests.get('https://en.wikipedia.org/wiki/Cat')
#print(response.text)

soup = BeautifulSoup(response.text, 'html.parser')

#iterate and print all images
# pass Image(full_img) to display() function from IPython library


for img in soup.find_all('img'):
    # assumes a protocol-relative src ('//host/path') - TODO confirm for every tag
    full_img = 'https:' + img['src']
    # crude filter: keep only addresses whose text ends in 'jpg'
    if full_img.endswith('jpg'):
        display(Image(full_img, width = 100))



#for img in soup.find_all('img'):
#    full_img = 'https:' + img['src']
#    try:
#        display(Image(full_img, width = 100))
#    except Exception as e:
#        print(e)


In [None]:
import mimetypes

response = requests.get('https://en.wikipedia.org/wiki/Cat')

soup = BeautifulSoup(response.text, 'html.parser')

for img in soup.find_all('img'):
    # assumes a protocol-relative src ('//host/path') - TODO confirm for every tag
    full_img = 'https:' + img['src']
    # guess_type() returns a (type, encoding) tuple; the type is None when unknown
    mimetype = mimetypes.guess_type(full_img)
    if mimetype[0] == 'image/jpeg':
        display(Image(full_img, width = 100))
        


In [None]:
# generalize to get all images & wrap this in a function
import requests
import mimetypes
from bs4 import BeautifulSoup
from IPython.display import Image, display

def print_images(url, start=None, stop=None):
    """Display the images referenced by the <img> tags of the page at ``url``.

    Args:
        url: Address of the page to download. Image sources are resolved
            against it, so the function is not tied to one site.
        start: Optional index of the first <img> tag to consider.
        stop: Optional index one past the last <img> tag to consider.

    Returns:
        None. Images are rendered in the notebook via ``display``.
    """
    # Local import keeps this cell self-contained.
    from urllib.parse import urljoin

    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    for img in soup.find_all('img')[start:stop]:
        # .get() avoids a KeyError on <img> tags that carry no src attribute
        src = img.get('src')
        if not src:
            continue
        # urljoin resolves protocol-relative ('//host/x'), root-relative ('/x'),
        # relative and already-absolute sources against the page address
        full_img = urljoin(url, src)
        mimetype = mimetypes.guess_type(full_img)
        if mimetype[0] is None:
            # unknown type: skip it
            continue
        if mimetype[0].startswith('image'):
            display(Image(full_img, width = 100))

print_images('https://en.wikipedia.org/wiki/Cat')


## Working with URLs with urllib

We improve on the web scraping script above by:
- removing hard-coded strings
- simplifying the conditional logic

In [None]:
from urllib.parse import urljoin 

# easily combine two web addresses
url = 'https://en.wikipedia.org/wiki/PlayStation_5'

# Root-relative path: replaces the path part of url.
# The result is printed because a cell only displays its last expression.
path = '/static/images/footer/wikimedia-button.png'
static_image_url = urljoin(url, path)
print(static_image_url)

# Protocol-relative path ('//host/...'): only the scheme is taken from url.
path = '//upload.wikimedia.org/wikipedia/commons/thumb/4/47/Sound-icon.svg/20px-Sound-icon.svg.png'
upload_image_url = urljoin(url, path)
print(upload_image_url)

In [None]:

from urllib.parse import urljoin 
import mimetypes

def print_images(url, start=None, stop=None):
    """Display the JPEG, PNG and GIF images found on the page at ``url``.

    Args:
        url: Address of the page to download; image sources are resolved
            against it with ``urljoin``.
        start: Optional index of the first <img> tag to consider.
        stop: Optional index one past the last <img> tag to consider.

    Returns:
        None. The guessed MIME type of every recognised source is printed and
        supported images are rendered via ``display``.
    """
    response = requests.get(url)

    soup = BeautifulSoup(response.text, 'html.parser')

    for img in soup.find_all('img')[start:stop]:
        # .get() avoids a KeyError on <img> tags that carry no src attribute
        src = img.get('src')
        if not src:
            continue
        # replace if-else logic above with urljoin
        full_img = urljoin(url, src)
        mimetype = mimetypes.guess_type(full_img)
        if mimetype[0] is None:
            continue
        print(mimetype[0])
        if mimetype[0] in ('image/jpeg', 'image/png', 'image/gif'):
            display(Image(full_img, width = 100))

#print_images('https://en.wikipedia.org/wiki/Cat')
#print_images(url)


## Dates and times


In [15]:
from datetime import datetime

now = datetime.now()
print(now)

future = datetime(2022, 1, 1)
print(future)

# subtracting two datetimes yields a timedelta
print(type(future - now))
time_diff = future - now
# .days, .seconds and .microseconds are separate components of the difference:
# .seconds is what is left after the whole days, not the total number of seconds
print(f"Days until 2022: {time_diff.days}")
print(f"Seconds until 2022: {time_diff.seconds}")
print(f"Microseconds until 2022: {time_diff.microseconds}")

# two forward slashes for integer division (rounds down to a whole number)
hours = time_diff.seconds // 3600
print(hours)

# modulo (%) gives the seconds left after the whole hours; // 60 turns them into whole minutes
minutes = time_diff.seconds % 3600 // 60
# whatever remains once the whole hours and minutes are subtracted
seconds = time_diff.seconds - (hours * 3600) - (minutes * 60)

print(seconds)

print(f'Time until 2022: {time_diff}')
print(time_diff.days, hours, minutes, seconds, time_diff.microseconds)



2021-12-30 11:31:32.733424
2022-01-01 00:00:00
<class 'datetime.timedelta'>
Days until 2022: 1
Seconds until 2022: 44907
Microseconds until 2022: 266576
12
27
Time until 2022: 1 day, 12:28:27.266576
1 12 28 27 266576


In [None]:
# Using days, seconds and microseconds
# Build out hours, minutes?

In [16]:


now = datetime.now()
year = now.year
# midnight on 1 January of the following year, so nothing is hard-coded
future = datetime(year + 1, 1, 1)

# time remaining until then, as a timedelta
print(future - now)

1 day, 12:28:19.659370


In [19]:
# timestamps

import time

#  when the epoch started
print(time.gmtime(0))

# current timestamp: seconds since the epoch, as a float
time.time()

time.struct_time(tm_year=1970, tm_mon=1, tm_mday=1, tm_hour=0, tm_min=0, tm_sec=0, tm_wday=3, tm_yday=1, tm_isdst=0)


1640838823.56873

In [24]:
# check python version dynamically
import sys
import time
from platform import python_version

version = python_version()
print(version)

# Compare numeric (major, minor) tuples. Indexing the version string
# (version[2]) reads a single character, so '3.10.x' would be seen as minor
# version 1 and wrongly fail a ">= 7" test.
supports_time_ns = sys.version_info >= (3, 7)
if supports_time_ns:
    print("Nanoseconds since the epoch:", time.time_ns())

3.8.5
Nanoseconds since the epoch: 1640839056467075000


In [25]:

# to simulate how long something takes

# timestamp taken before the work starts
before = datetime.now()

# stand-in for the work being timed: pause for 4 seconds
time.sleep(4)

# timestamp taken after the work finishes
after = datetime.now()

# elapsed time as a timedelta
print(after - before)

0:00:04.002507


## Copy

In [28]:
# assignment and rebinding with an immutable value (int)

data1 = 5
data2 = data1

# both names refer to the same int object, so the ids match
print(id(data1), id(data2))

# rebinding data1 makes it refer to a different object; data2 is unaffected
data1 = 6
print(data1, data2)
print(id(data1), id(data2))

4495219520 4495219520
6 5
4495219552 4495219520


In [29]:

data1 = [5]
data2 = data1

# data2 is another name for the same list object, so the ids match
print(id(data1), id(data2))

# mutating the list in place keeps its id, and the change shows through both names
data1[0] = 6

print(data1, data2)
print(id(data1), id(data2))

140551295574208 140551295574208
[6] [6]
140551295574208 140551295574208


When we assign an object (e.g., a list) to a second name, we create an alias: `data2` refers to the same object as `data1`, so both have the same address in memory.

Primitives (e.g., ints) are immutable; when you "change" one, the name is actually rebound to a different object.

Objects are different: you can change the data in the object, but the address in memory stays the same.



### Shallow Copy

In [33]:
from copy import copy

data1 = [5]
# three ways to copy the list into a new object:
# data2 = data1[:]
# data2 = data1.copy()
data2 = copy(data1)

# the copy is a separate list object, so the two ids differ
print(id(data1), id(data2))

# changing an element of data1 no longer affects data2
data1[0] = 6

print(data1, data2)
print(id(data1), id(data2))

140551316382400 140551296846976
[6] [5]
140551316382400 140551296846976


In [35]:
# more complicated list

from copy import copy

data1 = ['0', 1, 2, 3, [4], {5}, {6:7}]
# three ways to make a shallow copy:
# data2 = data1[:]
# data2 = data1.copy()
data2 = copy(data1)


# This change is only reflected in ONE list (the other one is a copy)
# data1[0] = 6

# to see changes in BOTH list
# change a variable that's *in* a list '[4]'
# (the shallow copy holds the same nested list object as the original)
data1[4][0] = 6

print(data1, data2)
print(id(data1), id(data2))

140551316747648 140551316652288
['0', 1, 2, 3, [6], {5}, {6: 7}] ['0', 1, 2, 3, [6], {5}, {6: 7}]
140551316747648 140551316652288


In [36]:
# with a shallow copy, it takes all the id from first list and copies to second list

def print_list(data):
    """Print the id() of every element of ``data``, one per line, then a blank line."""
    # Build one line per element; print() adds the final newline that forms
    # the trailing blank line.
    id_lines = [f"{id(element)}\n" for element in data]
    print("".join(id_lines))

data1 = ['0', 1, 2, 3, [4], {5}, {6:7}]
data2 = copy(data1)

print_list(data1)
print_list(data2)

140551255956208
4495219392
4495219424
4495219456
140551295574208
140551297445216
140551316744576

140551255956208
4495219392
4495219424
4495219456
140551295574208
140551297445216
140551316744576



In [38]:
from copy import deepcopy

def print_list(data):
    """Show the identity (id) of each item in ``data``, followed by an empty line."""
    for address in map(id, data):
        print(address)
    # blank separator so consecutive listings are easy to tell apart
    print()

data1 = ['0', 1, 2, 3, [4], {5}, {6:7}]
data2 = deepcopy(data1)

print_list(data1)
print_list(data2)

data1[4][0] = 'new'
print(data1, data2)

140551255956208
4495219392
4495219424
4495219456
140551316652224
140551297442976
140551316735232

140551255956208
4495219392
4495219424
4495219456
140551316571328
140551297445664
140551297023488

['0', 1, 2, 3, ['new'], {5}, {6: 7}] ['0', 1, 2, 3, [4], {5}, {6: 7}]


In [40]:
# changes value in object, not the object itself
# if change from 'x[0] = 9000' to 'x = 9000', that will NOT persist
def do_something(x):
    """Rebind the local name ``x``; the object passed in by the caller is not modified."""
    # plain assignment only changes what the local name refers to
    x = 9000

grades = [0, 1, 2]

do_something(grades)
print(grades)

[0, 1, 2]


### When working with functions, you can only change the data in objects, not the object itself

## pprint

In [45]:
from pprint import pprint

# Nested dict: outer keys 1-3, inner keys are subject names, values are lists of four scores.
grades = {
    1: {'math': [90, 90, 30, 20], 'science': [94, 93, 92, 91],
        'history': [12, 23, 34, 54], 'reading': [54, 23, 23, 54]},
    2: {'math': [9, 9, 3, 2], 'science': [9, 9, 9, 9],
        'history': [1, 2, 3, 5], 'reading': [5, 2, 2, 5]},
    3: {'math': [45, 44, 14, 88], 'science': [88, 93, 88, 91],
        'history': [7, 23, 5, 54], 'reading': [54, 8, 23, 6]},
}

# plain print: everything on a single line, keys in insertion order
print(grades)
# pprint: wrapped over several lines for readability
pprint(grades)

{1: {'math': [90, 90, 30, 20], 'science': [94, 93, 92, 91], 'history': [12, 23, 34, 54], 'reading': [54, 23, 23, 54]}, 2: {'math': [9, 9, 3, 2], 'science': [9, 9, 9, 9], 'history': [1, 2, 3, 5], 'reading': [5, 2, 2, 5]}, 3: {'math': [45, 44, 14, 88], 'science': [88, 93, 88, 91], 'history': [7, 23, 5, 54], 'reading': [54, 8, 23, 6]}}
{1: {'history': [12, 23, 34, 54],
     'math': [90, 90, 30, 20],
     'reading': [54, 23, 23, 54],
     'science': [94, 93, 92, 91]},
 2: {'history': [1, 2, 3, 5],
     'math': [9, 9, 3, 2],
     'reading': [5, 2, 2, 5],
     'science': [9, 9, 9, 9]},
 3: {'history': [7, 23, 5, 54],
     'math': [45, 44, 14, 88],
     'reading': [54, 8, 23, 6],
     'science': [88, 93, 88, 91]}}


In [47]:
import json 

# indent=4 puts every nested level on its own indented line
print(json.dumps(grades, indent=4))

{
    "1": {
        "math": [
            90,
            90,
            30,
            20
        ],
        "science": [
            94,
            93,
            92,
            91
        ],
        "history": [
            12,
            23,
            34,
            54
        ],
        "reading": [
            54,
            23,
            23,
            54
        ]
    },
    "2": {
        "math": [
            9,
            9,
            3,
            2
        ],
        "science": [
            9,
            9,
            9,
            9
        ],
        "history": [
            1,
            2,
            3,
            5
        ],
        "reading": [
            5,
            2,
            2,
            5
        ]
    },
    "3": {
        "math": [
            45,
            44,
            14,
            88
        ],
        "science": [
            88,
            93,
            88,
            91
        ],
        "history": [
       

In [49]:
# write a json file
import json

# write the file as UTF-8 explicitly rather than relying on the platform default
with open('data.json', 'w', encoding='utf-8') as f:
    # ensure_ascii=False writes non-ASCII characters as-is instead of \uXXXX escapes
    json.dump(grades, f, ensure_ascii=False, indent=4)

In [52]:
# read from json file
# Use the same encoding the file was written with; the platform default is
# not UTF-8 everywhere.
with open('data.json', encoding='utf-8') as f:
    grades = json.load(f)

# JSON object keys are always strings, so the keys come back as '1', '2', '3'
print(type(grades))
pprint(grades)

<class 'dict'>
{'1': {'history': [12, 23, 34, 54],
       'math': [90, 90, 30, 20],
       'reading': [54, 23, 23, 54],
       'science': [94, 93, 92, 91]},
 '2': {'history': [1, 2, 3, 5],
       'math': [9, 9, 3, 2],
       'reading': [5, 2, 2, 5],
       'science': [9, 9, 9, 9]},
 '3': {'history': [7, 23, 5, 54],
       'math': [45, 44, 14, 88],
       'reading': [54, 8, 23, 6],
       'science': [88, 93, 88, 91]}}


## Arrays and Collections

General note: Use Python Lists when you can, unless you have a specific need for an "Array"

In [56]:
from array import array 

# array is only allowed to have one data type, lists can have many
# i = signed int
# 'i' is the type code chosen for this array (see the note above: signed int)
data = array('i', [1, 5, 23, 65, 23, 2])
print(data)

# append a float, throws error
# data.append(5.5)

# every element read back from the array is a Python int
for d in data:
    print(type(d))

array('i', [1, 5, 23, 65, 23, 2])
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>


In [60]:
import numpy as np 

# numpy array is optimized for math 
data = np.array([4, 5, 6, 23, 34, 54, 34, 2])

print(data)
# sum over all elements
print(np.sum(data))
# square root applied to each element, giving an array of floats
print(np.sqrt(data))

[ 4  5  6 23 34 54 34  2]
162
[2.         2.23606798 2.44948974 4.79583152 5.83095189 7.34846923
 5.83095189 1.41421356]


In [64]:
data = [3,4,5]
# append adds to the end
data.append(6)
print(data)

# pop() with no argument removes from the end
data.pop()
print(data)

# insert at index 0 adds to the beginning
data.insert(0, 6)
print(data)

# pop(0) removes from the beginning
data.pop(0)
print(data)

[3, 4, 5, 6]
[3, 4, 5]
[6, 3, 4, 5]
[3, 4, 5]


## If you need to work at both the beginning and end of a list, use a doubly-linked list

Use `deque` from the `collections` module.

In [67]:
from collections import deque 

data = deque([1,2,3])

# add to the left end, then remove from the left end again
data.appendleft(-7)
print(data.popleft())

# the deque is back to its original contents
print(data)



-7
deque([1, 2, 3])
