In [None]:
import requests

# Download the raw CSV text and take it apart by hand (no csv module yet).
response = requests.get('https://raw.githubusercontent.com/CalebCurry/python/master/netflix_titles.csv')
# Stop on an HTTP error instead of "parsing" an error page.
response.raise_for_status()

# splitlines() copes with '\r\n', '\n' and '\r' line endings alike and does
# not leave a trailing empty row when the text ends with a line break.
data = response.text.splitlines()

# Naive parsing: splitting on every comma also cuts through quoted fields
# that contain commas, which is what the csv module handles for us.
for row in data:
    for element in row.split(','):
        print(element)

## CSV Files

Using the `csv` module is easier than parsing the file manually as above.
Run `pwd` in a terminal to find the path to `netflix_titles.csv`, then use that path below.

In [None]:
import csv

# The path below is machine-specific: point it at your own copy of the file.
# newline='' leaves newline handling to the csv module, and the explicit
# encoding avoids depending on the platform's default text encoding.
with open('/Users/paulapivat/Desktop/RCode/python/python_bootcamp/netflix_titles.csv', newline='', encoding='utf-8') as f:
    reader = csv.reader(f)
    for row in reader:
        # Each row is a list of strings; print the third column.
        print(row[2])


## Pandas

Another option.

In [None]:
import pandas as pd

# pandas can fetch and parse the CSV straight from its address in one call.
netflix_csv_url = 'https://raw.githubusercontent.com/CalebCurry/python/master/netflix_titles.csv'
df = pd.read_csv(netflix_csv_url)

# Preview the first rows as the cell's output.
df.head()

## csv with requests.get()

src - https://stackoverflow.com/questions/18897029/read-csv-file-from-url-into-python-3-x-csv-error-iterator-should-return-str

## csv with requests.get()

src - https://stackoverflow.com/questions/18897029/read-csv-file-from-url-into-python-3-x-csv-error-iterator-should-return-str

In [None]:
import csv
import codecs

# stream=True lets iter_lines() consume the body piece by piece instead of
# downloading everything up front; the with-block releases the connection
# once the rows have been read.
with requests.get('https://raw.githubusercontent.com/CalebCurry/python/master/netflix_titles.csv', stream=True) as response:
    # Stop on an HTTP error instead of feeding an error page to the parser.
    response.raise_for_status()
    r = response.iter_lines()

    # iter_lines() yields bytes while csv.reader expects str, so decode each
    # line as UTF-8 on the fly.
    reader = csv.reader(codecs.iterdecode(r, 'utf-8'))
    for row in reader:
        print(row[2])

## Web scraping - Beautiful Soup

This is an option for working with websites when there is no API or the API does not provide the options you need.
Another tool is html requests.

In [None]:
# we can try using requests before using beautiful soup
# requests can help us get HTML
# beautiful soup is used when working with html tags

import requests
from bs4 import BeautifulSoup

# Download the Beautiful Soup documentation page and parse its HTML.
# (response.text holds the raw HTML if you want to print it.)
response = requests.get('https://www.crummy.com/software/BeautifulSoup/bs4/doc/')
soup = BeautifulSoup(response.text, 'html.parser')

# Other ways to work with the parsed tags:
#   soup.p              -> first paragraph
#   soup.find_all('p')  -> all paragraphs
#   soup.title.string   -> string inside the title tag
#   soup.h3             -> a specific tag

# The result of find_all() can be looped over directly.
paragraphs = soup.find_all('p')
for paragraph in paragraphs:
    print(paragraph)


In [None]:
# Grab any wikipedia article, parse, grab image and display

import requests
from bs4 import BeautifulSoup

# Fetch a Wikipedia article and parse it
# (response.text holds the raw HTML if you want to print it).
response = requests.get('https://en.wikipedia.org/wiki/Cat')
soup = BeautifulSoup(response.text, 'html.parser')

# soup.find_all('img') gives every img tag on the page.
#
# An earlier attempt glued 'https://en.wikipedia.org' in front of each src,
# but the resulting image links did not work and still need debugging.
# For reference, a link should look like:
#   https://en.wikipedia.org/wiki/Cat#/media/File:Kittyply_edit1.jpg

# Current approach: put only the scheme in front of each src and print it.
image_tags = soup.find_all('img')
for tag in image_tags:
    image_address = 'https:' + tag['src']
    print(image_address)


In [None]:
# Turn web address into images in notebook using IPython.display

from IPython.display import Image

# Show the picture at this address as the cell's output, with width 100.
kitten_address = 'https://upload.wikimedia.org/wikipedia/commons/thumb/b/bb/Kittyply_edit1.jpg/220px-Kittyply_edit1.jpg'
Image(kitten_address, width=100)

In [None]:
import requests
from bs4 import BeautifulSoup
from IPython.display import Image, display

# Fetch and parse the article
# (response.text holds the raw HTML if you want to print it).
response = requests.get('https://en.wikipedia.org/wiki/Cat')
soup = BeautifulSoup(response.text, 'html.parser')

# Walk over every img tag, skip addresses that do not end in 'jpg', and pass
# Image(address) to IPython's display() so the picture renders in the notebook.
for tag in soup.find_all('img'):
    image_address = 'https:' + tag['src']
    if not image_address.endswith('jpg'):
        continue
    display(Image(image_address, width=100))



#for img in soup.find_all('img'):
#    full_img = 'https:' + img['src']
#    try:
#        display(Image(full_img, width = 100))
#    except Exception as e:
#        print(e)


In [None]:
import mimetypes

# Fetch and parse the article.
response = requests.get('https://en.wikipedia.org/wiki/Cat')
soup = BeautifulSoup(response.text, 'html.parser')

# Instead of checking the file extension by hand, let mimetypes guess the
# type from the address and display only JPEG images.
for tag in soup.find_all('img'):
    image_address = 'https:' + tag['src']
    guessed_type = mimetypes.guess_type(image_address)[0]
    if guessed_type != 'image/jpeg':
        continue
    display(Image(image_address, width=100))
        


In [None]:
# generalize to get all images & wrap this in a function
import requests
import mimetypes
from bs4 import BeautifulSoup
from IPython.display import Image, display

def print_images(url, start=None, stop=None):
    """Display the images referenced by the ``<img>`` tags of a web page.

    Args:
        url: Address of the page to download.
        start: Slice start applied to the list of ``<img>`` tags (None = first).
        stop: Slice stop applied to the list of ``<img>`` tags (None = last).

    Tags without a ``src`` attribute are skipped, as are addresses whose
    MIME type cannot be guessed or does not start with ``image``.
    """
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    for img in soup.find_all('img')[start:stop]:
        # img['src'] raises KeyError when a tag has no src attribute,
        # so look it up defensively and skip such tags.
        src = img.get('src')
        if not src:
            continue
        if src.startswith('//'):
            # Protocol-relative address: only the scheme is missing.
            full_img = 'https:' + src
        else:
            # NOTE: any other src is resolved against Wikipedia, whatever
            # `url` is; this prefix is hard-coded.
            full_img = 'https://en.wikipedia.org' + src
        guessed_type = mimetypes.guess_type(full_img)[0]
        if guessed_type is None:
            continue
        if guessed_type.startswith('image'):
            display(Image(full_img, width = 100))

print_images('https://en.wikipedia.org/wiki/Cat')


## Working with URLs with urllib

Improving on our web scraping script above by:
- removing hard-coded strings
- simplifying conditional logic

In [None]:
from urllib.parse import urljoin 

# easily combine two web addresses
# urljoin combines a base address with a path, whether the path is
# site-relative ('/...') or protocol-relative ('//host/...').
url = 'https://en.wikipedia.org/wiki/PlayStation_5'

# Print each result explicitly: a notebook cell only echoes its last bare
# expression, so the first join would otherwise never be shown.
path = '/static/images/footer/wikimedia-button.png'
print(urljoin(url, path))

path = '//upload.wikimedia.org/wikipedia/commons/thumb/4/47/Sound-icon.svg/20px-Sound-icon.svg.png'
print(urljoin(url, path))

In [None]:

from urllib.parse import urljoin 
import mimetypes

def print_images(url, start=None, stop=None):
    """Display the JPEG, PNG and GIF images referenced by a web page.

    Args:
        url: Address of the page to download; image paths are resolved
            against it.
        start: Slice start applied to the list of ``<img>`` tags (None = first).
        stop: Slice stop applied to the list of ``<img>`` tags (None = last).

    Tags without a ``src`` attribute and addresses whose MIME type cannot be
    guessed are skipped. The guessed type of every remaining image is
    printed, and only the three supported types are displayed.
    """
    response = requests.get(url)

    soup = BeautifulSoup(response.text, 'html.parser')

    for img in soup.find_all('img')[start:stop]:
        # img['src'] raises KeyError when a tag has no src attribute,
        # so look it up defensively and skip such tags.
        src = img.get('src')
        if not src:
            continue
        # urljoin resolves site-relative and protocol-relative paths
        # against the page address, so no hard-coded prefix is needed.
        full_img = urljoin(url, src)
        guessed_type = mimetypes.guess_type(full_img)[0]
        if guessed_type is None:
            continue
        # Diagnostic output: shows which types turn up on the page.
        print(guessed_type)
        if guessed_type in ['image/jpeg', 'image/png', 'image/gif']:
            display(Image(full_img, width = 100))

#print_images('https://en.wikipedia.org/wiki/Cat')
#print_images(url)


## Dates and times


In [15]:
from datetime import datetime

now = datetime.now()
print(now)

future = datetime(2022, 1, 1)
print(future)

# Subtracting two datetimes gives a timedelta, which exposes only
# days, seconds and microseconds.
time_diff = future - now
print(type(time_diff))
print(f"Days until 2022: {time_diff.days}")
print(f"Seconds until 2022: {time_diff.seconds}")
print(f"Microseconds until 2022: {time_diff.microseconds}")

# divmod pairs floor division (//, which rounds down) with the
# remainder (%, modulo): whole hours first, then what is left over.
hours, leftover = divmod(time_diff.seconds, 3600)
print(hours)

# Split the leftover seconds into whole minutes and remaining seconds.
minutes, seconds = divmod(leftover, 60)
print(seconds)

print(f'Time until 2022: {time_diff}')
print(time_diff.days, hours, minutes, seconds, time_diff.microseconds)



2021-12-30 11:31:32.733424
2022-01-01 00:00:00
<class 'datetime.timedelta'>
Days until 2022: 1
Seconds until 2022: 44907
Microseconds until 2022: 266576
12
27
Time until 2022: 1 day, 12:28:27.266576
1 12 28 27 266576


In [None]:
# Using days, seconds and microseconds
# Build out hours, minutes?

In [16]:


# Same countdown, but aimed at January 1st of whichever year follows the
# current one, so nothing is hard-coded.
now = datetime.now()
year = now.year
future = datetime(year=year + 1, month=1, day=1)

countdown = future - now
print(countdown)

1 day, 12:28:19.659370


In [19]:
# timestamps

import time

# The epoch is the moment that timestamp 0 refers to.
epoch_start = time.gmtime(0)
print(epoch_start)

# The current timestamp, shown as the cell's output.
time.time()

time.struct_time(tm_year=1970, tm_mon=1, tm_mday=1, tm_hour=0, tm_min=0, tm_sec=0, tm_wday=3, tm_yday=1, tm_isdst=0)


1640838823.56873

In [24]:
# check python version dynamically
import sys
import time
from platform import python_version

version = python_version()
print(version)

# Compare the structured version tuple rather than indexing the version
# string: version[2] is a single character, so '3.10.0' would be read as
# minor version 1 and the check would wrongly fail.
supports_time_ns = sys.version_info >= (3, 7)
if supports_time_ns:
    print("Nanoseconds since the epoch:", time.time_ns())

3.8.5
Nanoseconds since the epoch: 1640839056467075000


In [25]:

# to simulate how long something takes

# Time a slow step by taking a datetime reading on either side of it.
before = datetime.now()
time.sleep(4)  # stand-in for the work being timed
after = datetime.now()

# The difference of the two readings is the elapsed time.
elapsed = after - before
print(elapsed)

0:00:04.002507
