### PDF Extractor
I extracted a pdf of a Blue Chip article about consumer habits during the pandemic.

In [69]:
import fitz
import pandas as pd
from collections import Counter

doc = fitz.open('BlueChip.pdf')
text = "".join(page.get_text("text") for page in doc)
words = pd.Series(text.split())
words.value_counts().head(30)

the           113
and            81
to             73
of             61
in             43
a              30
shopping       24
their          23
will           19
that           18
shoppers       18
brands         17
grocery        17
in-store       16
experience     15
2020           15
for            15
with           15
blue           15
rights         14
chip.          14
All            14
©              14
as             14
reserved.      14
be             13
new            13
retailers      13
have           12
online         12
dtype: int64

### Reddit Image Transcriber
I used the r/aww reddit thread which contains both photos and videos.

In [18]:
import requests
from PIL import Image
import io
import pytesseract
import pandas as pd
data = requests.get("https://www.reddit.com/r/comics/.json", headers = {'User-agent': 'your bot 0.1'}).json()


In [19]:
#The title and url for each post
title = []
url = []
for i in data['data']['children']:
    title.append(i['data']['title'])
    if(i['data']['url'].endswith('.png') or i['data']['url'].endswith('.jpg')):
        url.append(i['data']['url'])
url

['https://i.redd.it/rxudiqlr5yy61.jpg',
 'https://i.redd.it/8cpd7of79vy61.jpg',
 'https://i.imgur.com/QOVhelr.png',
 'https://i.redd.it/kvuiu3vudyy61.png',
 'https://i.redd.it/zs0sm5ckczy61.png',
 'https://i.redd.it/s06l1vt9twy61.png',
 'https://i.redd.it/hvn1c03n5yy61.jpg',
 'https://i.redd.it/l6vcz7p81wy61.png',
 'https://i.redd.it/lc85u4y8pzy61.png',
 'https://i.redd.it/u75r9fqi2vy61.png',
 'https://i.redd.it/5wmhdfnkezy61.jpg',
 'https://i.redd.it/qlybqkx4gvy61.jpg',
 'https://i.redd.it/uw6qfs3bkyy61.jpg',
 'https://i.redd.it/0y466fws5zy61.jpg',
 'https://i.redd.it/y9009ob8ovy61.png',
 'https://i.redd.it/fcs6xar5bvy61.jpg',
 'https://i.redd.it/oadn8h49dvy61.jpg',
 'https://i.redd.it/amyod5p5zyy61.jpg',
 'https://i.redd.it/q2i54ifn6qy61.jpg',
 'https://i.redd.it/gdwwbki7dyy61.png',
 'https://i.imgur.com/XSMd6ft.jpg',
 'https://i.redd.it/k81hb4tklxy61.png',
 'https://i.redd.it/s6wxr8xd7yy61.png',
 'https://i.redd.it/pfcbiyfuawy61.png']

In [20]:
text = []
words = []
for u in url:
    response = requests.get(u+'.json')
    img = Image.open(io.BytesIO(response.content))
    text.append(pytesseract.image_to_string(img))
for t in text:
    words.extend(t.split())
words
#from textblob import TextBlob
#blob = TextBlob(text)
#print(blob.noun_phrases)

#Sorted with pandas
#pd.Series(blob.word_counts).sort_values().tail(30)


['lyou',
 'are',
 'looking',
 'for',
 'an',
 'image,',
 'itwas',
 'probably',
 'deleted.',
 'lyou',
 'are',
 'looking',
 'for',
 'an',
 'image,',
 'itwas',
 'probably',
 'deleted.',
 'wait,',
 'i',
 'graduated',
 'three',
 'years',
 'ago',
 '—',
 '—”',
 'BL',
 'U',
 '3)',
 'Pa',
 '®',
 '+',
 '&',
 'oO',
 '©',
 '—',
 'oO',
 'lyou',
 'are',
 'looking',
 'for',
 'an',
 'image,',
 'itwas',
 'probably',
 'deleted.',
 'lyou',
 'are',
 'looking',
 'for',
 'an',
 'image,',
 'itwas',
 'probably',
 'deleted.',
 'lyou',
 'are',
 'looking',
 'for',
 'an',
 'image,',
 'itwas',
 'probably',
 'deleted.',
 'lyou',
 'are',
 'looking',
 'for',
 'an',
 'image,',
 'itwas',
 'probably',
 'deleted.',
 'lyou',
 'are',
 'looking',
 'for',
 'an',
 'image,',
 'itwas',
 'probably',
 'deleted.',
 'lyou',
 'are',
 'looking',
 'for',
 'an',
 'image,',
 'itwas',
 'probably',
 'deleted.',
 'lyou',
 'are',
 'looking',
 'for',
 'an',
 'image,',
 'itwas',
 'probably',
 'deleted.',
 'lyou',
 'are',
 'looking',
 'for',
 '

### Face Finding
I used a stock image of a couple people standing and smiling at the camera for my image

In [66]:
import numpy as np
import cv2
import urllib
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
eye_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_eye.xml')


img = cv2.imread('stockphoto.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
faces = face_cascade.detectMultiScale(gray, 1.3, 5)


In [67]:
for (x,y,w,h) in faces:
    img = cv2.rectangle(img,(x,y),(x+w,y+h),(255,0,0),2)
    roi_gray = gray[y:y+h, x:x+w]
    roi_color = img[y:y+h, x:x+w]
    eyes = eye_cascade.detectMultiScale(roi_gray)
    for (ex,ey,ew,eh) in eyes:
        cv2.rectangle(roi_color,(ex,ey),(ex+ew,ey+eh),(0,255,0),2)

cv2.imshow('img',img)
cv2.waitKey(0)
cv2.destroyAllWindows()