In [2]:
from typing import Dict, List
from bs4 import BeautifulSoup
import pandas as pd
import requests
import spacy

In [3]:
# Source pages to scrape, keyed by a short site name that the fetch cells
# use to select which page to download.
url: Dict[str, str] = {
    'grammarbook': 'https://www.grammarbook.com/blog/definitions/walks-into-a-bar/',
    'thrillist': 'https://www.thrillist.com/culture/best-walks-into-a-bar-jokes',
    'jokojokes': 'https://jokojokes.com/walks-into-a-bar-jokes.html',
    'gamertelligence': 'https://www.gamertelligence.com/walks-into-a-bar-jokes/',
}

In [4]:
# Browser-like User-Agent sent with every request in this notebook.
headers = {
    'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.93 Safari/537.36'
}

# Fetch the GrammarBook page. The timeout keeps a stalled connection from
# hanging the kernel, and raise_for_status() stops here on an HTTP error
# instead of parsing an error page below.
response = requests.get(url['grammarbook'], headers=headers, timeout=10)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')

# The paragraphs are read from the first div with this class; fail with an
# explicit message if the page layout no longer contains it.
div = soup.find('div', class_='content_user-content')
if div is None:
    raise RuntimeError(
        "No <div class='content_user-content'> found on the grammarbook page"
    )

In [5]:
# Accumulator for the joke texts gathered from every site.
jokes: List[str] = []

# Print each paragraph of the container with its index, so the range of
# paragraphs that are actual jokes can be read off the output.
paragraphs = div.find_all('p')
for i, item in enumerate(paragraphs):
    print(f"{i}. {item.text}")

0. The phrase A ______ walks into a bar has provided the take-off point for an uncountable number of jokes over the years. No matter what one’s opinion is of bars, we hope that everyone can appreciate the lessons in English grammar contained in the clever sentences that follow:
1. A dangling participle walks into a bar. Enjoying a cocktail and chatting with the bartender, the evening passes pleasantly.
2. A bar was walked into by the passive voice.
3. An oxymoron walked into a bar, and the silence was deafening.
4. Two quotation marks walk into a “bar.”
5. A malapropism walks into a bar, looking for all intensive purposes like a wolf in cheap clothing, muttering epitaphs and casting dispersions on his magnificent other, who takes him for granite.
6. Hyperbole totally rips into this insane bar and absolutely destroys everything.
7. A question mark walks into a bar?
8. A non sequitur walks into a bar. In a strong wind, even turkeys can fly.
9. Papyrus and Comic Sans walk into a bar. The 

In [6]:
# Rebuild the list from the page on every run rather than appending to the
# existing `jokes`: appending and then slicing gave different (wrong)
# elements whenever this cell was executed more than once.
paragraph_texts = [item.text for item in div.find_all('p')]

# Paragraph 0 is the article's introduction, so the slice starts at 1 and
# keeps paragraphs 1 through 27.
jokes = paragraph_texts[1:28]
jokes

['A dangling participle walks into a bar. Enjoying a cocktail and chatting with the bartender, the evening passes pleasantly.',
 'A bar was walked into by the passive voice.',
 'An oxymoron walked into a bar, and the silence was deafening.',
 'Two quotation marks walk into a “bar.”',
 'A malapropism walks into a bar, looking for all intensive purposes like a wolf in cheap clothing, muttering epitaphs and casting dispersions on his magnificent other, who takes him for granite.',
 'Hyperbole totally rips into this insane bar and absolutely destroys everything.',
 'A question mark walks into a bar?',
 'A non sequitur walks into a bar. In a strong wind, even turkeys can fly.',
 'Papyrus and Comic Sans walk into a bar. The bartender says, “Get out—we don’t serve your type.”',
 'A mixed metaphor walks into a bar, seeing the handwriting on the wall but hoping to nip it in the bud.',
 'A comma splice walks into a bar, it has a drink and then leaves.',
 'Three intransitive verbs walk into a bar

In [7]:
# Fetch the Thrillist page. The timeout keeps a stalled connection from
# hanging the kernel, and raise_for_status() surfaces an HTTP error here
# instead of letting the next cell parse an error page.
response = requests.get(url['thrillist'], headers=headers, timeout=10)
response.raise_for_status()

In [8]:
soup = BeautifulSoup(response.content, 'html.parser')

# Take the text of every <p> on the page and drop the first and the last
# paragraph.
# NOTE(review): the displayed sample still starts with the article's intro
# paragraph and the texts carry leading/trailing spaces — confirm whether
# the slice bounds or a .strip() are needed.
jokes_thrillist = [para.text for para in soup.find_all('p')][1:-1]
jokes_thrillist[:5]

['You may think you’ve heard every joke that begins, “So X walks into a bar,” but we’re pretty confident you’ve missed a few. Refresh your dad joke repertoire and earn your rightful place as the resident comic at your local bar with these great “walks into a bar” jokes. (We promise not to tell anyone where you got all your material.) ',
 ' A guy walks into a bar and asks for 10 shots of the establishment’s finest single malt scotch. The bartender sets him up, and the guy takes the first shot in the row and pours it on the floor. He then takes the last shot in the row and does the same. The bartender asks, "Why did you do that?" And the guy replies, "Well the first shot always tastes like crap, and the last one always makes me sick!" ',
 ' Three vampires walk into a bar. The first one says, "I\'ll have a pint of blood." The second one says, "I\'ll have one, too." The third one says, "I\'ll have a pint of plasma." The bartender says, "So, that\'ll be two Bloods and a Blood Lite?" ',
 ' A

In [9]:
# Add the Thrillist jokes to the shared list. This extends `jokes` in place,
# so executing the cell again appends the same jokes a second time.
jokes.extend(jokes_thrillist)

In [10]:
# Fetch the JokoJokes page. The timeout keeps a stalled connection from
# hanging the kernel, and raise_for_status() surfaces an HTTP error here
# instead of letting the next cell parse an error page.
response = requests.get(url['jokojokes'], headers=headers, timeout=10)
response.raise_for_status()

In [11]:
soup = BeautifulSoup(response.content, 'html.parser')

# Each joke is the text of a <div class="listjoke"> element.
list_jokojokes = [node.text for node in soup.find_all('div', class_='listjoke')]

# Strip the thumbs-up emoji from each text before adding it to the shared
# list. This appends in place, so re-running the cell adds the jokes again.
for joke in list_jokojokes:
    jokes.append(joke.replace('👍🏼', ''))

In [41]:
# Load the small English spaCy pipeline and parse the first joke as a sample.
nlp = spacy.load('en_core_web_sm')
sample_text = jokes[0]

doc = nlp(sample_text)

# For every joke, print the noun chunks found in its opening sentence.
for joke in jokes:
    # Pull only the first sentence from the sentence iterator instead of
    # building the full list; a text that yields no sentences (for example
    # an empty string) gives None rather than raising IndexError.
    first_sent = next(iter(nlp(joke).sents), None)
    if first_sent is None:
        # Keep one output line per joke so line positions match `jokes`.
        print(())
    else:
        print(tuple(first_sent.noun_chunks))
    

(a bar,)
(A bar, the passive voice)
(An oxymoron, a bar, the silence)
(Two quotation marks, a “bar)
(A malapropism, a bar, all intensive purposes, a wolf, cheap clothing, epitaphs, dispersions, who, him, granite)
(Hyperbole, this insane bar, everything)
(A question mark, a bar)
(A non sequitur, a bar)
(Papyrus and Comic Sans, a bar)
(A mixed metaphor, a bar, the handwriting, the wall, it, the bud)
(splice, a bar, it, a drink)
(Three intransitive verbs, a bar)
(A synonym, a tavern)
(the end, the day, a cliché, a bar, a daisy, a button, a tack)
(a bar, it)
(the chiasmus, the bar floor)
(A figure, speech, a bar)
(An allusion, a bar, the fact, alcohol, its Achilles heel)
(a bar, it)
(A misplaced modifier, a bar, a man, a glass eye, Ralph)
(The past, future, a bar)
(A dyslexic, a bra)
(A verb, a bar, a beautiful noun, they)
(An Oxford comma, a bar, it, the evening, the television, drunk and smoking cigars)
(A simile, a bar, a desert)
(A gerund, an infinitive walk, a bar)
(A hyphenated word,

In [13]:
# Fetch the Gamertelligence page. This website has the jokes categorized
# into different genres, which can be used later for genre categorization.
# The timeout keeps a stalled connection from hanging the kernel, and
# raise_for_status() surfaces an HTTP error here instead of letting the
# next cell parse an error page.
response = requests.get(url['gamertelligence'], headers=headers, timeout=10)
response.raise_for_status()

In [14]:
soup = BeautifulSoup(response.content, 'html.parser')

outer = soup.find_all(id='Best')[0].parent