In [1]:
from bs4 import BeautifulSoup
from requests.exceptions import RequestException
from contextlib import closing
import requests
import shutil
import os
import json
import pandas as pd
import time
import sys
import logging

# Directory (relative to this notebook) that holds the project's data files.
# The scraped template index is written into it further down.
base_dir = '../data'

# Bare expression: the notebook displays the current contents of the data
# directory as the cell's output.
os.listdir(base_dir)

['base_memes_from_imgflip.json',
 'base_memes_s3_links.json',
 'bleached_data.json',
 'Captions.txt',
 'Captions_new.txt',
 'cleaned_captions.txt',
 'combined_data.json',
 'memefly_bounding_box_data.json']

In [2]:
# Listing page for imgflip meme templates; a `?page=N` query string is
# appended to it to select an individual listing page.
BASE_URL = 'https://imgflip.com/memetemplates'

# First and last listing page to scrape. Both ends are inclusive: the
# scraping loop iterates over range(start_num, end_num+1).
start_num = 1
end_num = 21


def is_good_response(resp):
    """
    Returns True if the response seems to be HTML, False otherwise.

    A response counts as HTML when its status code is 200 and its
    Content-Type header contains the substring 'html' (case-insensitive).

    `resp` only needs a `status_code` attribute and a dict-like `headers`
    attribute. A response without a Content-Type header is reported as
    not-HTML rather than raising KeyError.
    """
    # .get() instead of [] so a missing header yields None, not KeyError.
    content_type = resp.headers.get('Content-Type')
    if content_type is None:
        return False
    return resp.status_code == 200 and 'html' in content_type.lower()


def log_error(e):
    """
    Report an error through the standard `logging` module.

    `e` is emitted as an ERROR-level record on the root logger. Keeping
    this in one place means the reporting mechanism can be swapped out
    without touching the callers.
    """
    logging.log(logging.ERROR, e)

def simple_get(url, timeout=10):
    """
    Attempts to get the content at `url` by making an HTTP GET request.

    Parameters:
        url: address to fetch.
        timeout: seconds passed to `requests.get` as its `timeout`
            argument. Without one, requests waits indefinitely on a
            stalled server, which would hang the whole notebook.

    Returns:
        The raw response body (bytes, i.e. `resp.content`) when
        `is_good_response` accepts the response, otherwise None.
        None is also returned, after logging, when the request raises a
        `RequestException` (connection error, timeout, ...).
    """
    try:
        # stream=True defers downloading the body until `.content` is read,
        # so rejected responses are not downloaded at all.
        with closing(requests.get(url, stream=True, timeout=timeout)) as resp:
            if is_good_response(resp):
                return resp.content
            return None

    except RequestException as e:
        log_error('Error during requests to {0} : {1}'.format(url, str(e)))
        return None
    
# Maps each meme template title to the URL of its image on imgflip.
base_memes = {}
for i in range(start_num, end_num+1):
    url = BASE_URL + f'?page={i}'
    print(f'Fetching URL: {url}')
    content = simple_get(url)
    if not content:
        # simple_get returns None on request errors and non-HTML responses;
        # record the gap instead of skipping the page silently.
        log_error(f'No HTML content for {url}; skipping page {i}')
        continue

    soup = BeautifulSoup(content, 'html.parser')
    mt_boxes = soup.find('div', class_='mt-boxes')
    if mt_boxes is None:
        # find() returns None when nothing matches; calling find_all on it
        # would raise AttributeError and abort the whole scrape.
        log_error(f'No "mt-boxes" container found on {url}; skipping page {i}')
        continue

    for mt_box in mt_boxes.find_all(class_="mt-box"):
        # Each template box is expected to hold <h3><a href="/meme/...">Title</a></h3>;
        # skip boxes that do not match that shape.
        title_link = mt_box.h3.a if mt_box.h3 is not None else None
        if title_link is None or not title_link.get('href'):
            continue
        base_meme_title = title_link.text
        base_meme_href_path = title_link.get('href')
        base_meme_pic_url = f"https://imgflip.com/s{base_meme_href_path}.jpg"
        print(f"Meme Title: {base_meme_title}, Meme url: { base_meme_pic_url}")
        base_memes[base_meme_title] = base_meme_pic_url
    print(f'Completed fetching Page {i} base memes')

Fetching URL: https://imgflip.com/memetemplates?page=1
Meme Title: Woman Yelling At Cat, Meme url: https://imgflip.com/s/meme/Woman-Yelling-At-Cat.jpg
Meme Title: Distracted Boyfriend, Meme url: https://imgflip.com/s/meme/Distracted-Boyfriend.jpg
Meme Title: Drake Hotline Bling, Meme url: https://imgflip.com/s/meme/Drake-Hotline-Bling.jpg
Meme Title: Two Buttons, Meme url: https://imgflip.com/s/meme/Two-Buttons.jpg
Meme Title: Mocking Spongebob, Meme url: https://imgflip.com/s/meme/Mocking-Spongebob.jpg
Meme Title: Change My Mind, Meme url: https://imgflip.com/s/meme/Change-My-Mind.jpg
Meme Title: Left Exit 12 Off Ramp, Meme url: https://imgflip.com/s/meme/Left-Exit-12-Off-Ramp.jpg
Meme Title: Expanding Brain, Meme url: https://imgflip.com/s/meme/Expanding-Brain.jpg
Meme Title: Batman Slapping Robin, Meme url: https://imgflip.com/s/meme/Batman-Slapping-Robin.jpg
Meme Title: Blank Nut Button, Meme url: https://imgflip.com/s/meme/Blank-Nut-Button.jpg
Meme Title: Surprised Pikachu, Meme u

Meme Title: Buddy The Elf, Meme url: https://imgflip.com/s/meme/Buddy-The-Elf.jpg
Meme Title: Overly Attached Girlfriend, Meme url: https://imgflip.com/s/meme/Overly-Attached-Girlfriend.jpg
Meme Title: Young Cardi B, Meme url: https://imgflip.com/s/meme/Young-Cardi-B.jpg
Meme Title: How Tough Are You, Meme url: https://imgflip.com/s/meme/How-Tough-Are-You.jpg
Meme Title: Jack Nicholson The Shining Snow, Meme url: https://imgflip.com/s/meme/Jack-Nicholson-The-Shining-Snow.jpg
Meme Title: These Arent The Droids You Were Looking For, Meme url: https://imgflip.com/s/meme/These-Arent-The-Droids-You-Were-Looking-For.jpg
Meme Title: No Patrick, Meme url: https://imgflip.com/s/meme/No-Patrick.jpg
Meme Title: You Were The Chosen One (Star Wars), Meme url: https://imgflip.com/s/meme/You-Were-The-Chosen-One-Star-Wars.jpg
Meme Title: Impossibru Guy Original, Meme url: https://imgflip.com/s/meme/Impossibru-Guy-Original.jpg
Meme Title: Ill Have You Know Spongebob, Meme url: https://imgflip.com/s/mem

Meme Title: Kool Kid Klan, Meme url: https://imgflip.com/s/meme/Kool-Kid-Klan.jpg
Meme Title: Advice Yoda, Meme url: https://imgflip.com/s/meme/Advice-Yoda.jpg
Meme Title: Psy Horse Dance, Meme url: https://imgflip.com/s/meme/Psy-Horse-Dance.jpg
Meme Title: Money Man, Meme url: https://imgflip.com/s/meme/Money-Man.jpg
Meme Title: McKayla Maroney Not Impressed, Meme url: https://imgflip.com/s/meme/McKayla-Maroney-Not-Impressed.jpg
Meme Title: TED, Meme url: https://imgflip.com/s/meme/TED.jpg
Meme Title: Albert Einstein 1, Meme url: https://imgflip.com/s/meme/Albert-Einstein-1.jpg
Meme Title: PTSD Clarinet Boy, Meme url: https://imgflip.com/s/meme/PTSD-Clarinet-Boy.jpg
Meme Title: Duck Face Chicks, Meme url: https://imgflip.com/s/meme/Duck-Face-Chicks.jpg
Meme Title: Beard Baby, Meme url: https://imgflip.com/s/meme/Beard-Baby.jpg
Meme Title: Chill Out Lemur, Meme url: https://imgflip.com/s/meme/Chill-Out-Lemur.jpg
Meme Title: Bazooka Squirrel, Meme url: https://imgflip.com/s/meme/Bazooka

Meme Title: High Dog, Meme url: https://imgflip.com/s/meme/High-Dog.jpg
Meme Title: Fat Cat, Meme url: https://imgflip.com/s/meme/Fat-Cat.jpg
Meme Title: Scumbag Boss, Meme url: https://imgflip.com/s/meme/Scumbag-Boss.jpg
Meme Title: Condescending Goku, Meme url: https://imgflip.com/s/meme/Condescending-Goku.jpg
Meme Title: Bitch Please, Meme url: https://imgflip.com/s/meme/Bitch-Please.jpg
Meme Title: Grumpy Cat Bed, Meme url: https://imgflip.com/s/meme/Grumpy-Cat-Bed.jpg
Meme Title: Ermahgerd Beyonce, Meme url: https://imgflip.com/s/meme/Ermahgerd-Beyonce.jpg
Meme Title: Troll Face Colored, Meme url: https://imgflip.com/s/meme/Troll-Face-Colored.jpg
Meme Title: Sigmund Freud, Meme url: https://imgflip.com/s/meme/Sigmund-Freud.jpg
Meme Title: Foul Bachelor Frog, Meme url: https://imgflip.com/s/meme/Foul-Bachelor-Frog.jpg
Meme Title: Close Enough, Meme url: https://imgflip.com/s/meme/Close-Enough.jpg
Meme Title: Lazy College Senior, Meme url: https://imgflip.com/s/meme/Lazy-College-Sen

Meme Title: Kyon Face Palm, Meme url: https://imgflip.com/s/meme/Kyon-Face-Palm.jpg
Meme Title: USA Lifter, Meme url: https://imgflip.com/s/meme/USA-Lifter.jpg
Meme Title: Aw Yeah Rage Face, Meme url: https://imgflip.com/s/meme/Aw-Yeah-Rage-Face.jpg
Meme Title: Tom Hardy , Meme url: https://imgflip.com/s/meme/Tom-Hardy-.jpg
Meme Title: Criana, Meme url: https://imgflip.com/s/meme/Criana.jpg
Meme Title: FRANGO, Meme url: https://imgflip.com/s/meme/FRANGO.jpg
Meme Title: Slick Fry, Meme url: https://imgflip.com/s/meme/Slick-Fry.jpg
Meme Title: Sheltering Suburban Mom, Meme url: https://imgflip.com/s/meme/Sheltering-Suburban-Mom.jpg
Meme Title: Merida Brave, Meme url: https://imgflip.com/s/meme/Merida-Brave.jpg
Meme Title: Frustrating Mom, Meme url: https://imgflip.com/s/meme/Frustrating-Mom.jpg
Meme Title: Darth Maul, Meme url: https://imgflip.com/s/meme/Darth-Maul.jpg
Meme Title: Big Bird, Meme url: https://imgflip.com/s/meme/Big-Bird.jpg
Meme Title: Romney, Meme url: https://imgflip.co

Meme Title: I Forsee, Meme url: https://imgflip.com/s/meme/I-Forsee.jpg
Meme Title: Fabulous Frank And His Snake, Meme url: https://imgflip.com/s/meme/Fabulous-Frank-And-His-Snake.jpg
Meme Title: Samuel L Jackson, Meme url: https://imgflip.com/s/meme/Samuel-L-Jackson.jpg
Meme Title: Speechless Colbert Face, Meme url: https://imgflip.com/s/meme/Speechless-Colbert-Face.jpg
Meme Title: Superior Wadsworth, Meme url: https://imgflip.com/s/meme/Superior-Wadsworth.jpg
Meme Title: Hot Caleb, Meme url: https://imgflip.com/s/meme/Hot-Caleb.jpg
Meme Title: Nilo, Meme url: https://imgflip.com/s/meme/Nilo.jpg
Meme Title: Beyonce Knowles Superbowl Face, Meme url: https://imgflip.com/s/meme/Beyonce-Knowles-Superbowl-Face.jpg
Meme Title: Portuguese, Meme url: https://imgflip.com/s/meme/Portuguese.jpg
Meme Title: Mad Moxxi, Meme url: https://imgflip.com/s/meme/Mad-Moxxi.jpg
Meme Title: ERMAHGERD TWERLERT, Meme url: https://imgflip.com/s/meme/ERMAHGERD-TWERLERT.jpg
Meme Title: Bothered Bond, Meme url: h

In [3]:
# Persist the scraped title -> image-URL mapping as pretty-printed JSON.
output_path = os.path.join(base_dir, "base_memes_from_imgflip.json")
with open(output_path, "w") as json_file:
    json.dump(base_memes, json_file, indent=4)
    print("Wrote base memes as JSON object")

# Bare expression: display the data directory contents as the cell output.
os.listdir(base_dir)

Wrote base memes as JSON object


['base_memes_from_imgflip.json',
 'base_memes_s3_links.json',
 'bleached_data.json',
 'Captions.txt',
 'Captions_new.txt',
 'cleaned_captions.txt',
 'combined_data.json',
 'memefly_bounding_box_data.json']