In [None]:
#@title ## Coursework: Illustrated Prose & Poetry of Comical Hauntings

In [None]:
#@title Installations & Imports
# `display` and `HTML` are taken from the public IPython.display module:
# importing them from IPython.core.display has been deprecated since
# IPython 7.14 and fails on recent releases.
from IPython.display import HTML, clear_output, display
import os
import ipywidgets

# Mount Google Drive and work from the coursework folder so that the relative
# paths used by later cells (stories/, requirements.txt, model files) resolve.
from google.colab import drive, files
drive.mount('/content/drive')
drive_path = '/content/drive/My Drive/MSc/Sem2_CC/'
os.chdir(drive_path)

# to save and print images in the final result
# jupyter_path + temp_img_dir is the on-disk folder; temp_img_dir on its own is
# what later cells put into <img src="...">.
# NOTE(review): this presumably relies on Colab serving /nbextensions/ from
# jupyter_path -- confirm.
jupyter_path = '/usr/local/share/jupyter'
temp_img_dir = '/nbextensions/generated_images/'
os.makedirs(jupyter_path+temp_img_dir, exist_ok=True)

try:
  import gpt_2_simple as gpt2
except Exception as e:
  # if this is running for the first time then download required libraries
  print(e)
  !sudo apt-get install wkhtmltopdf
  !pip install -r requirements.txt
  import gpt_2_simple as gpt2

  # There is a bug in jax, so uninstall this
  !pip uninstall -y jax jaxlib
  !pip install diffusers==0.11.1
  gpt2.download_gpt2(model_name="124M")

import nltk
nltk.download('punkt')

import gensim
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
from nltk.tokenize import word_tokenize
from gensim.models.doc2vec import Doc2Vec

import tensorflow as tf
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import torch
from diffusers import StableDiffusionPipeline
from PIL import Image

import pdfkit

import helper_functions as help_

# Start a TensorFlow session and load the GPT-2 weights used by the
# story-generation cell.
# Reset the default graph before creating the session.
# NOTE(review): presumably so this cell can be re-run in the same kernel
# without clashing with a previously built graph -- confirm.
tf.compat.v1.reset_default_graph()
tf_session1 = gpt2.start_tf_sess()
# Load the checkpoint named 'model-500'.
# NOTE(review): presumably the fine-tuned weights saved at step 500 -- confirm.
gpt2.load_gpt2(tf_session1, checkpoint='model-500')

# Hide the mount / install / download logs printed above.
clear_output()

In [None]:
# @title Generate a story
# Sample a single story from the loaded model. The seed is fixed, so the same
# sampling settings are used on every run.
text = gpt2.generate(tf_session1, run_name='run1', seed=57,
                     nsamples=1, top_k=20, prefix="<|startoftext|>", include_prefix=False,
                     return_as_list=True)[0]

# cleaned_lines[0] is used as the story title / file name by this cell and the
# ones below; the remaining entries are rendered line by line.
cleaned_lines = help_.clean_text(text)

# Create the output folder on first use instead of failing with
# FileNotFoundError, and write with an explicit encoding so non-ASCII
# characters in the generated text do not depend on the platform default.
os.makedirs('stories', exist_ok=True)
# NOTE(review): writelines() does not add line separators -- this assumes
# help_.clean_text returns newline-terminated lines; confirm.
with open('stories/%s.txt'%cleaned_lines[0],'w', encoding='utf-8') as f:
  f.writelines(cleaned_lines)

In [None]:
# Display illustrated content
# Build an HTML page for the story: a title heading followed by a table with
# one row per line, and an illustration next to every line long enough to be
# used as an image prompt.
from html import escape as html_escape  # stdlib; imported here so the cell runs on its own

# Text appended to every image prompt.
# NOTE(review): presumably an artist-style cue for the image model -- confirm.
extra = "Greg Rutkowski"
thumb_size = (300, 300)

story_title = cleaned_lines[0]
drive_img_dir = drive_path + 'generated_images/'
os.makedirs(drive_img_dir, exist_ok=True)  # do not assume the Drive folder already exists

text_row = """
            <tr>
                <td width="500"><h4>%s</h4></td>
            </tr>
            """
heading_row = """
            <tr>
                <td><h1>%s</h1></td>
            </tr>
            """
image_row = """
            <tr>
                <td width="500"><h4>%s</h4></td>
                <td><img src="%s" alt="img"/></td>
            </tr>
            """

html_str = ""  # accumulates the <tr> rows only; the page skeleton is added afterwards
for i in range(1,len(cleaned_lines)):
    print(i,'of',len(cleaned_lines)-1)
    content = cleaned_lines[i]
    # Generated text is untrusted markup-wise: escape it so stray '<' or '&'
    # characters cannot break the page. The raw text is still used for the prompt.
    safe_content = html_escape(content)

    # fetch images only if the sentence has at least 10 words
    # NOTE(review): this check runs before the upper-case check, so upper-case
    # lines shorter than 10 words are rendered as <h4>, not <h1> -- confirm
    # this ordering is intended.
    if len(content.split(' ')) < 10:
        html_str += text_row%safe_content
        continue
    if content.isupper():
        html_str += heading_row%safe_content
        continue

    prompt = f"{content}. {extra}"
    image = help_.generate_img(prompt)
    if not image:
        # No image came back: fall back to a text-only row *before* touching
        # the image (previously resize() ran ahead of this check).
        html_str += text_row%safe_content
        continue

    img_path = "temp1_img%s.png"%i
    thumbnail = image.resize(thumb_size)  # resize once, save twice
    # Copy shown in the notebook output ...
    thumbnail.save(jupyter_path+temp_img_dir+img_path)
    # ... and a persistent copy on Drive, named after the story title.
    thumbnail.save(drive_img_dir+img_path.replace('temp1',story_title))

    html_str += image_row%(safe_content,temp_img_dir+img_path)
    display(image)

# The heading sits outside the table: block content placed directly inside
# <table> is invalid HTML.
complete_html = """
<html><body>
    <h1> %s </h1>
    <table>
      %s
    </table>
</body></html>"""%(html_escape(story_title.title()), html_str)

# Write under jupyter_path, i.e. the location the PDF step reads from, rather
# than /nbextensions at the filesystem root.
result_html_path = jupyter_path+'/nbextensions/result.html'
with open(result_html_path,'w') as f:
  f.write(complete_html)

clear_output()

display(HTML(complete_html))

In [None]:
# @title Calculate similarity to training data using cosine distance
# Compare the generated story with the training corpus: embed the story with
# the saved Doc2Vec model and take the cosine similarity against the stored
# average corpus embedding.
avg_embeddings = np.load('avg_corpus_embeddings.npy')
model = Doc2Vec.load('doc2vec_model.model')

# The story is lower-cased and tokenised before being embedded.
story_tokens = word_tokenize(text.lower())
text_embeddings = model.infer_vector(story_tokens)

# cosine_similarity works on 2-D inputs, so each vector becomes a single row.
corpus_row = avg_embeddings.reshape(1, -1)
story_row = text_embeddings.reshape(1, -1)
cosine_similarity(corpus_row, story_row)

In [None]:
# @title Storing the PDF of the generated story
# Render the illustrated story to a PDF on Drive.
# The HTML must be written to the same file pdfkit is asked to convert; writing
# to '/nbextensions/result.html' (filesystem root) targets a different path.
result_html_path = jupyter_path + '/nbextensions/result.html'
pdf_dir = drive_path + 'final_output/'
os.makedirs(pdf_dir, exist_ok=True)  # do not assume the Drive folder already exists

# The page references images by the server-relative URL prefix temp_img_dir.
# wkhtmltopdf loads the page from disk, so such URLs resolve against file:///
# and miss the saved images; point them at the real on-disk location instead.
pdf_html = complete_html.replace('src="%s' % temp_img_dir,
                                 'src="%s' % (jupyter_path + temp_img_dir))

with open(result_html_path,'w') as f:
  f.write(pdf_html)

# enable-local-file-access lets wkhtmltopdf read the local image files.
pdfkit.from_file(result_html_path,
                 pdf_dir+'%s.pdf'%cleaned_lines[0],
                 options={"enable-local-file-access": ""}, verbose=True)