Created by: Akshay Chougule

Date: 12 Aug 2024 

## 1 Story Generation

In [1]:
#!pip install -U langchain-ollama

In [2]:
from langchain_ollama import OllamaLLM

llm = OllamaLLM(model="llama3.1")

In [3]:
# Ask the model for a short story and print its raw reply.
story_prompt = "Can you write a very short story of 7 sentences for 3 year old kid?"
response = llm.invoke(story_prompt)
print(response)

Here is a very short story:

Benny the bear loved to play outside. One sunny day, Benny went on an adventure in the woods. He saw a bright yellow flower and picked it to give to his mom. When he got home, Mommy Bear said "Oh, I love this flower!" She put it in a vase and they both smiled at it together. Benny was happy that he could make his mommy smile. Then Benny went outside to play some more!


In [4]:
type(response)

str

In [6]:
# Remove the model's lead-in line so that only the story text is kept.
story_preamble = 'Here is a very short story:\n\n'
temp = response.replace(story_preamble, '')
temp

'Benny the bear loved to play outside. One sunny day, Benny went on an adventure in the woods. He saw a bright yellow flower and picked it to give to his mom. When he got home, Mommy Bear said "Oh, I love this flower!" She put it in a vase and they both smiled at it together. Benny was happy that he could make his mommy smile. Then Benny went outside to play some more!'

In [2]:
# alternative starting point
# temp = """Benny the bear loved to play outside. One sunny day, Benny went on an adventure in the woods. He saw a bright yellow flower. When he got home, Mommy Bear said "Oh, I love this flower!" She put the flower in a vase. Benny bear and mommy bear both smiled."""

In [3]:
import re
# Sentence boundary: optional spaces, a "." or "?", any run of closing
# quotes/brackets, then optional spaces. "!" is not a boundary, and a text that
# ends with "." yields a trailing empty string in the result.
sentence_break = re.compile(r' *[\.\?][\'"\)\]]* *')
sentences = sentence_break.split(temp)

In [4]:
sentences

['Benny the bear loved to play outside',
 'One sunny day, Benny went on an adventure in the woods',
 'He saw a bright yellow flower',
 'When he got home, Mommy Bear said "Oh, I love this flower!" She put the flower in a vase',
 'Benny bear and mommy bear both smiled',
 '']

## 2 Text to Speech

In [14]:
# !pip install transformers
# !pip install gradio
# !pip install timm
# !pip install inflect
# !pip install phonemizer

In [20]:
from transformers.utils import logging

logging.set_verbosity_error()

In [21]:
from transformers import pipeline

# Build the text-to-speech pipeline used for all narration below.
tts_model_id = "kakao-enterprise/vits-ljs"
narrator = pipeline(task="text-to-speech", model=tts_model_id)



In [22]:
temp

'Benny the bear loved to play outside. One sunny day, Benny went on an adventure in the woods. He saw a bright yellow flower. When he got home, Mommy Bear said "Oh, I love this flower!" She put the flower in a vase. Benny bear and mommy bear both smiled.'

In [23]:
text = temp

In [24]:
narrated_text = narrator(temp)

In [25]:
# Play here
from IPython.display import Audio as IPythonAudio

# Show an inline audio player for the narrated story (first row of the audio array).
waveform = narrated_text["audio"][0]
IPythonAudio(waveform, rate=narrated_text["sampling_rate"])

In [None]:
# Or save it
# Import the submodule explicitly: a bare `import scipy` only exposes
# `scipy.io.wavfile` when SciPy loads its submodules implicitly, which older
# releases do not do. This form still binds the name `scipy` for later cells.
import scipy.io.wavfile

scipy.io.wavfile.write("story.wav",
                       rate=narrated_text["sampling_rate"],
                       data=narrated_text["audio"][0])

In [30]:
type(narrated_text), narrated_text, type(narrated_text["audio"][0])

(dict,
 {'audio': array([[ 5.4739695e-04,  7.3264731e-04,  3.9657738e-04, ...,
          -4.4048102e-05, -1.8110548e-05, -8.1422986e-06]], dtype=float32),
  'sampling_rate': 22050},
 numpy.ndarray)

In [36]:
def sentence_to_audio(sentences):
    """Narrate each non-empty sentence and save it as its own audio file.

    Each sentence is passed to the module-level ``narrator`` pipeline and the
    result is written to ``take3_audio{i}.mp3`` in the current working
    directory, where ``i`` is the sentence's position in ``sentences``.

    Empty or whitespace-only entries (such as the trailing ``''`` left by the
    regex split) are skipped based on their content rather than on their
    position, so a final sentence that is not followed by an empty string is
    no longer dropped. Positions are not renumbered, which keeps every audio
    file aligned with the scene of the same index.

    Note: the data is written with ``scipy.io.wavfile.write``, so the files
    contain WAV data despite the ``.mp3`` suffix. The name is kept because a
    later cell loads ``take3_audio0.mp3``.

    Args:
        sentences: sequence of sentence strings.
    """
    # Local import so the function does not rely on an earlier cell having
    # made the ``scipy.io.wavfile`` submodule available.
    from scipy.io import wavfile

    for i, sentence in enumerate(sentences):
        if not sentence.strip():
            # Nothing to narrate for a blank entry.
            continue
        narrated_text = narrator(sentence)
        wavfile.write(
            f'take3_audio{i}.mp3',
            rate=narrated_text["sampling_rate"],
            data=narrated_text["audio"][0],
        )

In [38]:
sentence_to_audio(sentences)

## 3 Text to Video

In [5]:
import torch
from diffusers import AnimateDiffPipeline, LCMScheduler, MotionAdapter
from diffusers.utils import export_to_gif

To avoid the "PEFT backend is required" error:

(As of 1 April 2024) Ensure the installed libraries meet the following minimum versions:

    peft >= 0.6.0
    transformers >= 4.34.0


In [6]:
import transformers
transformers.__version__

'4.38.2'

In [7]:
#!pip install peft
import peft
peft.__version__

'0.12.0'

In [8]:
# Values shared by the calls below.
lcm_repo = "wangfuyun/AnimateLCM"
half_precision = torch.float16

# Load the motion adapter, then the base pipeline that uses it.
adapter = MotionAdapter.from_pretrained(lcm_repo, torch_dtype=half_precision)
pipe = AnimateDiffPipeline.from_pretrained(
    "emilianJR/epiCRealism",
    motion_adapter=adapter,
    torch_dtype=half_precision,
)
# Swap in an LCM scheduler built from the pipeline's existing scheduler config.
pipe.scheduler = LCMScheduler.from_config(
    pipe.scheduler.config,
    beta_schedule="linear",
)

# Load the LoRA weights under the adapter name "lcm-lora" and activate them at weight 0.8.
pipe.load_lora_weights(
    lcm_repo,
    weight_name="AnimateLCM_sd15_t2v_lora.safetensors",
    adapter_name="lcm-lora",
)
pipe.set_adapters(["lcm-lora"], [0.8])

pipe.enable_vae_slicing()
pipe.enable_model_cpu_offload()

Loading pipeline components...:   0%|          | 0/6 [00:00<?, ?it/s]



In [19]:
sentences[0]

'Benny the bear loved to play outside'

In [20]:
len(sentences)

6

In [25]:
# If CUDA runs out of memory:
# run `nvidia-smi` to list the processes currently using the GPU,
# then use the process ID (PID) of the offending process to kill it with
# sudo kill -9 <PID>

In [9]:
def sentence_to_gif(sentences):
    """Render one animated GIF per non-empty sentence.

    Each sentence is turned into the prompt
    ``"<sentence>, low resolution cartoon"``, passed to the module-level
    ``pipe``, and the first returned frame sequence is saved as
    ``take3_scene_{i}.gif`` in the current working directory, where ``i`` is
    the sentence's position in ``sentences``.

    Empty or whitespace-only entries (such as the trailing ``''`` left by the
    regex split) are skipped, so no pipeline run is spent on a prompt that
    contains no sentence. Positions are not renumbered, which keeps every
    scene aligned with the audio file of the same index.

    Args:
        sentences: sequence of sentence strings.
    """
    for i, sentence in enumerate(sentences):
        if not sentence.strip():
            # No scene to render for a blank entry.
            continue
        output = pipe(
            prompt=f'{sentence}, low resolution cartoon',
            negative_prompt="bad quality, worse quality",
            num_frames=16,
            guidance_scale=2.0,
            num_inference_steps=6,
            # A fresh generator with the same seed is created for every scene.
            generator=torch.Generator("cpu").manual_seed(0),
        )
        frames = output.frames[0]
        export_to_gif(frames, f'take3_scene_{i}.gif')

In [10]:
sentence_to_gif(sentences=sentences)

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

In [39]:
# import gc
# torch.cuda.empty_cache()
# gc.collect()

## 4 Patch audio and gif together

In [15]:
#!pip install --upgrade pip

In [17]:
#!pip install moviepy

In [40]:
from moviepy.editor import *

In [51]:
# Load the narration for the first sentence.
# NOTE(review): the earlier cells write the audio and GIF files to the working
# directory, but they are read from "take3_/" here - presumably the files were
# moved there by hand; confirm before re-running from a fresh kernel.
audio = AudioFileClip("take3_/take3_audio0.mp3")
# Load the GIF as an animated clip and loop it for the length of the narration.
# ImageClip would read a single still frame and discard the animation.
clip = VideoFileClip("take3_/take3_scene_0.gif").loop(duration=audio.duration)

In [52]:
# Set the audio of the clip
clip = clip.set_audio(audio)

In [53]:
audio.duration

2.48

In [54]:
# Export the clip as an MP4 at 24 frames per second.
# NOTE(review): the recorded output of this cell ends in
# `TypeError: must be real number, not NoneType`. The traceback is not shown,
# so the cause still needs to be confirmed.
clip.write_videofile("video.mp4", fps=24)

Moviepy - Building video video.mp4.
MoviePy - Writing audio in videoTEMP_MPY_wvf_snd.mp3


                                                       

MoviePy - Done.
Moviepy - Writing video video.mp4





TypeError: must be real number, not NoneType