# **AI Storytelling**

<img src="assets/logo.jpg" width="500" height="300">

This is a Natural Language Processing platform for converting short stories into audiobooks, with the following features:
- character extraction from the text,
- voice generation for dialogue, narration and individual characters,
- background music generation from the text,
- activity sound generation for individual lines, and
- combination of all the above features to create the audiobook.

Depending on the success of the project, it could later be extended with image or video processing features.

### Import Libraries and Functions

In [3]:
# basic libraries
import os
import pandas as pd

In [103]:
# ignore warnings
# Installs a blanket 'ignore' filter, so every Python warning raised after this
# cell runs is suppressed.
# NOTE(review): this also hides warnings that may matter (e.g. deprecations);
# consider narrowing the filter to specific categories.
import warnings
warnings.filterwarnings('ignore')

In [104]:
# nlp libraries
# Load spaCy's "en_core_web_sm" pipeline once; `nlp` is called on the story
# text later to find named entities. The model package has to be installed
# separately from spaCy itself.
import spacy
nlp = spacy.load("en_core_web_sm")

In [7]:
# text-to-speech libraries
from gtts import gTTS
from moviepy.editor import concatenate_audioclips, AudioFileClip
import librosa
import soundfile as sf

### Defining Files and DataFrames

In [211]:
# checking stories
# os.listdir returns entries in arbitrary order, so sort them to get the same
# listing on every machine and every run.
sorted(os.listdir("stories"))

['beautiful garden.txt', 'cafe at midnight.txt', 'the painted door.txt']

In [227]:
# input title
# Ask for the story title, i.e. the file name inside "stories/" without the
# ".txt" extension. Surrounding whitespace is stripped and a typed ".txt"
# suffix is dropped, because the title is inserted verbatim into the file path
# and either one would make the path point at a file that does not exist.
title = input("Enter text file name:").strip()
if title.lower().endswith(".txt"):
    title = title[:-len(".txt")].rstrip()

Enter text file name: beautiful garden


In [228]:
# opening text file
# Read the whole story into memory. The encoding is pinned to UTF-8 because
# open() otherwise falls back to the platform's locale encoding, which can
# garble non-ASCII characters (curly quotes, dashes, accents) on some systems.
story = f"stories/{title}.txt"
with open(story, "r", encoding="utf-8") as f:
    text = f.read()

In [229]:
# create dataframes
# Two empty tables that later cells fill in:
#   df_characters - one row per detected character
#   df_lines      - one row per narration or dialogue segment
character_columns = ["cid", "name", "frequency", "features"]
line_columns = ["pid", "ndid", "character", "dialogue", "narration"]
df_characters = pd.DataFrame(columns=character_columns)
df_lines = pd.DataFrame(columns=line_columns)

# Character Identification

In [230]:
# finding personal entities from text
# Run the spaCy pipeline over the story and keep the text of every entity
# labelled PERSON. Duplicates are removed with dict.fromkeys rather than set():
# a dict keeps the order of first appearance, whereas the iteration order of a
# set of strings can change between kernel sessions, which would make any ids
# derived from the position in this list differ from run to run.
entities = nlp(text).ents
personal_entities = list(dict.fromkeys(
    entity.text for entity in entities if entity.label_ == 'PERSON'
))

In [231]:
# adding persons and counts to df_characters
# Build all rows first and create the table in a single call. Appending row by
# row through the private DataFrame._append copies the frame on every
# iteration and, because it extends the existing frame, duplicates every
# character when the cell is run a second time. Rebuilding from the list makes
# the cell idempotent.
# Note: 'frequency' is a plain substring count of the name in the text, so a
# name that occurs inside a longer word or name is counted there as well.
character_rows = [
    {'cid': cid, 'name': person, 'frequency': text.count(person), 'features': None}
    for cid, person in enumerate(personal_entities)
]
df_characters = pd.DataFrame(character_rows, columns=df_characters.columns)
# Next free character id, kept under its original name for any later use.
cid_num = len(character_rows)

In [232]:
# characters
# Preview (at most) the first ten rows of the character table.
df_characters.head(10)

Unnamed: 0,cid,name,frequency,features
0,0,Lily,12,
1,1,Thompson,8,


# Line Identification

In [233]:
# converting text to paragraphs
# Every line of the file is treated as one paragraph. Lines that are empty or
# contain only whitespace are dropped: a whitespace-only line is not equal to
# '' and would otherwise travel down the pipeline as a blank narration with
# nothing to speak.
paragraphs = text.split("\n")
non_empty_paragraphs = [paragraph for paragraph in paragraphs if paragraph.strip()]

In [234]:
# function to identify narrations and dialogues
def identify_narrations_and_dialogues(paragraph):
    """
    Split a paragraph into its narration and dialogue segments.

    The paragraph is cut at every straight double quote ("). Quotes open and
    close in turn, so pieces at even positions of the split lie outside quotes
    (narration) and pieces at odd positions lie inside quotes (dialogue). The
    type is taken from the position in the unfiltered split, so a paragraph
    that starts with a quote, or that contains two quotations back to back,
    is still classified correctly. Empty and whitespace-only pieces are
    skipped because they carry nothing to read aloud.

    :param paragraph: string of paragraph in a story
    :return: list of tuples in (id, name_of_speaker, dialogue, narration) format;
        id numbers the kept segments from 0 in reading order, name_of_speaker
        is always None here, and exactly one of dialogue / narration holds the
        segment text (the other is None). An empty paragraph gives [].
    """
    narrations_and_dialogues = []
    for position, division in enumerate(paragraph.split('"')):
        if not division.strip():
            continue
        # ids count only the segments that are kept, so they stay consecutive
        segment_id = len(narrations_and_dialogues)
        if position % 2 == 1:
            # odd position: the text sits between an opening and a closing quote
            narrations_and_dialogues.append((segment_id, None, division, None))
        else:
            narrations_and_dialogues.append((segment_id, None, None, division))
    return narrations_and_dialogues


In [235]:
# identifying lines (narrations or dialogues) from each paragraphs
# Collect one (pid, ndid, character, dialogue, narration) tuple per segment and
# build the table in a single call. Appending row by row through the private
# DataFrame._append copies the frame on every iteration and duplicates all
# lines when the cell is run a second time. Rebuilding from the list makes the
# cell idempotent.
line_rows = [
    (pid,) + nad
    for pid, paragraph in enumerate(non_empty_paragraphs)
    for nad in identify_narrations_and_dialogues(paragraph)
]
# dtype=object keeps the missing dialogue/narration entries as None instead of
# letting pandas infer another missing-value marker for those columns.
df_lines = pd.DataFrame(line_rows, columns=df_lines.columns, dtype=object)
# Next free paragraph id, kept under its original name for any later use.
pid_num = len(non_empty_paragraphs)

In [236]:
# lines
# Preview (at most) the first ten rows of the line table.
df_lines.head(10)

Unnamed: 0,pid,ndid,character,dialogue,narration
0,0,0,,,"Once upon a time, in a small, quiet village, t..."
1,1,0,,,"One sunny afternoon, as Lily was by the river,..."
2,2,0,,,"Unbeknownst to Lily, a kind stranger had been ..."
3,2,1,,You have a heart as beautiful as that butterfl...,
4,3,0,,,"Lily blushed, not used to receiving compliment..."
5,3,1,,I've been searching for someone just like you....,
6,4,0,,,Lily's eyes sparkled with excitement. She had ...
7,4,1,,"I'd love to help,",
8,4,2,,,she replied.
9,5,0,,,"From that day on, Lily spent her afternoons ca..."


# Audio Generation

In [237]:
# creating audios
# Convert every row of df_lines to speech with gTTS and save it as
# conversions/<row index>.mp3. Dialogue rows and narration rows are sent to
# different Google TLDs ('co.in' vs 'ie') so the two are spoken differently.
print("Step 1: CONVERSIONS")
# The target folder may not exist on a fresh checkout; save() does not create it.
os.makedirs("conversions", exist_ok=True)
for index, row in df_lines.iterrows():
    # pd.notna is False for both None and NaN, so a missing dialogue is
    # detected whichever missing-value marker the frame holds.
    if pd.notna(row['dialogue']):
        speech_gtts = gTTS(text=row['dialogue'], lang='en', slow=False, tld='co.in')
    else:
        speech_gtts = gTTS(text=row['narration'], lang='en', slow=False, tld='ie')
    temp_file = f"conversions/{index}.mp3"
    speech_gtts.save(temp_file)
    # Report progress only after the file was written, so the log never claims
    # a conversion that actually failed.
    print(f"Line {index+1}/{df_lines.shape[0]} converted.")
print("Conversions finished.")

Step 1: CONVERSIONS
Line 1/16 converted.
Line 2/16 converted.
Line 3/16 converted.
Line 4/16 converted.
Line 5/16 converted.
Line 6/16 converted.
Line 7/16 converted.
Line 8/16 converted.
Line 9/16 converted.
Line 10/16 converted.
Line 11/16 converted.
Line 12/16 converted.
Line 13/16 converted.
Line 14/16 converted.
Line 15/16 converted.
Line 16/16 converted.
Conversions finished.


In [238]:
# combining audios
# Open the per-line mp3 files in row order (0.mp3, 1.mp3, ...) and join them
# into a single clip.
print("Step 2: COMBINATION")
line_count = len(df_lines)
clip_paths = [f"conversions/{i}.mp3" for i in range(line_count)]
clips = list(map(AudioFileClip, clip_paths))
final_clip = concatenate_audioclips(clips)
print("Combination finished.")

Step 2: COMBINATION
Combination finished.


In [239]:
# adjust speed
# Write the combined clip to a temporary mp3, load it back as a waveform at
# the file's own sample rate (sr=None), then time-stretch it by the given rate.
print("Step 3: ADJUSTMENT")
slow_path = "conversions/final_slow.mp3"
speed_rate = 1.25
final_clip.write_audiofile(slow_path)
print("final_slow.mp3 downloaded.")
y, sr = librosa.load(slow_path, sr=None)
y_speed = librosa.effects.time_stretch(y, rate=speed_rate)
print("Speeded over.")

Step 3: ADJUSTMENT
MoviePy - Writing audio in conversions/final_slow.mp3


                                                                                                                       

MoviePy - Done.
final_slow.mp3 downloaded.
Speeded over.


In [240]:
# removing audios
# Delete the per-line mp3 files and the temporary combined file.
print("Step 4: REMOVAL")
# The AudioFileClip objects opened for the combination step are still holding
# their readers on these files. Close them first so no reader process is left
# behind and the files are not in use when they are deleted (deleting a file
# that is still open fails on Windows).
for clip in clips:
    clip.close()
for i in range(df_lines.shape[0]):
    os.remove(f"conversions/{i}.mp3")
    print(f"Removed {i}.mp3")
os.remove("conversions/final_slow.mp3")
print("Removal over.")

Step 4: REMOVAL
Removed 0.mp3
Removed 1.mp3
Removed 2.mp3
Removed 3.mp3
Removed 4.mp3
Removed 5.mp3
Removed 6.mp3
Removed 7.mp3
Removed 8.mp3
Removed 9.mp3
Removed 10.mp3
Removed 11.mp3
Removed 12.mp3
Removed 13.mp3
Removed 14.mp3
Removed 15.mp3
Removal over.


In [241]:
# downloading final audio
# Write the speed-adjusted waveform to audiobooks/<title with underscores>.mp3.
print("Step 5: DOWNLOADING")
# Make sure the output folder exists: sf.write does not create it.
os.makedirs("audiobooks", exist_ok=True)
final_title = title.replace(" ", "_")
sf.write(f"audiobooks/{final_title}.mp3", y_speed, sr)
print(f"{final_title}.mp3 downloaded")

Step 5: DOWNLOADING
beautiful_garden.mp3 downloaded
