In [1]:
import os
import re
import json
import pandas as pd
import matplotlib.pyplot as plt

from collections import defaultdict
from random import sample

In [2]:
data_path = os.path.join('..', 'data')
mediasum_path = os.path.join(data_path, 'mediasum', 'news_dialogue.json')
feather_path = os.path.join(data_path, 'mediasum', 'mediasum.ftr')

In [39]:
def print_size(dfs):
    total_size = 0
    for key, val in dfs.items():
        size = len(val)
        print(f'{key}size',size)
        total_size += size
        
    print('Total size', total_size)


def print_first_row(df):
    line_0 = df.iloc[0]
    for key in line_0.keys():
        print(key.upper())
        print(line_0[key])
        print('')

In [4]:
def create_dialogue(speaker_list, utt_list):
    dialogue = ''
    for s, u in zip(speaker_list, utt_list):
        utt = f'{s}: {u}\n'
        dialogue += utt 
    
    return dialogue

In [5]:
#if os.path.isfile(feather_path):
#    mediasum_df = pd.read_feather(feather_path)
#else:
mediasum_df = pd.read_json(mediasum_path, lines=True).T
mediasum_df = pd.DataFrame([row[0] for _, row in mediasum_df.iterrows()])
#    mediasum_df.to_feather(feather_path)

In [6]:
utt = mediasum_df['utt']
speakers = mediasum_df['speaker']

In [7]:
dialogues = speakers.combine(utt, create_dialogue)

In [17]:
dialogues = dialogues.rename(columns={0: 'dialogues'})

In [19]:
dialogues_path = os.path.join(data_path, 'mediasum', 'dialogues.ftr')
dialogues.to_feather(dialogues_path)

In [22]:
dialogues.head()

Unnamed: 0,dialogues
0,"FARAI CHIDEYA, host: Now, moving on, Forest Wh..."
1,"RACHEL MARTIN, HOST: You have heard it again a..."
2,"Mr. JEFF OBAFEMI CARR (Actor, Playwright): I c..."
3,"FARAI CHIDEYA, host: This is NEWS & NOTES. I'm..."
4,"FARAI CHIDEYA, host: I'm Farai Chideya and thi..."


In [20]:
mediasum_df = pd.concat([mediasum_df, dialogues])

In [25]:
mediasum_df["dialogues"] = dialogues

In [26]:
mediasum_df.head()

Unnamed: 0,id,program,date,url,title,summary,utt,speaker,dialogues
0,NPR-1,News & Notes,2007-11-28,https://www.npr.org/templates/story/story.php?...,Black Actors Give Bible Star Appeal,"More than 400 black actors, artists and minist...","[Now, moving on, Forest Whitaker as Moses, Tis...","[FARAI CHIDEYA, host, FARAI CHIDEYA, host, Mr....","FARAI CHIDEYA, host: Now, moving on, Forest Wh..."
1,NPR-2,Weekend Edition Sunday,2016-10-23,https://www.npr.org/2016/10/23/499042298/young...,"Young, First-Time Voters Share Views On Electi...",NPR's Rachel Martin speaks with young voters w...,[You have heard it again and again - this is a...,"[RACHEL MARTIN, HOST, ASHANTI MARTINEZ, LAUREN...","RACHEL MARTIN, HOST: You have heard it again a..."
2,NPR-3,News & Notes,2007-11-30,https://www.npr.org/templates/story/story.php?...,Snapshots: On Solid Ground,"In this week's snapshot, actor and playwright ...","[I came close to running out of luck, when I a...","[Mr. JEFF OBAFEMI CARR (Actor, Playwright), CH...","Mr. JEFF OBAFEMI CARR (Actor, Playwright): I c..."
3,NPR-4,News & Notes,2007-11-30,https://www.npr.org/templates/story/story.php?...,"Washington, D.C. Facing HIV/AIDS Epidemic",A new study says one in 50 people in the natio...,"[This is NEWS & NOTES. I'm Farai Chideya., In ...","[FARAI CHIDEYA, host, FARAI CHIDEYA, host, Dr....","FARAI CHIDEYA, host: This is NEWS & NOTES. I'm..."
4,NPR-5,News & Notes,2007-11-30,https://www.npr.org/templates/story/story.php?...,Coping When AIDS Hits Your Family: Part II,When a family member is diagnosed with HIV/AID...,"[I'm Farai Chideya and this is NEWS & NOTES., ...","[FARAI CHIDEYA, host, FARAI CHIDEYA, host, FAR...","FARAI CHIDEYA, host: I'm Farai Chideya and thi..."


In [28]:
mediasum_df.reset_index(inplace=True)

In [29]:
mediasum_df.to_feather(feather_path)

In [30]:
mediasum_df.head()

Unnamed: 0,index,id,program,date,url,title,summary,utt,speaker,dialogues
0,0,NPR-1,News & Notes,2007-11-28,https://www.npr.org/templates/story/story.php?...,Black Actors Give Bible Star Appeal,"More than 400 black actors, artists and minist...","[Now, moving on, Forest Whitaker as Moses, Tis...","[FARAI CHIDEYA, host, FARAI CHIDEYA, host, Mr....","FARAI CHIDEYA, host: Now, moving on, Forest Wh..."
1,1,NPR-2,Weekend Edition Sunday,2016-10-23,https://www.npr.org/2016/10/23/499042298/young...,"Young, First-Time Voters Share Views On Electi...",NPR's Rachel Martin speaks with young voters w...,[You have heard it again and again - this is a...,"[RACHEL MARTIN, HOST, ASHANTI MARTINEZ, LAUREN...","RACHEL MARTIN, HOST: You have heard it again a..."
2,2,NPR-3,News & Notes,2007-11-30,https://www.npr.org/templates/story/story.php?...,Snapshots: On Solid Ground,"In this week's snapshot, actor and playwright ...","[I came close to running out of luck, when I a...","[Mr. JEFF OBAFEMI CARR (Actor, Playwright), CH...","Mr. JEFF OBAFEMI CARR (Actor, Playwright): I c..."
3,3,NPR-4,News & Notes,2007-11-30,https://www.npr.org/templates/story/story.php?...,"Washington, D.C. Facing HIV/AIDS Epidemic",A new study says one in 50 people in the natio...,"[This is NEWS & NOTES. I'm Farai Chideya., In ...","[FARAI CHIDEYA, host, FARAI CHIDEYA, host, Dr....","FARAI CHIDEYA, host: This is NEWS & NOTES. I'm..."
4,4,NPR-5,News & Notes,2007-11-30,https://www.npr.org/templates/story/story.php?...,Coping When AIDS Hits Your Family: Part II,When a family member is diagnosed with HIV/AID...,"[I'm Farai Chideya and this is NEWS & NOTES., ...","[FARAI CHIDEYA, host, FARAI CHIDEYA, host, FAR...","FARAI CHIDEYA, host: I'm Farai Chideya and thi..."


In [33]:
print_first_row(mediasum_df)

INDEX
0

ID
NPR-1

PROGRAM
News & Notes

DATE
2007-11-28

URL
https://www.npr.org/templates/story/story.php?storyId=16697288

TITLE
Black Actors Give Bible Star Appeal

SUMMARY
More than 400 black actors, artists and ministers are bringing the Gospel to life in the audio book, The Bible Experience:The Complete Bible. Farai Chideya talks with producer Kyle Bowser and actress Wendy Raquel Robinson, who lends her voice to the project.

UTT
['Now, moving on, Forest Whitaker as Moses, Tisha Campbell Martin as Mary Magdalene - well, that\'s all in "The Bible Experience." A New Testament edition was released in 2006. This edition is billed as "The Complete Bible." It doesn\'t have one person reading the gospels. It features nearly 400 African-American artists, actors and ministers, plus sound effects.', "Just listen to Blair Underwood's rendition of Jesus on the cross.", '(As Jesus) My God, my God, why have you forsaken me?', 'Now, we\'ve got two people affiliated with the project with us tod

In [35]:
for line in mediasum_df.iloc[0]['dialogues'].split('\n'):
    print(line)

FARAI CHIDEYA, host: Now, moving on, Forest Whitaker as Moses, Tisha Campbell Martin as Mary Magdalene - well, that's all in "The Bible Experience." A New Testament edition was released in 2006. This edition is billed as "The Complete Bible." It doesn't have one person reading the gospels. It features nearly 400 African-American artists, actors and ministers, plus sound effects.
FARAI CHIDEYA, host: Just listen to Blair Underwood's rendition of Jesus on the cross.
Mr. BLAIR UNDERWOOD (Actor): (As Jesus) My God, my God, why have you forsaken me?
FARAI CHIDEYA, host: Now, we've got two people affiliated with the project with us today. Kyle Bowser, he co-produced "The Bible Experience" and actress Wendy Raquel Robinson, one of the actors in "The Bible Experience," and she also stars in the CW series, "The Game."
FARAI CHIDEYA, host: Hi folks, how are you doing?
Ms. WENDY RAQUEL ROBINSON (Actress): Great.
Mr. KYLE BOWSER (Co-producer, "The Bible Experience: The Complete Bible"): Great. How

In [40]:
dialogsum_df = print_size_and_concat({
    'total ': mediasum_df
})

total size 927192
Total size 927192


In [41]:
print('TOPICS')
for i in range(10):
    print(mediasum_df.iloc[i]['title'])

TOPICS
Black Actors Give Bible Star Appeal
Young, First-Time Voters Share Views On Election In Two Weeks
Snapshots: On Solid Ground
Washington, D.C. Facing HIV/AIDS Epidemic
Coping When AIDS Hits Your Family: Part II
Dissecting the Republican Debate
Letters: Anger and Gloating
What Salmonella Scare Means For Farmers
Economics Wrap-Up: Black Friday Boom
Problems for Southern Baptists
