### Download Podcasts

In [1]:
import requests
import os
import re
from bs4 import BeautifulSoup
import dateutil.parser

In [2]:
rss_feeds = {'Dave Ramsey': 'https://feeds.megaphone.fm/RM4031649020', 
             'Huberman-Lab': 'https://feeds.megaphone.fm/hubermanlab',
             'Peter Attia MD': 'https://peterattiadrive.libsyn.com/rss',
             'Tim Ferriss Show': 'https://rss.art19.com/tim-ferriss-show',}

In [3]:
for value in rss_feeds.values():
    print(value)

https://feeds.megaphone.fm/RM4031649020
https://feeds.megaphone.fm/hubermanlab
https://peterattiadrive.libsyn.com/rss
https://rss.art19.com/tim-ferriss-show


In [4]:
podcast = 'Tim Ferriss Show'
page = requests.get(rss_feeds[podcast])
soup = BeautifulSoup(page.text, 'xml')

In [5]:
def get_all_podcast_items(rss_feeds):
    all_items = []
    for value in rss_feeds.values():
        page = requests.get(value)
        all_items += BeautifulSoup(page.text, 'xml').find_all('item')
        
    return all_items

In [6]:
all_items = get_all_podcast_items(rss_feeds)

In [7]:
podcast_items = soup.find_all('item')
episodes = [podcast.find('enclosure')['url'] for podcast in podcast_items]
titles = [podcast.find('title').text for podcast in podcast_items]
dates = [podcast.find('pubDate').text for podcast in podcast_items]

In [8]:
def parse_date(date):
    return dateutil.parser.parse(date).strftime('%b-%d-%Y') # Add _%H%M for safety


In [9]:
def get_episodes(podcast_items, search=None, limit=5):
    output = []
    for podcast in podcast_items:
        if len(output) == limit:
            break
        elif search is not None:
            if re.search(search, podcast.find('description').text, re.I):
                output.append(podcast)
        else:
            output.append(podcast)
    
    return output

def get_episode_metadata(podcast_items):
    episode_urls = [podcast.find('enclosure')['url'] for podcast in podcast_items]
    episode_titles = [podcast.find('title').text for podcast in podcast_items]
    episode_release_dates = [parse_date(podcast.find('pubDate').text) for podcast in podcast_items]
    return list(zip(episode_urls, episode_titles, episode_release_dates))

In [10]:
#items = get_episodes(all_items, 'ketamine', limit=5)
metadata = get_episode_metadata(podcast_items)
urls, titles, release_dates = zip(*metadata)

In [11]:
# import datetime

# datetime.datetime.strptime(dates[0], '%a, %d %b %Y %X %f').strftime('%y')

In [12]:
EPISODE_COUNT = 10

In [13]:
path = f'./downloads/{podcast}'
if not os.path.exists(path):
    os.mkdir(path)

for i in range(EPISODE_COUNT):
    episode_url = episodes[i]
    redirect_url = requests.get(episode_url).url
    results = requests.get(redirect_url)
    file_name = release_dates[i]

    print(file_name)
    with open(f'{path}/{file_name}.mp3', 'wb') as f:
        f.write(results.content)
        
#     print('---')
#     print(titles[i])
#     #file_name = re.sub(r'[/\.\\]', '', titles[i])[:100]
#     file_name = titles[i].replace('/','-').replace('\\','-').replace('.',' ')[:100]

Oct-31-2022
Oct-28-2022
Oct-25-2022
Oct-19-2022
Oct-11-2022
Oct-08-2022
Oct-05-2022
Sep-29-2022
Sep-27-2022
Sep-27-2022


### Transcribe Podcasts

In [14]:
def read_file(filename, chunk_size=5242880):
    with open(filename, 'rb') as _file:
        while True:
            data = _file.read(chunk_size)
            if not data:
                break
            yield data

In [15]:
headers = {'authorization': os.environ['ASSEMBLY_AI_KEY']}

urls = []
files = f'./downloads/{podcast}'
file_names = os.listdir(files)
for file in file_names:
    filename = f'{files}/{file}'
    response = requests.post('https://api.assemblyai.com/v2/upload',
                            headers=headers,
                            data=read_file(filename))
    
    urls.append(response.json())
    
    

In [16]:
urls

[{'upload_url': 'https://cdn.assemblyai.com/upload/d19dcff0-72b3-44cf-bed6-5ca5d1c756a2'},
 {'upload_url': 'https://cdn.assemblyai.com/upload/84977a93-076d-46a5-abba-55dfa01fd75a'},
 {'upload_url': 'https://cdn.assemblyai.com/upload/f97ba805-b376-43e1-8dc4-71f95d0507cc'},
 {'upload_url': 'https://cdn.assemblyai.com/upload/e8d46b27-250f-4acb-abd9-affa4ee0fe9a'},
 {'upload_url': 'https://cdn.assemblyai.com/upload/40f197f3-4e7a-4cbb-b0e2-b6f864eee6ec'},
 {'upload_url': 'https://cdn.assemblyai.com/upload/07bd0686-606b-4d7c-8f3c-6f572fee53d2'},
 {'upload_url': 'https://cdn.assemblyai.com/upload/271b2fb4-4723-4339-97a4-0a0b2a1b469c'},
 {'upload_url': 'https://cdn.assemblyai.com/upload/e0c440c3-05d2-439c-963c-16447c4f1738'},
 {'upload_url': 'https://cdn.assemblyai.com/upload/235b3e04-0f48-45a7-8f1c-342106111465'}]

In [17]:
count=0
output_ids = []
for url in urls:
    print("Transcription #", count)
    endpoint = "https://api.assemblyai.com/v2/transcript"
    json = {
        "audio_url": url['upload_url'],
        "audio_start_from": 300000,
        "audio_end_at": 600000,
    }
    headers = {
        "authorization": os.environ['ASSEMBLY_AI_KEY'],
        "content-type": "application/json"
    }
    response = requests.post(endpoint, json=json, headers=headers)
    print(response)
    output_ids.append(response.json()['id'])
    count+=1

Transcription # 0
<Response [200]>
Transcription # 1
<Response [200]>
Transcription # 2
<Response [200]>
Transcription # 3
<Response [200]>
Transcription # 4
<Response [200]>
Transcription # 5
<Response [200]>
Transcription # 6
<Response [200]>
Transcription # 7
<Response [200]>
Transcription # 8
<Response [200]>


In [19]:
path = './transcripts'
if not os.path.exists(path):
    os.mkdir(path)

for tid in output_ids:
    transcript_url = f'https://api.assemblyai.com/v2/transcript/{tid}'
    response = requests.get(transcript_url, headers=headers)
    print(response.json()['text'])
    raw_text = response.json()['text']
    with open(f'./transcripts/{tid}.txt', 'w') as f:
        f.write(raw_text)

Cover to your current mattress and start sleeping as cool as 55 degrees Fahrenheit or as hot as 110 degrees Fahrenheit. It also splits your bed in half so your partner can choose a totally different temperature. My girlfriend runs hot all the time. She doesn't need cooling, she loves the heat. And we can have our own bespoke temperatures on either side, which is exactly what we're doing now for me and for many people. The result eight Sleep users fall asleep up to 32% faster, reduce sleep interruptions by up to and get more restful sleep overall. I can personally attest to this because I track it in all sorts of ways. It's the total solution for enhanced recovery so you can take on the next day feeling refreshed. And good news. Eight Sleep has launched the next generation of the pod. The new Pod Three enables more accurate sleep and health tracking with twice the number of sensors. It's just a smoother, better experience that delivers you the best sleep on earth. At least that has been

Voreeclothing.com forward slash Tim. That's vuori clothing.com slash tim not only will you receive 20% off your first purchase, but you'll also enjoy free shipping on any US orders over $75 and free returns. So check it out. Vouri clothing tim tim. That's vuor iclothing.com tim and discover the versatility of viori clothing. At this altitude, I can run flat out for a half mile before my hand starts shaking. Can I ask you a personal question? No. I'm a cybernetic organism living the show over metal mental Cellar Show hello, boys and girls, ladies and germs. This is Tim Ferriss. Welcome to another episode of The Tim Ferriss Show, where it is usually my job to sit down with world class performers of all different types, to tease out the habits, routines, favorite books and so on that you can apply and test in your own life. This time we have a slightly different format, which is backed by popular request, and I am the guest. If you would like an interview, you can choose any other episode

At this altitude, I can run flat out for a half mile before my hands start shaking. Can I ask you a personal question? I'm a cybernetic organism living to show our metal and hello boys and girls, ladies and germs, this is Tim Ferriss. Welcome to another episode of The Tim Ferriss Show, where it is usually my job to deconstruct world class performers, to tease out their routines, habits, etc. That you can apply to your own life and lives. This episode is something different. It's an experimental format that I am super excited about, and apparently so are many of you. It's backed by popular demand. This is an episode that scratches an itch I've had for many years now. I'm not always able to listen to every great podcast episode out there, even when they are by some of my closest friends. To answer that predicament, I decided to ask them, many of my friends, to send a top segment from their own podcast. Podcasts that I could listen to and more importantly, also share with you my dear list

Hello, boys and girls. This is Tim Ferrison. Welcome to another episode of The Tim Ferriss Show, where it is my job each episode to deconstruct, tease out the habits and routines, best practices, life lessons and so on from worldclass performers from many, many different disciplines. My guest today is Jim Collins, and this is a rare treat because very, very seldom does any media or interviews whatsoever. I have wanted to speak with him for more than a decade, in fact, and it was worth the wait. This conversation over, delivered on every level I can imagine, and I really hope you enjoy it as much as I did. So who is Jim Collins, this mysterious reclusive mastermind polymath? Jim Collins is a student and teacher of what makes great companies tick, and a Socratic advisor will get a better idea of what that means, particularly in the beginning of the interview, where he wants to ask me questions. So we do get to Jim's story, but he wants to, and wanted to, even before we started recording,

None


TypeError: write() argument must be str, not None