# Off menu fuzzy matching

This notebook serves as a development environment for the logic that identifies restaurant mentions in the episode transcripts, using the fuzzywuzzy library. The final production code is located in off_menu/data_processing.py.

In [None]:
# Ensure imports can find my utils:

import sys
import os

# Resolve the project root as two directories above the kernel's current working directory.
# NOTE(review): this assumes the kernel was started from the notebook's own folder — confirm, since
# os.getcwd() follows wherever the kernel was launched, not where this file lives.
notebook_dir = os.getcwd()
project_root = os.path.abspath(os.path.join(notebook_dir, '..', '..'))

# Put the project root at the front of sys.path so the off_menu imports below resolve;
# the membership check keeps the cell idempotent when it is re-run.
if project_root not in sys.path:
    sys.path.insert(0, project_root)

# Diagnostic output only: shows the resolved root and the full import search path.
print(f"Project root added to sys.path: {project_root}")
print(f"Current sys.path: {sys.path}")

# Import libraries
import pandas as pd
from bs4 import BeautifulSoup
import requests 
import re 
from typing import List, Tuple, Dict # For type hinting the new return type#
import numpy as np
from fuzzywuzzy import process
from fuzzywuzzy import fuzz

# Project specific imports
from off_menu.utils import clean_text
from off_menu.utils import num_check
from off_menu.utils import find_num_end
from off_menu.utils import name_num_split
from off_menu.utils import clean_res
from off_menu.utils import get_episode_sentences
from off_menu.utils import create_sentence_list
from off_menu.utils import try_read_parquet

Project root added to sys.path: c:\Users\jbara\Data science projects (store here not desktop on onedrive)\Off Menu project
Current sys.path: ['c:\\Users\\jbara\\Data science projects (store here not desktop on onedrive)\\Off Menu project', 'C:\\Users\\jbara\\miniconda3\\python312.zip', 'C:\\Users\\jbara\\miniconda3\\DLLs', 'C:\\Users\\jbara\\miniconda3\\Lib', 'C:\\Users\\jbara\\miniconda3', 'c:\\Users\\jbara\\OneDrive\\Desktop\\Data_science\\Python projects\\Off Menu project\\.venv', '', 'c:\\Users\\jbara\\OneDrive\\Desktop\\Data_science\\Python projects\\Off Menu project\\.venv\\Lib\\site-packages', 'c:\\Users\\jbara\\OneDrive\\Desktop\\Data_science\\Python projects\\Off Menu project\\.venv\\Lib\\site-packages\\win32', 'c:\\Users\\jbara\\OneDrive\\Desktop\\Data_science\\Python projects\\Off Menu project\\.venv\\Lib\\site-packages\\win32\\lib', 'c:\\Users\\jbara\\OneDrive\\Desktop\\Data_science\\Python projects\\Off Menu project\\.venv\\Lib\\site-packages\\Pythonwin']


## Configuration and filepaths

In [13]:
# Directory holding the small test files used while developing in this notebook
test_temp_dir = os.path.join(project_root, 'data', 'test_temp')
# NOTE(review): the file name below contains a double underscore ("and__timestamps") — confirm it matches the file on disk
first_five_path = os.path.join(test_temp_dir, 'test_clean_text_and__timestamps_df.parquet') # First five transcripts and timestamps
# Head of the episode metadata table, which carries the restaurants_mentioned column
ep_metadata_head_filepath = os.path.join(test_temp_dir, 'ep_meta_and_mentions_head.parquet')

## Read test data (transcripts/timestamps, episode metadata), confirm structure

In [14]:
# Reading first five transcripts and timestamps

first_five_clean_transcript_timestamps_df = try_read_parquet(first_five_path)

# Reading the head of the episode metadata (not the full table), including the restaurants mentioned
ep_meta_and_mentions_head = try_read_parquet(ep_metadata_head_filepath)

# Confirm structure of test data
print("------First five df:------")
print(first_five_clean_transcript_timestamps_df.head())
print("\n------ep and mentions df head------")
print(ep_meta_and_mentions_head)

------First five df:------
   episode_number     guest_name  \
0               1  Scroobius Pip   
1               2     Grace Dent   
2               3  Richard Osman   
3               4     Nish Kumar   
4               5    Aisling Bea   

                               clean_transcript_text  \
0  starting point is 00:00:00 hello, listeners of...   
1  starting point is 00:00:00 hello, listeners of...   
2  starting point is 00:00:00 hello, listeners of...   
3                                                      
4  starting point is 00:00:00 hello, listeners of...   

                                 periodic_timestamps  
0  [{'episode_number': 1, 'start_index': 0, 'time...  
1  [{'episode_number': 2, 'start_index': 0, 'time...  
2  [{'episode_number': 3, 'start_index': 0, 'time...  
3                                                 []  
4  [{'episode_number': 5, 'start_index': 0, 'time...  

------ep and mentions df head------
   episode_number     guest_name  \
0               

## Refactoring fuzzymatching and timestamp algorithms

### Merge dataframes, develop function to merge

In [None]:
# Inline version of the merge: only episode_number and restaurants_mentioned are taken from
# ep_meta_and_mentions_head, so the transcript frame gains a single new column.
# A left join keeps every transcript row even when no metadata row matches.
# NOTE: combined_test_df is assigned again further down in this cell from combine_metadata_timestamps(),
# so the value produced here is overwritten.
combined_test_df = first_five_clean_transcript_timestamps_df.merge(
        ep_meta_and_mentions_head[['episode_number', 'restaurants_mentioned']],
        on='episode_number',
        how='left'
    )

def combine_metadata_timestamps(transcripts_timestamps_filepath, metadata_filepath):
    """
    Load both parquet files and attach the restaurant mentions to the transcripts.

    The metadata file is read first, then the transcripts/timestamps file; both frames are
    printed for inspection. The transcripts/timestamps frame is then left-joined on
    'episode_number' with the metadata's 'restaurants_mentioned' column, so every transcript
    row is kept whether or not a metadata row exists for it.

    Args:
        transcripts_timestamps_filepath: path handed to try_read_parquet for the transcripts/timestamps frame.
        metadata_filepath: path handed to try_read_parquet for the episode metadata frame.

    Returns:
        The merged DataFrame.
    """
    meta = try_read_parquet(metadata_filepath)
    transcripts = try_read_parquet(transcripts_timestamps_filepath)

    print(f"metadata_df: {meta}")
    print(f"transcripts timestamps df: {transcripts}")

    # Only the join key and the mentions column are carried over from the metadata
    mention_columns = ['episode_number', 'restaurants_mentioned']
    return transcripts.merge(meta[mention_columns], on='episode_number', how='left')

# Build the combined test frame from the two test parquet files and print it to confirm the merge
combined_test_df = combine_metadata_timestamps(first_five_path, ep_metadata_head_filepath)
print(combined_test_df) 

metadata_df:    episode_number     guest_name  \
0               1  Scroobius Pip   
1               2     Grace Dent   
2               3  Richard Osman   
3               4     Nish Kumar   
4               5    Aisling Bea   

                                                 url  \
0  https://podscripts.co/podcasts/off-menu-with-e...   
1  https://podscripts.co/podcasts/off-menu-with-e...   
2  https://podscripts.co/podcasts/off-menu-with-e...   
3  https://podscripts.co/podcasts/off-menu-with-e...   
4  https://podscripts.co/podcasts/off-menu-with-e...   

                     restaurants_mentioned  
0                  [Oli Babas Kerb Camden]  
1                     [Little Owl, Trullo]  
2  [Five Guys, Cora Pearl, Berners Tavern]  
3     [Bademiya, The Owl and The Pussycat]  
4     [Cafe Gratitude, Burger and Lobster]  
transcripts timestamps df:    episode_number     guest_name  \
0               1  Scroobius Pip   
1               2     Grace Dent   
2               3  Richard O

## Function to turn transcript string into list of clean sentences for search using RegEx

In [None]:
# We clean each sentence with regex to improve matching, so we must also store the 'true' (original) start index of each sentence,
# so that we can use it later to locate the nearest prior timestamp.

def _create_list_tuple_clean_sen_og_sen_og_index(text: str) -> List[Tuple[str, str, int]]:
    """
    Split a transcript into sentences and pair each one with its position in the source text.

    The text is split on the literal delimiter ". ". For every non-empty piece, one tuple is
    produced holding:
      1. the searchable sentence (stripped, punctuation removed, lower-cased),
      2. the original sentence (stripped only),
      3. the index in `text` at which the original sentence's first non-space character sits.

    The third value is what later lets a fuzzy match be traced back to the nearest prior
    timestamp in the clean transcript text.

    Pieces that are empty after stripping are skipped, but their length still advances the
    running position so later indices stay aligned with `text`.
    """
    delimiter = ". "
    pieces = text.split(delimiter)
    last_position = len(pieces) - 1

    sentence_records = []
    cursor = 0  # offset of the current piece's first character within `text`

    for position, piece in enumerate(pieces):
        trimmed = piece.strip()

        if trimmed:
            # Leading whitespace is the only thing that shifts the sentence start inside its piece
            indent = len(piece) - len(piece.lstrip())
            searchable = re.sub(r"[^\w\s]", "", trimmed).lower()
            sentence_records.append((searchable, trimmed, cursor + indent))

        # Step over this piece, plus the delimiter that followed it (the final piece has none)
        cursor += len(piece)
        if position != last_position:
            cursor += len(delimiter)

    return sentence_records


--- Processing Episode 1 (Scroobius Pip) ---
  Transcript length: 73155 characters
  Number of periodic timestamps: 170
  Found 1 raw mentions for this episode:
Here they are: ['oli babas kerb camden']
DEBUG: Query 'oli babas kerb camden'
No specific mentions processed for Episode 1.

--- Processing Episode 2 (Grace Dent) ---
  Transcript length: 57238 characters
  Number of periodic timestamps: 156
  Found 2 raw mentions for this episode:
Here they are: ['little owl', 'trullo']
DEBUG: Query 'little owl'
DEBUG: Query 'trullo'
Mentions collected for Episode 2: 2 rows.

--- Processing Episode 3 (Richard Osman) ---
  Transcript length: 87390 characters
  Number of periodic timestamps: 280
  Found 3 raw mentions for this episode:
Here they are: ['five guys', 'cora pearl', 'berners tavern']
DEBUG: Query 'five guys'
DEBUG: Query 'cora pearl'
DEBUG: Query 'berners tavern'
Mentions collected for Episode 3: 9 rows.

--- Processing Episode 4 (Nish Kumar) ---
  Transcript length: 0 characters
  

### Fuzzy matching function

In [None]:
def _matches_by_res_name_from_list_of_res_names(restaurant_names: List[str], searchable_sentences: List[str], min_score: int) -> Dict[str, List[Tuple[str, int, int]]]:
    """
    Find fuzzy matches for each restaurant name within a list of cleaned sentences.

    Each name is scored against every sentence with fuzz.partial_ratio; the 20 best-scoring
    sentences are taken and then filtered to those scoring at least `min_score`.

    Args:
        restaurant_names: the queries, one per restaurant.
        searchable_sentences: cleaned sentences to search through.
        min_score: minimum score (inclusive) a match needs to be kept.

    Returns:
        A dict keyed by restaurant name. Each value is a list of
        (matched sentence text, score, index of that sentence in searchable_sentences) tuples,
        in the order fuzzywuzzy returned them (best score first).
    """
    # Key each sentence by its list position. For dict-like choices fuzzywuzzy returns
    # (choice, score, key) tuples, so the position comes back with every match. Looking the
    # text up afterwards with list.index() would map repeated identical sentences onto the
    # first occurrence only (and costs a full scan per match).
    indexed_sentences = dict(enumerate(searchable_sentences))

    filtered_matches_by_string = {}
    for res_name in restaurant_names:
        if indexed_sentences:
            matches = process.extract(
                res_name,
                indexed_sentences,
                scorer=fuzz.partial_ratio,
                limit=20
            )
        else:
            # Nothing to search through; avoid calling the matcher with empty choices
            matches = []

        print(f"DEBUG: Query '{res_name}'")

        filtered_matches_by_string[res_name] = [
            (match_text, score, sentence_index)
            for match_text, score, sentence_index in matches
            if score >= min_score
        ]

    return filtered_matches_by_string

### Find timestamp function

In [None]:
# transcript_timestamps in the first_five_clean_transcript_timestamps_df are a list of dicts
# Each dict (stamp) looks like this: 'episode_number': ep_num, 'timestamp': actual_time_string, 'start_index': start_position_in_text

def _find_timestamp(original_sentence_start_index: int, transcript_timestamps: List[dict]):
    """
    Return the timestamp of the nearest stamp located at or before a sentence's start index.

    Args:
        original_sentence_start_index: position of the sentence in the clean transcript text.
        transcript_timestamps: timestamp dicts, each read here through its "start_index"
            and "timestamp" keys. They do not need to be sorted.

    Returns:
        The "timestamp" value of the stamp with the largest "start_index" that does not exceed
        the sentence index, or None when the index or the timestamps are None, or when no
        stamp sits at or before the index.
    """
    if original_sentence_start_index is None or transcript_timestamps is None:
        return None

    nearest_stamp = None
    for stamp in transcript_timestamps:
        position = stamp["start_index"]
        if position > original_sentence_start_index:
            continue
        # ">=" lets a later entry win a tie, which is what a reverse scan over a sorted
        # list would have returned
        if nearest_stamp is None or position >= nearest_stamp["start_index"]:
            nearest_stamp = stamp

    if nearest_stamp is None:
        return None
    return nearest_stamp["timestamp"]

### Find matches and timestamps function

Iterates through the episodes; for each episode that has restaurant mentions, creates a dict for each match found, and returns a list of dicts covering all mentions.

In [None]:
# Can store in a separate df for now, and merge if we want to (good for MVP testing, good for flexibility)

def find_matches_and_timestamps(combined_df: pd.DataFrame, min_match_score: int = 90):
    """
    Collect every qualifying fuzzy match for every restaurant of every episode.

    For each row of `combined_df` the restaurant names are normalised (stripped, lower-cased),
    the transcript is split into sentences, each full restaurant name is searched for with
    _matches_by_res_name_from_list_of_res_names, and every returned match is turned into a
    mention dict carrying the nearest prior timestamp.

    Args:
        combined_df: one row per episode; the columns read are 'episode_number', 'guest_name',
            'clean_transcript_text', 'periodic_timestamps' and 'restaurants_mentioned'.
            'restaurants_mentioned' may be a list, a numpy array or a comma-separated string.
        min_match_score: minimum score handed to the matcher (default 90).

    Returns:
        A list of mention dicts, structured:
        mention = {
                            "Episode ID": episode_number,
                            "Restaurant": restaurant_name_query,
                            "Mention text": original_sentence_text,
                            "Match Score": score,
                            "Match Type": "full, over <min_match_score>",
                            "Timestamp": timestamp,
                        }
    """
    all_mentions_collected = []

    # Iterate over the frame that was passed in, not over a notebook-level variable
    for _, combined_row in combined_df.iterrows():
        episode_number = combined_row.get('episode_number')
        guest_name = combined_row.get('guest_name')
        clean_transcript_text = combined_row.get('clean_transcript_text')
        periodic_timestamps = combined_row.get('periodic_timestamps')
        restaurants_data = combined_row.get('restaurants_mentioned', [])

        # A missing transcript / timestamp cell is treated as empty rather than crashing on len()
        if not isinstance(clean_transcript_text, str):
            clean_transcript_text = ""
        if not hasattr(periodic_timestamps, '__len__'):
            periodic_timestamps = []

        # The mentions can arrive as a list, a numpy array or a comma-separated string
        if isinstance(restaurants_data, list):
            raw_names = restaurants_data
            print(f"Res mentions are in list form")
        elif isinstance(restaurants_data, np.ndarray) and restaurants_data.size > 0:
            raw_names = restaurants_data.flatten().tolist()
        elif isinstance(restaurants_data, str):
            raw_names = restaurants_data.split(',')
            print(f"Res mentions are in string form")
        else:
            raw_names = []

        # Same normalisation whatever the container was, so queries are always stripped and lower-cased
        restaurants_list = [
            name.strip().lower()
            for name in raw_names
            if isinstance(name, str) and name.strip()
        ]

        print(f"\n--- Processing Episode {episode_number} ({guest_name}) ---")
        print(f"  Transcript length: {len(clean_transcript_text)} characters")
        print(f"  Number of periodic timestamps: {len(periodic_timestamps)}")

        if not restaurants_list:
            print(f"  No raw mentions found in 'restaurants_mentioned' list for Episode {episode_number}.")
            continue

        print(f"  Found {len(restaurants_list)} raw mentions for this episode:")
        print(f"Here they are: {restaurants_list}")

        # Each entry is (cleaned sentence, original sentence, start index in the transcript)
        episode_sentences_data = _create_list_tuple_clean_sen_og_sen_og_index(clean_transcript_text)
        searchable_sentences = [item[0] for item in episode_sentences_data]

        # The caller's threshold is passed through instead of a fixed 90
        all_matches_for_episode = _matches_by_res_name_from_list_of_res_names(
            restaurants_list, searchable_sentences, min_match_score
        )

        mentions_for_current_episode = []  # only used for the per-episode summary line
        for restaurant_name_query, match_list_for_query in all_matches_for_episode.items():
            for _matched_cleaned_text, score, matched_sentence_index in match_list_for_query:
                # Go back from the matched position to the original sentence and its start index
                _, original_sentence_text, original_start_index = episode_sentences_data[matched_sentence_index]
                timestamp = _find_timestamp(original_start_index, periodic_timestamps)

                mentions_for_current_episode.append({
                    "Episode ID": episode_number,
                    "Restaurant": restaurant_name_query,
                    "Mention text": original_sentence_text,
                    "Match Score": score,
                    "Match Type": f"full, over {min_match_score}",
                    "Timestamp": timestamp,
                })

        all_mentions_collected.extend(mentions_for_current_episode)

        if mentions_for_current_episode:
            print(f"Mentions collected for Episode {episode_number}: {len(mentions_for_current_episode)} rows.")
        else:
            print(f"No specific mentions processed for Episode {episode_number}.")

    return all_mentions_collected


# Run the all-matches search over the combined test frame with a minimum score of 90
all_mentions_collected = find_matches_and_timestamps(combined_test_df, 90)

### Convert list of dicts into a dataframe

In [None]:
# Turn the list of mention dicts into a DataFrame (one row per mention) and print it;
# an empty list means no restaurant was matched in any episode.
if all_mentions_collected:
    final_mentions_df = pd.DataFrame(all_mentions_collected)
    print(f"\n--- ALL MENTIONS COLLECTED ---")
    print(f"Total Mentions DataFrame created with {len(final_mentions_df)} rows.")
    print(f"\n --- Here is the full dataframe ---")
    print(final_mentions_df) 
else:
    print("\nNo mentions were found across all episodes.")

## Function to list the top match for each query only

The MVP will first identify easy wins (top match from using the full restaurant name as a query)

In [None]:
def find_top_match_and_timestamps(combined_df: pd.DataFrame, min_match_score: int = 90):
    """
    Collect only the best-scoring fuzzy match for each restaurant of every episode.

    For each row of `combined_df` the restaurant names are normalised (stripped, lower-cased),
    the transcript is split into sentences, and each full restaurant name is searched for with
    _matches_by_res_name_from_list_of_res_names. Of the matches returned for a name, the one
    with the highest score becomes a mention dict carrying the nearest prior timestamp; names
    with no match produce nothing.

    Args:
        combined_df: one row per episode; the columns read are 'episode_number', 'guest_name',
            'clean_transcript_text', 'periodic_timestamps' and 'restaurants_mentioned'.
            'restaurants_mentioned' may be a list, a numpy array or a comma-separated string.
        min_match_score: minimum score handed to the matcher (default 90).

    Returns:
        A list with at most one mention dict per restaurant per episode, structured:
        mention = {
                            "Episode ID": episode_number,
                            "Restaurant": restaurant_name_query,
                            "Mention text": original_sentence_text,
                            "Match Score": score,
                            "Match Type": "full, over <min_match_score>",
                            "Timestamp": timestamp,
                        }
    """
    all_mentions_collected = []

    for _, combined_row in combined_df.iterrows():
        episode_number = combined_row.get('episode_number')
        guest_name = combined_row.get('guest_name')
        clean_transcript_text = combined_row.get('clean_transcript_text')
        periodic_timestamps = combined_row.get('periodic_timestamps')
        restaurants_data = combined_row.get('restaurants_mentioned', [])

        # A missing transcript / timestamp cell is treated as empty rather than crashing on len()
        if not isinstance(clean_transcript_text, str):
            clean_transcript_text = ""
        if not hasattr(periodic_timestamps, '__len__'):
            periodic_timestamps = []

        # The mentions can arrive as a list, a numpy array or a comma-separated string
        if isinstance(restaurants_data, list):
            raw_names = restaurants_data
            print(f"Res mentions are in list form")
        elif isinstance(restaurants_data, np.ndarray) and restaurants_data.size > 0:
            raw_names = restaurants_data.flatten().tolist()
        elif isinstance(restaurants_data, str):
            raw_names = restaurants_data.split(',')
            print(f"Res mentions are in string form")
        else:
            raw_names = []

        # Same normalisation whatever the container was, so queries are always stripped and lower-cased
        restaurants_list = [
            name.strip().lower()
            for name in raw_names
            if isinstance(name, str) and name.strip()
        ]

        print(f"\n--- Processing Episode {episode_number} ({guest_name}) ---")
        print(f"  Transcript length: {len(clean_transcript_text)} characters")
        print(f"  Number of periodic timestamps: {len(periodic_timestamps)}")

        if not restaurants_list:
            print(f"  No raw mentions found in 'restaurants_mentioned' list for Episode {episode_number}.")
            continue

        print(f"  Found {len(restaurants_list)} raw mentions for this episode:")
        print(f"Here they are: {restaurants_list}")

        # Each entry is (cleaned sentence, original sentence, start index in the transcript)
        episode_sentences_data = _create_list_tuple_clean_sen_og_sen_og_index(clean_transcript_text)
        searchable_sentences = [item[0] for item in episode_sentences_data]

        # The caller's threshold is passed through instead of a fixed 90
        all_matches_for_episode = _matches_by_res_name_from_list_of_res_names(
            restaurants_list, searchable_sentences, min_match_score
        )

        mentions_for_current_episode = []  # only used for the per-episode summary line
        for restaurant_name_query, match_list_for_query in all_matches_for_episode.items():
            if not match_list_for_query:
                continue

            # max() returns the first of equally-scored matches, i.e. element 0 of a best-first list,
            # while not depending on the matcher's ordering
            _matched_cleaned_text, score, matched_sentence_index = max(
                match_list_for_query, key=lambda match: match[1]
            )
            # Go back from the matched position to the original sentence and its start index
            _, original_sentence_text, original_start_index = episode_sentences_data[matched_sentence_index]
            timestamp = _find_timestamp(original_start_index, periodic_timestamps)

            mentions_for_current_episode.append({
                "Episode ID": episode_number,
                "Restaurant": restaurant_name_query,
                "Mention text": original_sentence_text,
                "Match Score": score,
                "Match Type": f"full, over {min_match_score}",
                "Timestamp": timestamp,
            })

        all_mentions_collected.extend(mentions_for_current_episode)

        if mentions_for_current_episode:
            print(f"Mentions collected for Episode {episode_number}: {len(mentions_for_current_episode)} rows.")
        else:
            print(f"No specific mentions processed for Episode {episode_number}.")

    return all_mentions_collected
    

# Run the top-match-only search over the combined test frame with a minimum score of 90
top_matches = find_top_match_and_timestamps(combined_test_df, 90)


--- Processing Episode 1 (Scroobius Pip) ---
  Transcript length: 73155 characters
  Number of periodic timestamps: 170
  Found 1 raw mentions for this episode:
Here they are: ['oli babas kerb camden']
DEBUG: Query 'oli babas kerb camden'
No specific mentions processed for Episode 1.

--- Processing Episode 2 (Grace Dent) ---
  Transcript length: 57238 characters
  Number of periodic timestamps: 156
  Found 2 raw mentions for this episode:
Here they are: ['little owl', 'trullo']
DEBUG: Query 'little owl'
DEBUG: Query 'trullo'
Mentions collected for Episode 2: 2 rows.

--- Processing Episode 3 (Richard Osman) ---
  Transcript length: 87390 characters
  Number of periodic timestamps: 280
  Found 3 raw mentions for this episode:
Here they are: ['five guys', 'cora pearl', 'berners tavern']
DEBUG: Query 'five guys'
DEBUG: Query 'cora pearl'
DEBUG: Query 'berners tavern'
Mentions collected for Episode 3: 3 rows.

--- Processing Episode 4 (Nish Kumar) ---
  Transcript length: 0 characters
  

### Convert list of top mentions into dataframe

In [None]:
# Turn the list of top-match dicts into a DataFrame (one row per restaurant matched) and print it.
# The "Here is the full ..." header is printed before the frame it announces.
if top_matches:
    top_mentions_df = pd.DataFrame(top_matches)
    print(f"\n--- TOP COLLECTED ---")
    print(f"Top Mentions DataFrame created with {len(top_mentions_df)} rows.")
    print(f"\n --- Here is the full top mentions dataframe ---")
    print(top_mentions_df)
else:
    print("\nNo mentions were found across all episodes.")

## Debugging matches functions

In [None]:
# Gather restaurant mentions from df

def res_mens(ep_num, dataframe=combined_test_df):
    """
    Return the cleaned restaurant mentions stored for one episode.

    Selects the rows of `dataframe` whose 'episode_number' equals `ep_num`, takes the single
    'restaurants_mentioned' value (`.item()` raises unless exactly one row is selected),
    flattens it and returns the non-blank names stripped and lower-cased.
    The stored value is expected to offer `.flatten()` (e.g. a numpy array).
    """
    episode_rows = dataframe[dataframe['episode_number'] == ep_num]
    stored_mentions = episode_rows.get('restaurants_mentioned').item()

    cleaned_names = []
    for name in stored_mentions.flatten().tolist():
        if name.strip():
            cleaned_names.append(name.strip().lower())
    return cleaned_names

# Look the mentions up once and reuse the result for both the printout and later cells
ep2mentions = res_mens(2)
print(f"res mens: {ep2mentions}")

# Examine transcript

def transcript(ep_num, dataframe=combined_test_df):
    """
    Return the clean transcript text for one episode.

    Picks the rows of `dataframe` whose 'episode_number' equals `ep_num` and returns the
    'clean_transcript_text' value of the first of them (raises IndexError when none match).
    """
    is_requested_episode = dataframe['episode_number'] == ep_num
    return dataframe[is_requested_episode]['clean_transcript_text'].iloc[0]
# Fetch the transcript once and reuse it for both the 1000-character preview and later cells
ep2transcript = transcript(2)
print(f"\n transcript sample: {ep2transcript[:1000]}")

# Create sentences list(s)

# Split the episode 2 transcript into (cleaned sentence, original sentence, start index) tuples,
# then keep only the cleaned sentences as the list to search through
episode_sentences_data = _create_list_tuple_clean_sen_og_sen_og_index(ep2transcript)
ep2_searchable_sentences = [item[0] for item in episode_sentences_data]

# Collate top 20 matches for restaurant mention(s)

def check_top_twenty(matches_list, searchable_sentences, score=0):
    """
    Print the matches found for each query, for eyeballing the matcher's results.

    Runs _matches_by_res_name_from_list_of_res_names with the given queries, sentences and
    minimum score (0 by default, so nothing is filtered out by score), then prints a
    header per query followed by one line per match. Returns nothing.
    """
    results_by_query = _matches_by_res_name_from_list_of_res_names(matches_list, searchable_sentences, score)
    for query, query_matches in results_by_query.items():
        print(f"\n---Results for query: {query}")
        for found in query_matches:
            print(f"\nMatch: {found}")

# Show the matcher's top results for episode 2 with the default minimum score of 0
check_top_twenty(ep2mentions, ep2_searchable_sentences )


res mens: ['little owl', 'trullo']

 transcript sample: starting point is 00:00:00 hello, listeners of the off menu podcast. it is ed gamble here from the off menu podcast. i have a very exciting announcement. i have written my first ever book. i am absolutely over the moon to announce this. i'm very, very proud of it. of course, what else could i write a book about? but food. my book is all about food. my life in food. how greedy i am. what a greedy little boy i was. what a greedy adult i am. i think it's very funny. i'm very proud of it. the book is called glutton, the multi-course life of a very greedy boy. and it's coming out this october, but it is available to pre-order now, wherever you pre-order books from. and if you like my signature, i've done some signed copies, starting point is 00:00:43 which are exclusively available from waterstones. but go and pre-order your copy of glutton, the multi-course life of a very greedy boy now. please? bon appetito and welcome to the off men