In [7]:
# import a csv
import pandas as pd
import json

# Load the CSV export into a DataFrame for the cells below
df = pd.read_csv(filepath_or_buffer='slag.csv')


In [8]:
import re

def extract_standard_youtube_link(text):
    """Return the canonical watch URL for the first YouTube link found in ``text``.

    Recognised forms (the scheme and ``www.`` are optional; other sub-domains
    such as ``m.`` also match because the pattern is searched, not anchored):

    * ``youtube.com/watch?v=ID`` - including when ``v`` is not the first query
      parameter, e.g. ``watch?feature=share&v=ID``
    * ``youtube.com/embed/ID``, ``youtube.com/shorts/ID``, ``youtube.com/live/ID``
    * ``youtu.be/ID``

    Args:
        text: Arbitrary text that may contain a YouTube URL.

    Returns:
        ``"https://www.youtube.com/watch?v=<ID>"`` for the first match, or an
        empty string when ``text`` is not a string or holds no recognisable link.
    """
    # Non-string cells (None, NaN floats, ...) would make re.search raise TypeError.
    if not isinstance(text, str):
        return ""

    youtube_pattern = (
        r"(?:https?://)?(?:www\.)?(?:youtube\.com|youtu\.be)/"
        # Optional path prefix in front of the ID. The lazy `??` lets a leading
        # `v=` win over a later `&v=` in the same query string, and the
        # lookahead rejects the playlist embed `embed/videoseries`, whose last
        # path segment happens to be 11 characters long.
        r"(?:watch\?(?:[^\s#]*?&)??v=|embed/(?!videoseries)|shorts/|live/)?"
        # YouTube video IDs are 11 characters from this alphabet.
        r"([a-zA-Z0-9_-]{11})"
    )

    # Only the first link in the text is considered.
    match = re.search(youtube_pattern, text)
    if match is None:
        return ""

    video_id = match.group(1)
    return f"https://www.youtube.com/watch?v={video_id}"

In [9]:
# Quick check with a mobile (m.youtube.com) URL
sample_url = "https://m.youtube.com/watch?v=aOCOke2qrkc"
extract_standard_youtube_link(sample_url)

'https://www.youtube.com/watch?v=aOCOke2qrkc'

In [10]:

# Target JSON layout - one object per CSV column:
# [
#   {
#     "title": <column header>,
#     "description": <first data row of that column>,
#     "videos": [<normalised YouTube link for every remaining row>]
#   },
#   ...
# ]

gen_data = []

# Walk the frame one column at a time; each column is read exactly once
# instead of re-running df.iterrows() over every row for every column.
for col, column_values in df.items():
    # tolist() yields native Python scalars, which keeps the result JSON-friendly.
    cells = column_values.tolist()

    # The first data row (by position, not by index label) is the description.
    # A blank cell is read as NaN, which json.dump would emit as the invalid
    # token `NaN`, so fall back to an empty string in that case.
    description = cells[0] if cells else ""
    if pd.isna(description):
        description = ""

    gen_data.append({
        "title": col,
        "description": description,
        # Cells without a recognisable link become "" here; they are kept so
        # the list stays aligned with the remaining rows of the column.
        "videos": [extract_standard_youtube_link(str(cell)) for cell in cells[1:]],
    })

print(gen_data)

[{'title': 'Cinematic Western Wedding Films', 'description': 'Cinematic western wedding Films. With Story build up and Full color correction, Edited in Premier Pro', 'videos': ['https://www.youtube.com/watch?v=tAJnu6SJ9mo', 'https://www.youtube.com/watch?v=u4nLLun2ImE', 'https://www.youtube.com/watch?v=jJLX40HjPFs', 'https://www.youtube.com/watch?v=ZMvXnhZgshs', 'https://www.youtube.com/watch?v=nCH9-sTsf9M', 'https://www.youtube.com/watch?v=1orgr6cz7xc', 'https://www.youtube.com/watch?v=iVSrc1m3liw', 'https://www.youtube.com/watch?v=DoiwWAsmcHI', 'https://www.youtube.com/watch?v=dT2ObqSJWTc', 'https://www.youtube.com/watch?v=YPmEgbLvWAs', 'https://www.youtube.com/watch?v=9AcSjQkVJZY', 'https://www.youtube.com/watch?v=6P1cSSjau1E', 'https://www.youtube.com/watch?v=-3iVSqn575U', 'https://www.youtube.com/watch?v=LPCyKnQHQRk', 'https://www.youtube.com/watch?v=vCIzoa7j9wo', 'https://www.youtube.com/watch?v=rsHcHBINwM8', 'https://www.youtube.com/watch?v=cUQxDLHIVBw', 'https://www.youtube.com

In [11]:
# Drop the empty-string entries from every "videos" list
for entry in gen_data:
    links = entry['videos']
    entry['videos'] = [link for link in links if link != '']

print(gen_data)

[{'title': 'Cinematic Western Wedding Films', 'description': 'Cinematic western wedding Films. With Story build up and Full color correction, Edited in Premier Pro', 'videos': ['https://www.youtube.com/watch?v=tAJnu6SJ9mo', 'https://www.youtube.com/watch?v=u4nLLun2ImE', 'https://www.youtube.com/watch?v=jJLX40HjPFs', 'https://www.youtube.com/watch?v=ZMvXnhZgshs', 'https://www.youtube.com/watch?v=nCH9-sTsf9M', 'https://www.youtube.com/watch?v=1orgr6cz7xc', 'https://www.youtube.com/watch?v=iVSrc1m3liw', 'https://www.youtube.com/watch?v=DoiwWAsmcHI', 'https://www.youtube.com/watch?v=dT2ObqSJWTc', 'https://www.youtube.com/watch?v=YPmEgbLvWAs', 'https://www.youtube.com/watch?v=9AcSjQkVJZY', 'https://www.youtube.com/watch?v=6P1cSSjau1E', 'https://www.youtube.com/watch?v=-3iVSqn575U', 'https://www.youtube.com/watch?v=LPCyKnQHQRk', 'https://www.youtube.com/watch?v=vCIzoa7j9wo', 'https://www.youtube.com/watch?v=rsHcHBINwM8', 'https://www.youtube.com/watch?v=cUQxDLHIVBw', 'https://www.youtube.com

In [12]:
# Serialise the collected data and write it to generated.json
with open('generated.json', 'w') as outfile:
    outfile.write(json.dumps(gen_data))