In [None]:
import subprocess

def find_zombie_processes():
    """Return the PIDs of all zombie (defunct) processes listed by ``ps aux``.

    Returns:
        list[str]: PIDs as strings, in the order ``ps`` prints them. An empty
        list is returned when no zombie is found or when ``ps`` cannot be run
        (the error is printed, never raised).
    """
    try:
        result = subprocess.run(["ps", "aux"], capture_output=True, text=True)

        zombie_pids = []
        for line in result.stdout.splitlines():
            parts = line.split()
            # Column 8 (index 7) is STAT. A zombie's state starts with "Z" but is
            # usually followed by modifier flags ("Z+", "Zs", ...), so compare
            # only the leading state letter instead of the whole field.
            if len(parts) > 7 and parts[7].startswith("Z"):
                zombie_pids.append(parts[1])  # PID is the second column

        return zombie_pids
    except Exception as e:
        print(f"Error finding zombie processes: {e}")
        return []

def kill_processes(pids):
    """Send SIGKILL (``sudo kill -9``) to every PID in ``pids``.

    Args:
        pids: Iterable of process IDs. Both ints and numeric strings are
            accepted; each one is converted to ``str`` before being passed
            to ``kill``.

    A failure for one PID (non-zero exit status, ``sudo`` unavailable, ...) is
    printed and does not stop the remaining PIDs from being processed.

    NOTE(review): a zombie has already exited, so signalling it generally has
    no effect; it goes away once its parent reaps it. Confirm that killing
    the zombie itself (rather than its parent) is what is intended here.
    """
    for pid in pids:
        # subprocess arguments must be strings; an int PID would raise TypeError.
        pid = str(pid)
        try:
            subprocess.run(["sudo", "kill", "-9", pid], check=True)
            print(f"✅ Killed zombie process: {pid}")
        except Exception as e:
            print(f"❌ Failed to kill process {pid}: {e}")

if __name__ == "__main__":
    # Scan first; only attempt a kill when the scan actually found something.
    zombie_pids = find_zombie_processes()
    if not zombie_pids:
        print("🎉 No zombie processes found!")
    else:
        print(f"🧟 Found zombie processes: {zombie_pids}")
        kill_processes(zombie_pids)


In [None]:
!pip install -r requirements.txt 

In [None]:
# Quick environment check: GPU status (nvidia-smi) and disk usage (df -h).
!nvidia-smi
!df -h

In [None]:
import papermill as pm
import os
import shutil
import subprocess
import openai
import json
import csv
from typing import List
from pathlib import Path


# Dataset source selector:
#   "songs"   -> use the hand-written artist/song table below
#   "artists" -> ask the OpenAI chat API for each artist's most famous songs
# method = "songs"
method = "artists"

if method == "songs":
    all_songs = {
        "Alan Walker": ["Faded"],
    }
    artists = list(all_songs.keys())

    def get_top_songs(artist):
        """Return at most one song for ``artist`` from the static ``all_songs`` table."""
        return all_songs.get(artist, [])[:1]

elif method == "artists":
    # === Step 1: Define artist list ===
    # NOTE(review): "Red zeppelin" and "Elton Jon" look like typos for
    # "Led Zeppelin" / "Elton John". The names are used verbatim in the prompt,
    # the search query and the directory name, so correct them deliberately.
    artists = [
        "Queen", "Bruno Mars", "Lady Gaga", "EMINEM", "Green day", "Yes",
        "The rolling stones", "Red zeppelin", "The stone roses", "The Beatles", "Oasis",
        "Backstreet boys", "Bon Jovi", "David Bowie", "Alan Walker", "Elton Jon",
        "Sting", "Billy Joel", "Ed Sheeran", "Twenty One Pilots",
    ]
    # artists = ["Red zeppelin"]

    # === Step 2: Function to get the 3 most famous songs using the ChatGPT API ===
    def get_top_songs(artist: str) -> List[str]:
        """Ask the OpenAI chat API for the three most famous songs by ``artist``.

        The API key is read from the ``OPENAI_API_KEY`` environment variable;
        credentials must never be committed to the notebook. Any key that was
        previously hard-coded here should be treated as leaked and revoked.

        Args:
            artist: Artist name inserted verbatim into the prompt.

        Returns:
            Up to three song titles with any leading list numbering
            ("1. " or "1) ") removed.

        Raises:
            RuntimeError: if ``OPENAI_API_KEY`` is not set.
        """
        import re  # local import keeps this cell self-contained

        api_key = os.environ.get("OPENAI_API_KEY")
        if not api_key:
            raise RuntimeError("OPENAI_API_KEY environment variable is not set")

        client = openai.OpenAI(api_key=api_key)
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "You are a my assistant."},
                {"role": "user", "content": f"List 3 of the most famous songs by {artist}. Only return a plain numbered list of song titles, no extra explanation."},
            ],
        )
        text = response.choices[0].message.content or ""
        print(text)

        songs = []
        for line in text.splitlines():
            # Strip only a leading "N. " / "N) " marker, so that titles which
            # themselves contain ". " (e.g. "Mr. Brightside") stay intact.
            title = re.sub(r"^\s*\d+[.)]\s+", "", line).strip()
            if title:
                songs.append(title)
        return songs[:3]

else:
    # Fail loudly instead of leaving `artists` / `get_top_songs` undefined.
    raise ValueError(f"Unknown method: {method!r} (expected 'songs' or 'artists')")



# === Step 3: Utility to check audio file ===
def file_found(path: Path, exts=(".mp3", ".m4a")) -> bool:
    """Return True if ``path`` contains a file named ``music<ext>`` for any extension.

    Args:
        path: Directory to look in (``Path`` or ``str``).
        exts: Extensions to try, each including the leading dot. The default
            is an immutable tuple so it cannot be modified between calls.
    """
    directory = Path(path)
    return any((directory / f"music{ext}").exists() for ext in exts)

# === Step 4: Extract reference URL ===
def extract_reference_url(path: Path) -> str:
    """Return the first usable ``webpage_url`` found in ``path``'s info files.

    Every ``music.*.info.json`` file in ``path`` is inspected in glob order.
    A file that cannot be read or parsed is reported on stdout and skipped.

    Returns:
        The first ``webpage_url`` value that starts with "http", or "" when
        no file provides one.
    """
    for info_file in path.glob("music.*.info.json"):
        try:
            metadata = json.loads(info_file.read_text(encoding="utf-8"))
            candidate = metadata.get("webpage_url", "")
            if candidate.startswith("http"):
                # The first real URL wins; the remaining files are not read.
                return candidate
        except Exception as e:
            print(f"❌ Failed to read {info_file.name}: {e}")
    return ""


# === Step 5: Download audio ===
def download_mp3(search_query: str, target_path: str):
    """Download audio for ``search_query`` with yt-dlp and store it as ``music.mp3``.

    yt-dlp is invoked with a ``scsearchN:`` query for N = 1, 3, 5, 7 until at
    least one ``music.<id>.mp3`` file appears in ``target_path``. The first
    such file (in sorted name order) is moved to ``music.mp3``; any other
    downloaded files and all ``*.info.json`` files are left in place.

    Args:
        search_query: Free-text query; " full" is appended before searching.
        target_path: Directory to download into (created if missing).

    Returns:
        None. Success or failure is only reported on stdout.
    """
    path = Path(target_path)
    path.mkdir(parents=True, exist_ok=True)
    final_path = path / "music.mp3"

    for n in [1, 3, 5, 7]:
        query = f"scsearch{n}:{search_query} full"
        # Log exactly the query that is sent to yt-dlp.
        print(f"🔍 Trying: {query}")
        command = [
            "yt-dlp",
            query,
            "-x", "--audio-format", "mp3",
            "--write-info-json",
            "-o", os.path.join(target_path, "music.%(id)s.%(ext)s")
        ]
        # check=False: a failed search is handled by looking at the files below.
        subprocess.run(command, check=False)

        # Look for downloaded mp3 files; sorting makes the pick reproducible.
        mp3_files = sorted(path.glob("music.*.mp3"))
        if mp3_files:
            # replace() overwrites an existing music.mp3 on every platform,
            # whereas rename() raises on Windows when the target exists.
            mp3_files[0].replace(final_path)
            print(f"✅ Success with scsearch{n} - Saved as {final_path}")
            return

        print(f"⚠️ No valid audio file found with scsearch{n}, retrying...")

    print(f"❌ Failed to get good audio for: {search_query}")

# === Step 6: Copy notebook template ===
def copy_notebook_template(target_path: str, notebook_template_path: str):
    """Copy the ``Video_generate.ipynb`` template into ``target_path``.

    The template is read from ``../notebooks/Video_generate.ipynb`` relative
    to ``notebook_template_path`` and keeps its file name at the destination.
    """
    template_name = "Video_generate.ipynb"
    source = os.path.join(notebook_template_path, "../notebooks/" + template_name)
    destination = os.path.join(target_path, template_name)
    shutil.copy(source, destination)

# === Step 7: Create info.csv and templates ===
def generate_info_csv(dataset_dir: str, artist: str, song: str, output_csv: str):
    """Write a one-row CSV describing a song to ``output_csv``.

    The row has the columns ``artist``, ``song`` and ``reference``, where
    ``reference`` is whatever ``extract_reference_url`` returns for
    ``dataset_dir``. The reference is also echoed to stdout.
    """
    columns = ["artist", "song", "reference"]
    reference = extract_reference_url(Path(dataset_dir))
    print("reference", reference)
    with open(output_csv, "w", newline='', encoding="utf-8") as handle:
        writer = csv.DictWriter(handle, fieldnames=columns)
        writer.writeheader()
        writer.writerow(dict(zip(columns, (artist, song, reference))))

def fill_templates(info_csv: str, base_dir: str,song_dir: str):
    """Render ``title.txt`` and ``description.txt`` into ``song_dir``.

    Both templates are loaded from ``../src/templates/`` relative to
    ``base_dir`` and filled via ``str.format`` with the fields ``artist``,
    ``music`` and ``reference``, taken from the ``artist``, ``song`` and
    ``reference`` columns of ``info_csv``. Each CSV row overwrites the files
    written for the previous row.
    """
    templates_dir = "../src/templates"
    title_tpl = Path(os.path.join(base_dir, f"{templates_dir}/title.txt")).read_text(encoding="utf-8")
    desc_tpl = Path(os.path.join(base_dir, f"{templates_dir}/description.txt")).read_text(encoding="utf-8")

    with open(info_csv, "r", encoding="utf-8") as f:
        for row in csv.DictReader(f):
            fields = {
                "artist": row["artist"],
                "music": row["song"],
                "reference": row["reference"],
            }
            # Render both texts before writing, so a template error leaves
            # the files of this row untouched.
            rendered_title = title_tpl.format(**fields)
            rendered_desc = desc_tpl.format(**fields)

            Path(song_dir, "title.txt").write_text(rendered_title, encoding="utf-8")
            Path(song_dir, "description.txt").write_text(rendered_desc, encoding="utf-8")

# === Step 8: End-to-end orchestration ===
def _song_dir_name(artist: str, song: str) -> str:
    """Build the flat ``<artist>_<song>`` directory name for one song."""
    name = f"{artist.replace(' ', '_')}_{song.replace(' ', '_')}"
    # A path separator inside a name (e.g. "AC/DC") would otherwise create a
    # nested directory instead of a direct child of the dataset root.
    for sep in (os.sep, os.altsep):
        if sep:
            name = name.replace(sep, "_")
    return name


def generate_dataset_structure(base_dir: str, artists: List[str]):
    """Create one fully prepared song directory per (artist, song) pair.

    For every artist the songs come from ``get_top_songs``. For each song a
    directory ``<base_dir>/<artist>_<song>`` is created, then the audio is
    downloaded, the notebook template is copied in, ``info.csv`` is written
    and the title/description templates are rendered.

    Errors are printed, never raised: a failure while fetching the song list
    skips that artist, and a failure while preparing one song skips only
    that song.

    Args:
        base_dir: Dataset root that receives the song directories.
        artists: Artist names to process, in order.
    """
    for artist in artists:
        try:
            top_songs = get_top_songs(artist)
        except Exception as e:
            print(f"Error processing {artist}: {e}")
            continue

        for song in top_songs:
            try:
                song_dir = os.path.join(base_dir, _song_dir_name(artist, song))
                os.makedirs(song_dir, exist_ok=True)
                download_mp3(f"{artist} {song}", song_dir)
                copy_notebook_template(song_dir, base_dir)
                info_csv = os.path.join(song_dir, "info.csv")
                generate_info_csv(song_dir, artist, song, info_csv)
                fill_templates(info_csv, base_dir,song_dir)
            except Exception as e:
                print(f"Error processing {artist} - {song}: {e}")
            

# Output path
# The dataset root is "songs_dataset" inside the directory the kernel is running in.
current_dir = os.getcwd()
# Set output path relative to current directory
output_base_dir = os.path.join(current_dir, "songs_dataset")
# Make sure the dataset root exists before anything is written into it
os.makedirs(output_base_dir, exist_ok=True)  # No error if it already exists


# Runs the scaffolding for every configured artist as soon as this cell executes
generate_dataset_structure(output_base_dir, artists)


# Notes:
# - `yt-dlp` must be installed and on PATH (download_mp3 invokes it by name)
# - get_top_songs needs a working OpenAI API key when method == "artists"
# - The template notebook is read from ../notebooks/Video_generate.ipynb relative to
#   output_base_dir; title/description templates come from ../src/templates/




In [None]:
from concurrent.futures import ProcessPoolExecutor
from pathlib import Path
import time
import multiprocessing
import os
import subprocess
import papermill as pm


def run_notebook_in_dir(index,dir_path):
    """Execute ``Video_generate.ipynb`` inside ``dir_path`` with the papermill CLI.

    The executed copy is written to ``Video_generate_output.ipynb`` in the
    same directory, which is also used as papermill's working directory.
    When execution fails, the error text is stored in ``errored.txt`` in that
    same directory.

    Args:
        index: Position of the directory in the work queue. Currently unused;
            kept so existing callers keep working.
        dir_path: Song directory containing the notebook (``Path`` or ``str``).
    """
    dir_path = Path(dir_path)
    input_nb = dir_path / "Video_generate.ipynb"
    output_nb = dir_path / "Video_generate_output.ipynb"
    # The marker must be derived from this directory. Relying on a module
    # global would point at whichever directory the caller scanned last.
    errored_file = dir_path / "errored.txt"
    pid = multiprocessing.current_process().pid

    if not input_nb.exists():
        print(f"⚠️ PID {pid} — Notebook not found: {dir_path}", flush=True)
        return

    cmd = [
        "papermill",
        str(input_nb),
        str(output_nb),
        "--kernel", "python3",
        "--cwd", str(dir_path)
    ]
    try:
        subprocess.run(cmd, check=True)
        print(f"✅ PID {pid} — Done: {input_nb}", flush=True)
    except Exception as e:
        print(f"❌ PID {pid} — Failed: {input_nb} -> {e}", flush=True)
        errored_file.write_text(str(e), encoding="utf-8")


if __name__ == "__main__":
    print("🚀 Starting run_all.py")  # first line of output; confirms the cell started
    base_dir = Path("songs_dataset")
    print("📂 Collecting notebook directories...", flush=True)
    notebook_dirs = [p for p in base_dir.iterdir() if p.is_dir()]
    print(f"📁 Found {len(notebook_dirs)} candidate dirs", flush=True)

    # Queue only directories that have music.mp3 and neither a finished video
    # (final_video.mp4) nor a failure marker (errored.txt).
    filtered_dirs = []
    for p in notebook_dirs:
        print("directory:",p)
        final_video = p / "final_video.mp4"
        first_music = p / "music.mp3"
        errored_file = p / "errored.txt"

        if final_video.exists():
            print(f"✅ Skipping {p} (final_video.mp4 exists)", flush=True)
            continue
        print(f"🚀 final_video_doesn't exist: {p}", flush=True)

        if errored_file.exists():
            print(f"❌ Skipping {p} (errored.txt exists)", flush=True)
            continue
        print(f"🚀 errored.txt doesn't exist: {p}", flush=True)

        if first_music.exists():
            print(f"🚀 Queued for execution: {p}", flush=True)
            filtered_dirs.append(p)
        else:
            print(f"✅ Skipping {p} (music.mp3 doesn't exist)", flush=True)

    notebook_dirs = filtered_dirs

    # Run the queue with at most `max_workers` notebook processes at a time.
    max_workers = 1
    poll_seconds = 1.0  # pause between capacity checks so waiting does not spin a CPU core
    running_processes = []

    for index, dir_path in enumerate(filtered_dirs):
        while True:
            # Build a new list instead of removing items from the list that is
            # being iterated, which can skip entries.
            running_processes = [proc for proc in running_processes if proc.is_alive()]
            if len(running_processes) < max_workers:
                break
            time.sleep(poll_seconds)

        proc = multiprocessing.Process(target=run_notebook_in_dir, args=(index, dir_path))
        proc.start()
        running_processes.append(proc)

    # Wait for whatever is still running.
    for proc in running_processes:
        proc.join()

    print("🎉 All notebooks done!!")
    


In [None]:
import os
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.http import MediaFileUpload
import pickle
from google.auth.transport.requests import Request
import time

def authenticate_youtube():
    """Return an authenticated YouTube Data API v3 client.

    Credentials are cached in ``token.pickle`` in the working directory. An
    expired cached token is refreshed when possible. When there is no usable
    token, or the refresh fails, an installed-app OAuth flow is started from
    ``client_secrets.json``, trying the local ports 8080, 8081, 8082 and 8888
    in turn. The resulting credentials are written back to ``token.pickle``.

    Raises:
        RuntimeError: if none of the candidate ports can be bound.
    """
    # Local import: only needed for the refresh fallback below.
    from google.auth.exceptions import RefreshError

    scopes = [
        "https://www.googleapis.com/auth/youtube.upload",
        "https://www.googleapis.com/auth/youtube"
    ]
    creds = None
    token_path = "token.pickle"

    # Load the cached token, if any.
    # SECURITY: unpickling runs arbitrary code from the file. This is only
    # acceptable while token.pickle is written exclusively by this function.
    if os.path.exists(token_path):
        with open(token_path, "rb") as token:
            creds = pickle.load(token)

    # Refresh an expired token. A revoked or otherwise unusable refresh token
    # falls through to a fresh interactive login instead of aborting.
    if creds and creds.expired and creds.refresh_token:
        try:
            creds.refresh(Request())
        except RefreshError as e:
            print(f"⚠️ Token refresh failed ({e}); starting a new OAuth flow.")
            creds = None

    # No usable credentials: run the interactive flow.
    if not creds or not creds.valid:
        flow = InstalledAppFlow.from_client_secrets_file("client_secrets.json", scopes)
        for port in [8080, 8081, 8082, 8888]:
            try:
                creds = flow.run_local_server(port=port, open_browser=False)
                break
            except OSError:
                print(f"⚠️ Port {port} already in use, trying next...")
        else:
            # The loop finished without `break`: every port failed.
            raise RuntimeError("😢 No available ports for OAuth redirect.")

    # Persist the (possibly refreshed) credentials for the next run.
    with open(token_path, "wb") as token:
        pickle.dump(creds, token)

    youtube = build("youtube", "v3", credentials=creds)
    return youtube


def upload_video(youtube, video_path, title, description, category_id="22", privacy_status="unlisted"):
    """Upload ``video_path`` through ``youtube.videos().insert`` and return the new video id.

    Args:
        youtube: API client exposing ``videos().insert(...)``.
        video_path: Path of the media file to upload.
        title: Value for the snippet title.
        description: Value for the snippet description.
        category_id: Value for the snippet ``categoryId`` (default "22").
        privacy_status: Value for the status ``privacyStatus`` (default "unlisted").

    Returns:
        The ``id`` field of the API response.
    """
    snippet = {"title": title, "description": description, "categoryId": category_id}
    status = {"privacyStatus": privacy_status}
    # chunksize=-1 together with resumable=True is passed through unchanged.
    media = MediaFileUpload(video_path, chunksize=-1, resumable=True, mimetype="video/*")
    insert_request = youtube.videos().insert(
        part="snippet,status",
        body={"snippet": snippet, "status": status},
        media_body=media,
    )
    return insert_request.execute()["id"]

def mark_as_uploaded(path):
    """Create (or overwrite) the marker file ``path`` containing the line "uploaded"."""
    with open(path, "w") as marker:
        print("uploaded", file=marker)

def load_text_file(path):
    """Return the UTF-8 text of ``path`` with surrounding whitespace stripped.

    A path that does not exist yields "" instead of raising.
    """
    if os.path.exists(path):
        with open(path, "r", encoding="utf-8") as handle:
            contents = handle.read()
        return contents.strip()
    return ""

def add_to_playlist(youtube, video_id, playlist_id):
    """Insert ``video_id`` into ``playlist_id`` via ``youtube.playlistItems().insert``.

    Returns:
        The response of the executed insert request, unchanged.
    """
    resource = {"kind": "youtube#video", "videoId": video_id}
    payload = {"snippet": {"playlistId": playlist_id, "resourceId": resource}}
    return youtube.playlistItems().insert(part="snippet", body=payload).execute()

def process_all_videos(
    base_dir="songs_dataset",
    video_playlist_id="PLQ_36MSwXUhoOhtsRYX8zEpnzoMKaEkTT",
    audio_playlist_id="PLQ_36MSwXUhoU1VbOFnRPxj30e2cAyTZ0",
    wait_seconds=300,
):
    """Upload the rendered outputs of every song directory under ``base_dir``.

    For each sub-directory that has no ``uploaded.txt`` marker and contains
    both ``final_video.mp4`` and ``final_output.mp3``:

    1. ``final_video.mp4`` is uploaded and added to ``video_playlist_id``;
    2. ``final_output.mp3`` is uploaded and added to ``audio_playlist_id``;
    3. ``uploaded.txt`` is written, and the function sleeps ``wait_seconds``.

    Title and description come from ``title.txt`` / ``description.txt`` in the
    directory (empty string when a file is missing). Upload errors are not
    caught, so the first failing upload stops the run.

    Args:
        base_dir: Dataset root to scan.
        video_playlist_id: Playlist that receives the video upload.
        audio_playlist_id: Playlist that receives the audio upload.
        wait_seconds: Pause after each completed directory.
    """
    youtube = authenticate_youtube()
    for dir_name in os.listdir(base_dir):
        subdir = os.path.join(base_dir, dir_name)
        if not os.path.isdir(subdir):
            continue

        uploaded_path = os.path.join(subdir, "uploaded.txt")
        if os.path.exists(uploaded_path):
            print(f"✅ Skipping {subdir}, already uploaded.")
            continue

        video_path = os.path.join(subdir, "final_video.mp4")
        audio_path = os.path.join(subdir, "final_output.mp3")
        title_path = os.path.join(subdir, "title.txt")
        description_path = os.path.join(subdir, "description.txt")

        if not os.path.exists(video_path):
            print(f"⚠️ No video found in {subdir}, skipping.")
            continue

        # Check the second input before uploading anything. Failing on it after
        # the video upload would leave no uploaded.txt, so the next run would
        # upload the same video again.
        if not os.path.exists(audio_path):
            print(f"⚠️ No final_output.mp3 found in {subdir}, skipping.")
            continue

        title = load_text_file(title_path)
        description = load_text_file(description_path)

        print(f"🔼 Uploading video from: {subdir}")

        video_id = upload_video(youtube, video_path, title, description)
        add_to_playlist(youtube, video_id, video_playlist_id)

        video_id = upload_video(youtube, audio_path, title, description)
        add_to_playlist(youtube, video_id, audio_playlist_id)
        mark_as_uploaded(uploaded_path)  # written only after both uploads succeeded
        print(f"🍅 Video uploaded!! : {subdir}")
        print(f"⌛ Waiting for next upload...")
        time.sleep(wait_seconds)  # pause between directories; tune via wait_seconds

# Entry point: upload everything found under the "songs_dataset" folder.
if __name__ == "__main__":
    process_all_videos(base_dir="songs_dataset")


In [None]:
from pathlib import Path

base_dir = Path("songs_dataset")
notebook_dirs = [p for p in base_dir.iterdir() if p.is_dir()]
print(f"📁 Found {len(notebook_dirs)} candidate dirs", flush=True)

# Skip dirs with existing final_video.mp4
filtered_dirs = []
for p in notebook_dirs:
    print("directory:",p)
    final_video = p / "final_video.mp4"
    first_music = p / "music.mp3"
    errored_file = p / "errored.txt"
    if final_video.exists():
        print(f"✅ Skipping {p} (final_video.mp4 exists)", flush=True)
    else:
        print(f"🚀 final_video_doesn't exist: {p}", flush=True)