## Step 1: Clone the repository provided by Seed-VC

# SEED-VC Voice Conversion SERVER

If Google Colab asks you to restart the session, you can ignore it.

In [1]:
!git clone https://github.com/Plachtaa/seed-vc
%cd seed-vc
!pip install -r requirements.txt
!pip uninstall tensorflow -y

Cloning into 'seed-vc'...
remote: Enumerating objects: 907, done.[K
remote: Counting objects: 100% (374/374), done.[K
remote: Compressing objects: 100% (121/121), done.[K
remote: Total 907 (delta 297), reused 253 (delta 253), pack-reused 533 (from 1)[K
Receiving objects: 100% (907/907), 66.27 MiB | 19.42 MiB/s, done.
Resolving deltas: 100% (470/470), done.
/content/seed-vc
Collecting torch (from -r requirements.txt (line 1))
  Downloading torch-2.4.0-cp312-cp312-manylinux1_x86_64.whl.metadata (26 kB)
Collecting torchvision (from -r requirements.txt (line 2))
  Downloading torchvision-0.19.0-cp312-cp312-manylinux1_x86_64.whl.metadata (6.0 kB)
Collecting torchaudio (from -r requirements.txt (line 3))
  Downloading torchaudio-2.4.0-cp312-cp312-manylinux1_x86_64.whl.metadata (6.4 kB)
Collecting scipy==1.13.1 (from -r requirements.txt (line 8))
  Downloading scipy-1.13.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━

Found existing installation: tensorflow 2.19.0
Uninstalling tensorflow-2.19.0:
  Successfully uninstalled tensorflow-2.19.0


# Task
Create a Flask server that listens for POST requests containing base64 encoded audio data in the request body. Decode the base64 audio data, save it to a temporary file, and use this file as the `--source` argument when running the inference script.

In [2]:
pip install tensorflow


Collecting tensorflow
  Downloading tensorflow-2.20.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.5 kB)
Collecting protobuf>=5.28.0 (from tensorflow)
  Downloading protobuf-6.32.0-cp39-abi3-manylinux2014_x86_64.whl.metadata (593 bytes)
Collecting tensorboard~=2.20.0 (from tensorflow)
  Downloading tensorboard-2.20.0-py3-none-any.whl.metadata (1.8 kB)
Downloading tensorflow-2.20.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (620.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m620.7/620.7 MB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading protobuf-6.32.0-cp39-abi3-manylinux2014_x86_64.whl (322 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m322.0/322.0 kB[0m [31m17.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading tensorboard-2.20.0-py3-none-any.whl (5.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.5/5.5 MB[0m [31m50.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling coll

## Install flask and required libraries

### Subtask:
Install Flask and any other libraries needed to handle the base64 decoding and file operations.


**Reasoning**:
Install Flask to set up the web server for handling POST requests, and pyngrok to expose that server through an ngrok tunnel. base64 and tempfile are part of the standard library and do not need explicit installation.



In [3]:
!pip install Flask pyngrok

Collecting pyngrok
  Downloading pyngrok-7.3.0-py3-none-any.whl.metadata (8.1 kB)
Downloading pyngrok-7.3.0-py3-none-any.whl (25 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.3.0


## Create flask application

### Subtask:
Set up a basic Flask application with a route to handle the incoming POST request.


**Reasoning**:
Set up a basic Flask application with a route to handle the incoming POST request as instructed.



In [None]:
from google.colab import files
import os

# Ask the user for the reference (target voice) audio through the Colab upload widget.
uploaded = files.upload()

# Remains None when nothing is uploaded; with several uploads the last one listed wins.
uploaded_file_path = None
for fn, content in uploaded.items():
    size_in_bytes = len(content)
    print('User uploaded file "{name}" with length {length} bytes'.format(
        name=fn, length=size_in_bytes))
    # Assumes files.upload() stored the file in the current working directory.
    working_dir = os.getcwd()
    uploaded_file_path = os.path.join(working_dir, fn)
    print(f"Uploaded file path: {uploaded_file_path}")

In [None]:
import base64
import getpass
import os
import subprocess
import sys
import tempfile

from flask import Flask, request, jsonify
from pyngrok import ngrok

# Resolve the reference audio used as --target for every conversion request.
# `uploaded_file_path` is expected to come from the upload cell; it may be
# undefined (cell never run) or None (nothing uploaded).
_reference_audio = globals().get('uploaded_file_path')
if _reference_audio is not None:
    target_audio_path = _reference_audio
else:
    # No reference audio available: warn and fall back to a placeholder path.
    # Requests will likely fail until a real file is uploaded.
    print("Error: Reference audio file not uploaded yet. Please run the upload cell first.")
    target_audio_path = "/content/default_target.wav"

# Never hardcode the ngrok authtoken in the notebook (it leaks with every share/commit).
# Read it from the NGROK_AUTHTOKEN environment variable, or prompt for it without echoing.
NGROK_AUTHTOKEN = os.environ.get("NGROK_AUTHTOKEN") or getpass.getpass("Enter your ngrok authtoken: ")
# Stores the token in ngrok's configuration file so the tunnel below can authenticate.
ngrok.set_auth_token(NGROK_AUTHTOKEN)

application = Flask(__name__)


@application.route('/', methods=['POST'])
def handle_post_request():
    """Run Seed-VC voice conversion on the posted audio and return it as base64.

    Expects a JSON object body of the form ``{"audio_data": "<base64 audio>"}``.
    The decoded bytes are written to a temporary ``.wav`` file that is passed
    as ``--source`` to ``inference.py`` (run from ``/content/seed-vc``); the
    module-level ``target_audio_path`` is passed as ``--target``.

    Returns:
        A ``(response, status)`` tuple:

        * 200 with ``{"converted_audio_base64": ...}`` on success;
        * 400 when the body is not a JSON object, lacks ``audio_data``, or
          ``audio_data`` is not valid, non-empty base64;
        * 500 when the reference audio is missing, the inference script
          fails or produces no ``.wav`` file, or any other error occurs.
    """
    temp_audio_path = None
    try:
        # silent=True yields None instead of raising on a non-JSON body, so
        # malformed requests are reported as 400 rather than 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or 'audio_data' not in data:
            return jsonify({"error": "Missing 'audio_data' in request body"}), 400

        try:
            # binascii.Error (bad padding etc.) is a ValueError subclass;
            # TypeError covers non-string payloads such as numbers or null.
            audio_bytes = base64.b64decode(data['audio_data'])
        except (TypeError, ValueError):
            return jsonify({"error": "'audio_data' is not valid base64"}), 400
        if not audio_bytes:
            return jsonify({"error": "'audio_data' decoded to an empty payload"}), 400

        # Fail fast with a clear message instead of an opaque inference error.
        if not os.path.isfile(target_audio_path):
            return jsonify({"error": f"Reference audio file not found at {target_audio_path}"}), 500

        # Save decoded audio to a temporary file (removed in the finally block).
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file:
            temp_audio_file.write(audio_bytes)
            temp_audio_path = temp_audio_file.name

        print(f"Decoded audio saved to temporary file: {temp_audio_path}")

        # A private output directory per request means the result can be found
        # without guessing the script's output filename, and concurrent
        # requests (the server runs threaded) cannot pick up each other's files.
        with tempfile.TemporaryDirectory() as output_dir:
            inference_command = [
                # Same interpreter as the kernel, so the installed packages are visible.
                sys.executable, "inference.py",
                "--source", temp_audio_path,
                "--target", target_audio_path,  # Use the uploaded file path as target
                "--output", output_dir,
                "--diffusion-steps", "25",
                "--length-adjust", "1.0",
                "--inference-cfg-rate", "0.7",
                # Add config and checkpoint paths if needed for a fine-tuned model, e.g.:
                # "--config-path", "./runs/Kobe_Taffy_ft/config_dit_mel_seed_uvit_whisper_small_wavenet.yml",
                # "--checkpoint-path", "./runs/Kobe_Taffy_ft/ft_model.pth"
            ]

            # Execute the inference script from the seed-vc checkout.
            process = subprocess.run(inference_command, capture_output=True, text=True, cwd="/content/seed-vc")
            print("Inference script stdout:", process.stdout)
            print("Inference script stderr:", process.stderr)

            if process.returncode != 0:
                return jsonify({"error": "Inference script failed", "details": process.stderr}), 500

            output_files = sorted(os.listdir(output_dir))
            wav_files = [name for name in output_files if name.lower().endswith(".wav")]
            if not wav_files:
                # List what the script did write to help debugging.
                print(f"Expected output not found in: {output_dir}. Files in output dir: {output_files}")
                return jsonify({"error": f"Inference output file not found in {output_dir}", "files_in_output_dir": output_files}), 500

            # Read the converted audio and encode it back to base64; the
            # directory (and the file) is deleted when the with-block exits.
            output_path = os.path.join(output_dir, wav_files[0])
            with open(output_path, "rb") as output_audio_file:
                output_audio_base64 = base64.b64encode(output_audio_file.read()).decode('utf-8')

        return jsonify({"converted_audio_base64": output_audio_base64}), 200

    except Exception as e:
        return jsonify({"error": f"Error processing request: {e}"}), 500
    finally:
        # Clean up the temporary input audio file
        if temp_audio_path and os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)
            print(f"Temporary input file removed: {temp_audio_path}")


# Single source of truth for the port shared by the tunnel and the Flask server.
SERVER_PORT = 5000

# Open the ngrok tunnel first and show its public URL, since application.run() blocks.
public_tunnel = ngrok.connect(SERVER_PORT)
print(public_tunnel)

if __name__ == '__main__':
    application.run(port=SERVER_PORT, threaded=True)

Error: Reference audio file not uploaded yet. Please run the upload cell first.
Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml
NgrokTunnel: "https://6f2ceda564f8.ngrok-free.app" -> "http://localhost:5000"
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
