In [None]:
# @title Oogbabooga Colab Notebook Beta Project
# @markdown TL;DR: Runtime > Run All (⌘/Ctrl+F9). Takes about 5 minutes to start. You will be prompted to authorize Google Drive access.
# @markdown To prevent Colab from disconnecting you and give you more time to work, you can run the Colab on low GPU mode.
# @markdown Shoutout to this... I will add their name later. I can't remember.
%%html
<audio src="https://github.com/anars/blank-audio/raw/master/1-hour-of-silence.mp3" autoplay muted loop controls />

In [2]:
# @title Initialization
# @markdown In this cell, all the necessary variables that will be used throughout the notebook are initialized. 
# @markdown This includes the Google Drive folder where the data is stored, the path where Google Drive will be mounted, the URL of the text-generation repository, and others.
# @markdown Always read the source code of a notebook before running it: a copy may have been modified to include malicious code that could steal your data.

# -*- coding: utf-8 -*-
import os
import subprocess
from pathlib import Path
from google.colab import drive
from socket import gethostname, gethostbyname
from requests import get
from psutil import virtual_memory

# Variables
# Everything a reader might want to tune lives here; later cells read these names.

# Folder (relative to "MyDrive") where the repositories and models are kept.
google_drive_folder = "Colab Data/Oogbabooga"
# Where Google Drive is mounted inside the Colab runtime.
google_drive_mount_path = "/content/drive"
google_drive_data_directory_relative_path = google_drive_folder
google_drive_data_directory_path = f"{google_drive_mount_path}/MyDrive/{google_drive_data_directory_relative_path}"

text_generation_repo_url = 'https://github.com/oobabooga/text-generation-webui.git'
# NOTE(review): this value is "<url> -b <branch>", not a bare URL. It is kept
# unchanged so that any consumer interpolating it into `git clone` still works.
gptq_repo_url = 'https://github.com/oobabooga/GPTQ-for-LLaMa.git -b cuda'

# Hugging Face model id ("<owner>/<model>") and the weights file inside that repo.
model_name = 'anon8231489123/vicuna-13b-GPTQ-4bit-128g'
model_file_name = 'vicuna-13b-4bit-128g.safetensors'
model_file_url = f'https://huggingface.co/{model_name}/resolve/main/{model_file_name}'

# The model folder on disk is named with "_" in place of "/" (this is the
# folder the download and run cells use), so the local path must be built from
# that name rather than from the raw model id.
model_folder_name = model_name.replace('/', '_')
model_file_path = f'{google_drive_data_directory_path}/text-generation-webui/models/{model_folder_name}/{model_file_name}'


In [None]:
# @title Google Drive Mounting
# @markdown In this cell, Google Drive is mounted to the specified path. 
# @markdown A symbolic link is created between the Google Drive data directory and the current directory for easy access. 
# @markdown Also, a text file is created in the data directory indicating the notebook that is currently using it.

# Mount Google Drive
from google.colab import drive
drive.mount(google_drive_mount_path, force_remount=True)

google_drive_data_directory_relative_path = google_drive_folder
google_drive_data_directory_path = f"{google_drive_mount_path}/MyDrive/{google_drive_data_directory_relative_path}"
!mkdir -p "{google_drive_data_directory_path}"
!ln -nsf "{google_drive_data_directory_path}" ./data
!touch "data/This folder is used by the Colab notebook for the Oogbabooga files and models.txt"
print(f"Data will be stored in Google Drive folder: \"{google_drive_data_directory_relative_path}\", which is mounted under \"{google_drive_data_directory_path}\"")



In [None]:
# @title GPU Information
# @markdown This cell checks if the current Colab instance is connected to a GPU. 
# @markdown It uses the `nvidia-smi` command to fetch GPU info and prints it.

# GPU Info
# Query the GPU through `nvidia-smi`. Three failure modes are treated as
# "no GPU": the binary is missing (subprocess raises OSError), the command
# exits non-zero, or its output contains the word "failed".
try:
    gpu_query = subprocess.run(
        ['nvidia-smi'],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # fold stderr in so error text is inspected as well
        text=True,
    )
    gpu_info = gpu_query.stdout
    gpu_available = gpu_query.returncode == 0 and 'failed' not in gpu_info
except OSError:
    # nvidia-smi is not installed / not executable on this runtime.
    gpu_info = ''
    gpu_available = False

if gpu_available:
    print(gpu_info)
else:
    print('Not connected to a GPU')


In [None]:
# @title RAM Information
# @markdown This cell prints the total available RAM on the current Colab instance. 
# @markdown It also checks if the instance is a high-RAM runtime by comparing the total RAM with a threshold value.

# RAM Info
# Total memory reported by psutil, converted from bytes to (decimal) gigabytes.
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

# Runtimes at or above 20 GB are reported as high-RAM.
runtime_is_high_ram = not (ram_gb < 20)
print('You are using a high-RAM runtime!' if runtime_is_high_ram else 'Not using a high-RAM runtime')


In [None]:
# @title Text Generation WebUI Setup
# @markdown Here, the 'text-generation-webui' repository is cloned from GitHub into the data directory on Google Drive if it does not exist already. 
# @markdown Then, the requirements for the repository are installed using pip.

# Text Generation WebUI Setup
if not os.path.exists(f"{google_drive_data_directory_path}/text-generation-webui"):
    for _ in range(5):  # Try up to 5 times, sometimes the TLS transport fails
        try:
            !git clone {text_generation_repo_url} "{google_drive_data_directory_path}/text-generation-webui" && break  # If successful, break the loop
        except Exception:
            pass  # If not successful, pass and retry
else:
    print('text-generation-webui already exists. Skipping clone operation.')

os.chdir(f"{google_drive_data_directory_path}/text-generation-webui")
!pip install -r requirements.txt




In [None]:
# @title 4-bit Mode Support Setup
# @markdown In this cell, the 'GPTQ-for-LLaMa' repository is cloned from GitHub into the repositories directory under 'text-generation-webui' if it does not exist already. 
# @markdown The necessary requirements for this repository are installed and the setup script is run.

# Clone GPTQ repository and setup
if not os.path.exists('repositories/GPTQ-for-LLaMa'):
    !mkdir -p repositories
    os.chdir('repositories')
    for _ in range(5):  # Try up to 5 times, sometimes the TLS transport fails
        try:
            !git clone -b cuda https://github.com/oobabooga/GPTQ-for-LLaMa.git && break  # If successful, break the loop
        except Exception:
            pass  # If not successful, pass and retry
    os.chdir('GPTQ-for-LLaMa')
    !pip install ninja
    !pip install -r requirements.txt
    !python setup_cuda.py install
else:
    print('GPTQ-for-LLaMa repository already exists. Skipping setup.')



In [None]:
# @title Download Model
# @markdown The model is downloaded from Hugging Face and stored in the text-generation-webui directory. 
# @markdown The model is downloaded only if it does not exist already.

# Download Model
os.chdir(f"{google_drive_data_directory_path}/text-generation-webui")
model_path = "models/anon8231489123_vicuna-13b-GPTQ-4bit-128g/vicuna-13b-4bit-128g.safetensors"
if not os.path.isfile(model_path):
    !python download-model.py --text-only anon8231489123/vicuna-13b-GPTQ-4bit-128g
    !wget https://huggingface.co/anon8231489123/vicuna-13b-GPTQ-4bit-128g/resolve/main/vicuna-13b-4bit-128g.safetensors
    !ls -lisa
    # Move Model File
    !mv vicuna-13b-4bit-128g.safetensors {model_path}
else:
    print(f"Model file {model_path} already exists. Skipping download.")


In [None]:
# @title Run Application
# @markdown In the final cell, the necessary version of Pillow is installed, and the application is run with the specified model and parameters.

# Run Application
!pip install --ignore-installed Pillow==9.3.0
!python server.py --share --model anon8231489123_vicuna-13b-GPTQ-4bit-128g --model_type llama --chat --wbits 4 --groupsize 128
