# Imports for running model locally:

In [48]:
import os
import subprocess
from huggingface_hub import hf_hub_download
from transformers import T5Tokenizer, AutoModelForSeq2SeqLM, pipeline
from dotenv import load_dotenv

Create your own Hugging Face account. From your profile settings, create a new access token and write it down. I keep my token in a `.env` file.

In [None]:
# Pull variables from the .env file into the process environment, then look up
# the Hugging Face token stored under this variable name (None when it is unset).
load_dotenv()
api_key = os.environ.get('sasamori_hugging_face_api_key')

In [35]:
# `api_key` already holds the token value read from the environment. Looking it
# up again with os.environ.get(api_key) would treat the token itself as a
# variable *name*, which yields None (or a TypeError when api_key is None),
# so the token is used directly.
HUGGING_FACE_API_KEY = api_key

In [36]:
# Hugging Face Hub repository to use for the model.
model_id = "google/t5-3b-ssm-nq"

# Files to fetch from that repository, one entry per file.
filenames = [
    "config.json",
    "generation_config.json",
    "pytorch_model.bin",
    "special_tokens_map.json",
    "spiece.model",
    "tf_model.h5",
    "tokenizer_config.json",
]

Running the next cell downloads the Hugging Face model files into the `.cache` directory on your local machine. On my machine, the files end up at:

/Users/Andrew/.cache/huggingface/hub/models--google--t5-3b-ssm-nq/snapshots/0af6fd26fe569211ab0134617f6afc1c208c05b3/config.json

/Users/Andrew/.cache/huggingface/hub/models--google--t5-3b-ssm-nq/snapshots/0af6fd26fe569211ab0134617f6afc1c208c05b3/generation_config.json

And so forth...

In [None]:
# Fetch each file listed in `filenames` from the `model_id` repository and
# print the local path that hf_hub_download reports for it.
for filename in filenames:
    downloaded_model_path = hf_hub_download(
        repo_id=model_id,
        filename=filename,
        token=HUGGING_FACE_API_KEY,
        # cache_dir=custom_download_path  # Next time, specify the cache directory
    )
    print(downloaded_model_path)

# Running the LLM

You don't need to run this cell; it only demonstrates that everything runs locally and that the model works without a Wi-Fi connection.

In [None]:
def check_connectivity(host="8.8.8.8", timeout=5):
    """Report whether `host` answers a single ping.

    Runs the system ``ping`` command once against `host` (by default 8.8.8.8,
    Google's DNS server) without going through a shell.

    Args:
        host: Address to ping.
        timeout: Seconds to wait for ``ping`` to finish before giving up, so
            the call cannot hang indefinitely.

    Returns:
        "Connected" if ping exits successfully, otherwise "Not Connected"
        (non-zero exit status, timeout, or the ping command cannot be run).
    """
    try:
        # An argument list avoids shell=True; the captured output is not needed.
        subprocess.check_output(["ping", "-c", "1", host], timeout=timeout)
    except (subprocess.CalledProcessError, subprocess.TimeoutExpired, OSError):
        # OSError covers a missing/unexecutable ping binary, which the shell
        # form previously surfaced as a non-zero exit status.
        return "Not Connected"
    return "Connected"

def toggle_wifi(state):
    """Switch the Wi-Fi (airport) power via the macOS ``networksetup`` command.

    Args:
        state: "on" or "off". Any other value is ignored and no command runs.
    """
    for power in ("on", "off"):
        if state == power:
            os.system(f"networksetup -setairportpower airport {power}")  # For macOS
            break

# Optional offline demo, left disabled: turn Wi-Fi off before the check and
# back on afterwards to see the status change.
#   print(check_connectivity())
#   toggle_wifi("off")
#   time.sleep(0.5)
#   ... status check below ...
#   toggle_wifi("on")
#   print(check_connectivity())
connectivity_status = check_connectivity()
print(connectivity_status)

In [38]:
# Load the tokenizer and the seq2seq model for `model_id`.
tokenizer = T5Tokenizer.from_pretrained(
    model_id,
    legacy=False,
    clean_up_tokenization_spaces=True,
)
model = AutoModelForSeq2SeqLM.from_pretrained(model_id)

# Combine both into a text2text-generation pipeline.
# NOTE(review): device=-1 is presumably the CPU setting — confirm against the
# transformers pipeline documentation.
text2text_pipeline = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    device=-1,
    max_length=1000,
)

Sample text2text output

In [None]:
# Send a sample question through the pipeline and print the raw result.
# The prompt previously opened with four quote characters, which made a stray
# double quote the first character of the text sent to the model.
output = text2text_pipeline(""" My father's home country's flag includes a white background with a red dot in the middle.
The most famous from this country includes sushi, ramen, and sashimi. What country do you think this is?
""")
print(output)

# Flask API integration

In [28]:
from flask import Flask, render_template_string, request, jsonify
from werkzeug.serving import make_server
import threading
import time
# Flask application object that the route decorators below register against.
app = Flask(import_name=__name__)

In [None]:
# Landing page: serves the prompt-entry form
@app.route('/')
def index():
    """Render the HTML form whose submission is POSTed to ``/generate``."""
    form_page = '''
        <html>
            <head><title>Submit a Prompt</title></head>
            <body>
                <h1>Enter a Prompt</h1>
                <form action="/generate" method="post">
                    <textarea name="prompt" rows="4" cols="50"></textarea><br>
                    <input type="submit" value="Submit">
                </form>
            </body>
        </html>
    '''
    return render_template_string(form_page)

# POST endpoint: runs the submitted prompt through the pipeline
@app.route('/generate', methods=['POST'])
def generate():
    """Generate text for the submitted form field ``prompt``.

    Returns:
        An HTML page showing the first result's ``generated_text`` on success,
        the plain-text message "No prompt provided" with status 400 when the
        field is missing or empty, or a JSON ``{"error": ...}`` body with
        status 500 if generation or rendering raises.
    """
    response_page = '''
            <html>
                <head><title>LLM Response</title></head>
                <body>
                    <h1>Response:</h1>
                    <p>{{output}}</p>
                    <a href="/">Submit another prompt</a>
                </body>
            </html>
        '''
    prompt = request.form.get("prompt", "")
    if not prompt:
        return "No prompt provided", 400

    try:
        results = text2text_pipeline(prompt)
        generated = results[0]['generated_text']
        return render_template_string(response_page, output=generated)
    except Exception as err:
        return jsonify({"error": str(err)}), 500

# Helper class to run Flask in a thread
class FlaskThread(threading.Thread):
    """Serve a Flask app from a background thread using ``make_server``.

    Args:
        app: The Flask application to serve.
        host: Interface to bind. Defaults to '127.0.0.1'.
        port: TCP port to bind. Defaults to 5002.
    """

    def __init__(self, app, host='127.0.0.1', port=5002):
        super().__init__()
        # The server is created here, in the constructing thread.
        self.server = make_server(host, port, app)
        # The application context is pushed in the constructing thread and is
        # kept as an attribute; it is not popped by this class.
        self.ctx = app.app_context()
        self.ctx.push()

    def run(self):
        """Thread body: serve requests until shutdown() stops the loop."""
        print("Starting Flask server...")
        self.server.serve_forever()

    def shutdown(self):
        """Stop serving, release the listening socket, and wait for the thread.

        Safe to call when the thread was never started: the blocking
        ``server.shutdown()`` handshake is only performed while the serving
        thread is alive, since it would otherwise wait forever.
        """
        if self.is_alive():
            self.server.shutdown()
        # Close the socket so the port is freed and the server can be
        # re-created (e.g. when the notebook cell is run again).
        self.server.server_close()
        if self.is_alive() and threading.current_thread() is not self:
            self.join(timeout=5)

I defined the local site as:
http://127.0.0.1:5002/

In [None]:
# Create the server thread for the Flask app, then start serving in the background.
flask_thread = FlaskThread(app=app)
flask_thread.start()

In [41]:

# Leave the background server up for a fixed window, then stop it. The finally
# clause ensures shutdown is still requested if the wait is interrupted.
SERVER_UPTIME_SECONDS = 10
try:
    time.sleep(SERVER_UPTIME_SECONDS)
finally:
    flask_thread.shutdown()