In [8]:
!pip install transformers --quiet
!pip install auto-gptq --quiet
!pip install flask --quiet
!pip install flask-ngrok2 --quiet

In [None]:
# install ngrok linux version using the following command or you can get the
# latest version from its official website- https://dashboard.ngrok.com/get-started/setup

!wget https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.tgz

In [None]:
# extract the downloaded file using the following command

!tar -xvf /content/ngrok-stable-linux-amd64.tgz

In [None]:
# paste your AuthToken here and execute this command

#@markdown Obtain your Ngrok auth token from [here](https://dashboard.ngrok.com/get-started/your-authtoken)
NGROK_AUTH_TOKEN = '' #@param {type:"string"}

!./ngrok authtoken NGROK_AUTH_TOKEN

In [5]:
from transformers import AutoTokenizer, pipeline, logging
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
import argparse

In [None]:
# Hugging Face repository that provides the tokenizer, the quantization config
# and the quantized weights.
model_name = "TheBloke/Wizard-Vicuna-13B-Uncensored-GPTQ"
# Basename of the weights file to load from that repository.
model_basename = "Wizard-Vicuna-13B-Uncensored-GPTQ-4bit-128g.compat.no-act-order"

tokenizer = AutoTokenizer.from_pretrained(model_name)
quantize_config = BaseQuantizeConfig.from_pretrained(model_name)

# Load the quantized model onto the first CUDA device.
model = AutoGPTQForCausalLM.from_quantized(
    model_name,
    model_basename=model_basename,
    quantize_config=quantize_config,
    use_safetensors=True,
    use_triton=False,  # may be set to True or False
    device="cuda:0",
)

In [None]:
# import Flask from flask module
from flask import Flask, request

# import run_with_ngrok from flask_ngrok2 to run the app using ngrok
from flask_ngrok2 import run_with_ngrok
import json

# `logging` here is the transformers logging module imported earlier in the
# notebook; raise its threshold to CRITICAL so lower-severity library messages
# are not printed into the notebook output.
logging.set_verbosity(logging.CRITICAL)

_text_generation_pipe = None  # built on first use by generate(), then reused

def generate(prompt):
  """Run the loaded model on `prompt` and return the generated text.

  The prompt is used as-is; no instruction template is wrapped around it.

  Args:
    prompt: Text passed unchanged to the text-generation pipeline.

  Returns:
    The 'generated_text' field of the pipeline's first result.
  """
  global _text_generation_pipe
  if _text_generation_pipe is None:
    # Constructing the pipeline is setup work that does not depend on the
    # prompt, so do it once instead of on every request.
    _text_generation_pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=512,
        # temperature and top_p only take effect in sampling mode; without
        # do_sample=True they are ignored and decoding stays greedy.
        do_sample=True,
        temperature=0.7,
        top_p=0.95,
        repetition_penalty=1.15
    )
  return _text_generation_pipe(prompt)[0]['generated_text']

# Create the Flask application object; __name__ is passed as its import name.
app = Flask(__name__) #app name
# Hand the app and the auth token set earlier in the notebook to flask-ngrok2.
# NOTE(review): presumably this makes app.run() also open an ngrok tunnel —
# confirm against the flask-ngrok2 documentation.
run_with_ngrok(app, auth_token=NGROK_AUTH_TOKEN)

@app.route('/', methods=['GET'])
def test():
  """Connectivity check: reply with a fixed JSON-encoded status payload."""
  payload = dict(status='OK', message='Test')
  return json.dumps(payload)

@app.route('/generate', methods=['POST'])
def genRoute():
  """Generate text for the 'prompt' form field of a POST request.

  Returns:
    A JSON-encoded string {'status': 'OK', 'message': <generated text>} on
    success, or a (body, 400) pair whose body has status 'ERROR' when the
    request carries no non-empty 'prompt' form field.
  """
  prompt = request.form.get('prompt')
  if not prompt:
    # form.get() yields None for a missing field; reject the request here
    # with a clear client error instead of letting generation fail on it.
    error = {'status': 'ERROR', 'message': "Missing 'prompt' form field"}
    return json.dumps(error), 400
  print(prompt)
  text = generate(prompt)
  response = {'status': 'OK', 'message':text}
  return json.dumps(response)

# Start the Flask development server with its default settings; the cell keeps
# running for as long as the server is up.
app.run()
