## Start commit-autosuggestions server
Run the Flask app server in Google Colab, for people who do not have a GPU.

#### Clone github repository

In [None]:
# Fetch the project, make it the working directory, and install its dependencies.
!git clone https://github.com/graykode/commit-autosuggestions.git
# Later cells use repo-relative paths (weight/, ./scripts/, the commit package).
%cd commit-autosuggestions
!pip install -r requirements.txt

#### Download model weights

Download the two model weights from Google Drive with the gdown module.
1. Added model : A model trained on the Code2NL task for Python, using pre-trained CodeBERT (Feng et al., 2020).
2. Diff model : A model retrained after initializing it with the weights of model (1) and adding an embedding (`patch_ids_embedding`) for the added and deleted parts of the code.

Pre-trained weights (Google Drive file ids)

Language | Added | Diff
--- | --- | ---
python | 1YrkwfM-0VBCJaa9NYaXUQPODdGPsmQY4 | 1--gcVVix92_Fp75A-mWH0pJS0ahlni5m
javascript | 1-F68ymKxZ-htCzQ8_Y9iHexs2SJmP5Gc | 1-39rmu-3clwebNURMQGMt-oM4HsAkbsf

In [None]:
# Google Drive file ids of the checkpoints to download (the python row of the table above).
ADD_MODEL='1YrkwfM-0VBCJaa9NYaXUQPODdGPsmQY4'
DIFF_MODEL='1--gcVVix92_Fp75A-mWH0pJS0ahlni5m'

# Each checkpoint is saved as weight/<mode>/pytorch_model.bin, the layout get_model() loads from.
# $ADD_MODEL and $DIFF_MODEL are the Python variables above, expanded by IPython.
!pip install gdown \
    && mkdir -p weight/added \
    && mkdir -p weight/diff \
    && gdown "https://drive.google.com/uc?id=$ADD_MODEL" -O weight/added/pytorch_model.bin \
    && gdown "https://drive.google.com/uc?id=$DIFF_MODEL" -O weight/diff/pytorch_model.bin

#### ngrok setting with flask

Before starting the server, you need to configure ngrok so that this notebook is reachable from the outside. For the details, I referred to [this jupyter notebook](https://github.com/alievk/avatarify/blob/master/avatarify.ipynb).

In [None]:
# flask-ngrok provides run_with_ngrok, which the server cell imports.
!pip install flask-ngrok

Go to https://dashboard.ngrok.com/auth/your-authtoken (sign up if required), copy your authtoken and put it below.



In [5]:
# The ngrok authtoken is a credential, so it must not be stored in the notebook
# (anyone the notebook is shared with could open tunnels on your account).
# It is read from the NGROK_AUTHTOKEN environment variable when set; otherwise
# you are prompted for it and the input is not echoed or saved.
import os
from getpass import getpass

# strip() drops whitespace picked up when pasting, which would corrupt the config file.
authtoken = (os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: ")).strip()

Set your region

Code | Region
--- | ---
us | United States
eu | Europe
ap | Asia/Pacific
au | Australia
sa | South America
jp | Japan
in | India

In [6]:
# ngrok region code; pick one from the table above.
region = "jp"

# Local ports that the "input" and "output" tunnels forward to.
local_in_port, local_out_port = 5000, 5000

In [7]:
# ngrok configuration text rendered from the values set in the cells above.
# It declares two HTTP tunnels, "input" and "output", one per local port.
config =\
f"""
authtoken: {authtoken}
region: {region}
console_ui: False
tunnels:
  input:
    addr: {local_in_port}
    proto: http    
  output:
    addr: {local_out_port}
    proto: http
"""

# Written relative to the current working directory.
# NOTE(review): presumably read by scripts/open_tunnel_ngrok.sh — confirm.
with open('ngrok.conf', 'w') as f:
  f.write(config)

In [9]:
from subprocess import Popen, PIPE
import shlex
import json
import time


def run_with_pipe(command):
  """Run a ``|``-separated command line without a shell and return its output.

  Each ``|``-delimited segment is tokenised with ``shlex.split`` and started
  as its own process, with the stdout of one stage connected to the stdin of
  the next.

  Args:
    command: Command line such as ``"curl URL | grep x"``.

  Returns:
    The final stage's stdout as a list of ``bytes`` lines (newlines kept).
  """
  # Imported here so the function does not depend on another cell's imports.
  from subprocess import DEVNULL

  stages = [shlex.split(segment) for segment in command.split("|")]
  procs = []
  try:
    for argv in stages:
      upstream = procs[-1].stdout if procs else None
      # stderr is never read by this function; piping it could block a chatty
      # child once the pipe buffer fills, so it is discarded instead.
      proc = Popen(argv, stdin=upstream, stdout=PIPE, stderr=DEVNULL)
      if upstream is not None:
        # The child now owns the read end; closing our copy lets the upstream
        # process receive SIGPIPE if this stage exits early.
        upstream.close()
      procs.append(proc)
    return procs[-1].stdout.readlines()
  finally:
    # Release pipe handles and reap every child so none is left as a zombie.
    for proc in procs:
      if proc.stdout is not None and not proc.stdout.closed:
        proc.stdout.close()
      proc.wait()


def get_tunnel_adresses():
  """Query the local ngrok API and return the public tunnel URLs.

  Fetches ``http://localhost:4040/api/tunnels`` with curl, prints one line per
  tunnel, and picks out the tunnels named ``input`` and ``output``.

  Returns:
    A ``(in_addr, out_addr)`` tuple of public URLs.

  Raises:
    AssertionError: if curl produced no output.
    RuntimeError: if the ``input`` or ``output`` tunnel is not listed.
  """
  info = run_with_pipe("curl http://localhost:4040/api/tunnels")
  assert info, "no response from the ngrok API on localhost:4040"

  # Parse the whole response, not just its first line.
  info = json.loads(b"".join(info))

  addresses = {'input': None, 'output': None}
  for tunnel in info['tunnels']:
    url = tunnel['public_url']
    local_port = tunnel['config']['addr'].split(':')[-1]
    name = tunnel['name']
    print(f'{url} -> {local_port} [{name}]')
    if name in addresses:
      addresses[name] = url
    else:
      print(f'unknown tunnel: {name}')

  # Fail with a clear message instead of an UnboundLocalError at the return.
  missing = [name for name, url in addresses.items() if url is None]
  if missing:
    raise RuntimeError(f'ngrok did not report tunnel(s): {", ".join(missing)}')

  return addresses['input'], addresses['output']

In [10]:
import time
from subprocess import Popen, PIPE

# (Re)Open tunnel
# Started in the background; its output is piped so the next cell can print it on failure.
ps = Popen('./scripts/open_tunnel_ngrok.sh', stdout=PIPE, stderr=PIPE)
# Fixed pause before the next cell queries the tunnel API.
# NOTE(review): 3 seconds is a guess at the start-up time — rerun the next cell if it is too short.
time.sleep(3)

In [None]:
# Get tunnel addresses
try:
  in_addr, out_addr = get_tunnel_adresses()
  print("Tunnel opened")
except Exception as e:
  # Show the cause instead of hiding it.
  print(f"Could not read the tunnel addresses: {e!r}")
  # Only drain the pipes once the tunnel process has exited: reading them
  # while it is still running would block this cell until it terminates.
  if ps.poll() is not None:
    for stream in (ps.stdout, ps.stderr):
      if stream is not None:
        for line in stream.readlines():
          print(line.decode(), end='')
  print("Something went wrong, reopen the tunnel")

#### Run your server!

In [12]:
import os
import torch
import argparse
from tqdm import tqdm
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader, SequentialSampler
from transformers import (RobertaConfig, RobertaTokenizer)

from commit.model import Seq2Seq
from commit.utils import (Example, convert_examples_to_features)
from commit.model.diff_roberta import RobertaModel

from flask import Flask, jsonify, request

# Maps a model_type name to its (config class, model class, tokenizer class).
# RobertaModel here is the one imported from commit.model.diff_roberta, not from transformers.
MODEL_CLASSES = {'roberta': (RobertaConfig, RobertaModel, RobertaTokenizer)}

In [13]:
def get_model(model_class, config, tokenizer, mode):
    """Build a Seq2Seq model and load the checkpoint stored for ``mode``.

    Reads ``beam_size``, ``max_target_length`` and ``load_model_path`` from
    the module-level ``args``.

    Args:
        model_class: Encoder class, instantiated as ``model_class(config=config)``.
        config: Model configuration; also supplies the decoder dimensions.
        tokenizer: Tokenizer whose cls/sep token ids become the sos/eos ids.
        mode: Sub-directory of ``args.load_model_path`` holding
            ``pytorch_model.bin``.

    Returns:
        The Seq2Seq model with the checkpoint loaded (non-strict) on CPU.
    """
    seq2seq = Seq2Seq(
        encoder=model_class(config=config),
        decoder=nn.TransformerDecoder(
            nn.TransformerDecoderLayer(
                d_model=config.hidden_size,
                nhead=config.num_attention_heads,
            ),
            num_layers=6,
        ),
        config=config,
        beam_size=args.beam_size,
        max_length=args.max_target_length,
        sos_id=tokenizer.cls_token_id,
        eos_id=tokenizer.sep_token_id,
    )

    assert args.load_model_path
    checkpoint_path = os.path.join(args.load_model_path, mode, 'pytorch_model.bin')
    assert os.path.exists(checkpoint_path)

    # Always deserialize onto CPU; the caller moves the model to its device.
    state_dict = torch.load(checkpoint_path, map_location=torch.device('cpu'))
    seq2seq.load_state_dict(state_dict, strict=False)
    return seq2seq

def get_features(examples):
    """Convert examples into a TensorDataset of model inputs.

    Uses the tokenizer and ``max_source_length`` from the module-level ``args``.

    Args:
        examples: Examples accepted by ``convert_examples_to_features``.

    Returns:
        A TensorDataset of (source_ids, source_mask, patch_ids) long tensors,
        each row truncated to ``args.max_source_length`` entries.
    """
    features = convert_examples_to_features(examples, args.tokenizer, args, stage='test')

    def as_long_tensor(field):
        # One truncated row per feature, taken from the named attribute.
        rows = [getattr(feature, field)[:args.max_source_length] for feature in features]
        return torch.tensor(rows, dtype=torch.long)

    source_ids = as_long_tensor('source_ids')
    source_mask = as_long_tensor('source_mask')
    patch_ids = as_long_tensor('patch_ids')
    return TensorDataset(source_ids, source_mask, patch_ids)

def create_app():
    """Register the HTTP routes on the module-level Flask ``app`` and return it.

    Routes:
        GET  /           -> ``{"hello": "world"}``
        POST /added      -> commit message from ``args.added_model``
        POST /diff       -> commit message from ``args.diff_model``
        POST /tokenizer  -> tokens of ``code`` from ``args.tokenizer``

    ``/added`` and ``/diff`` expect a JSON object with the keys ``idx``,
    ``added`` and ``deleted``; ``/tokenizer`` expects the key ``code``.
    A body that is not a JSON object, or that lacks a required key, is
    answered with HTTP 400 and an ``error`` message instead of an unhandled
    exception.

    Returns:
        The module-level ``app`` with the routes attached.
    """
    def _bad_request(reason):
        # JSON error body with a 400 status.
        return jsonify(error=reason), 400

    def _read_payload(required):
        # Returns (payload, None) on success or (None, error_response).
        payload = request.get_json(silent=True)
        if not isinstance(payload, dict):
            return None, _bad_request('request body must be a JSON object')
        missing = [key for key in required if key not in payload]
        if missing:
            return None, _bad_request(f'missing field(s): {", ".join(missing)}')
        return payload, None

    def _suggest(model):
        # Shared body of /added and /diff: they differ only in the model used.
        payload, error = _read_payload(('idx', 'added', 'deleted'))
        if error is not None:
            return error
        example = [
            Example(
                idx=payload['idx'],
                added=payload['added'],
                deleted=payload['deleted'],
                target=None
            )
        ]
        message = inference(model=model, data=get_features(example))
        return jsonify(idx=payload['idx'], message=message)

    @app.route('/')
    def index():
        return jsonify(hello="world")

    # The routes below accept POST only, so no method check is needed inside.
    @app.route('/added', methods=['POST'])
    def added():
        return _suggest(args.added_model)

    @app.route('/diff', methods=['POST'])
    def diff():
        return _suggest(args.diff_model)

    @app.route('/tokenizer', methods=['POST'])
    def tokenizer():
        payload, error = _read_payload(('code',))
        if error is not None:
            return error
        tokens = args.tokenizer.tokenize(payload['code'])
        return jsonify(tokens=tokens)

    return app

def inference(model, data):
    """Run ``model`` over ``data`` and decode its predictions to text.

    The whole dataset is fed as a single batch, in order. Uses the device and
    tokenizer from the module-level ``args``.

    Args:
        model: Model called with ``source_ids``, ``source_mask`` and
            ``patch_ids`` keyword arguments.
        data: Dataset yielding (source_ids, source_mask, patch_ids) tensors.

    Returns:
        A list with one decoded string per prediction.
    """
    loader = DataLoader(data, sampler=SequentialSampler(data), batch_size=len(data))

    model.eval()
    messages = []
    with torch.no_grad():
        for batch in tqdm(loader, total=len(loader)):
            source_ids, source_mask, patch_ids = (
                tensor.to(args.device) for tensor in batch
            )
            outputs = model(source_ids=source_ids, source_mask=source_mask, patch_ids=patch_ids)
            for output in outputs:
                # Keep only the first candidate and cut it at the first id 0.
                token_ids = list(output[0].cpu().numpy())
                end = token_ids.index(0) if 0 in token_ids else len(token_ids)
                messages.append(
                    args.tokenizer.decode(token_ids[:end], clean_up_tokenization_spaces=False)
                )
    return messages

**Set environment**

In [14]:
import easydict

# Settings shared by the model-loading, feature and inference helpers.
# The server cell later attaches the tokenizer and both models to this object.
args = easydict.EasyDict(
    load_model_path='weight/',
    model_type='roberta',
    config_name='microsoft/codebert-base',
    tokenizer_name='microsoft/codebert-base',
    max_source_length=512,
    max_target_length=128,
    beam_size=10,
    do_lower_case=False,
    # Use the GPU when one is available, otherwise the CPU.
    device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
)

In [None]:
# Serve the Flask app through flask-ngrok.
from flask_ngrok import run_with_ngrok

# create_app() registers its routes on this module-level `app`.
app = Flask(__name__)
run_with_ngrok(app)  # Start ngrok when app is run

# NOTE: `config` below rebinds the name the ngrok cell used for its configuration text.
config_class, model_class, tokenizer_class = MODEL_CLASSES[args.model_type]
config = config_class.from_pretrained(args.config_name)
args.tokenizer = tokenizer_class.from_pretrained(args.tokenizer_name, do_lower_case=args.do_lower_case)

# Build one model per endpoint and keep both on `args`, where the route handlers read them.
args.added_model =get_model(model_class=model_class, config=config,
                        tokenizer=args.tokenizer, mode='added').to(args.device)
args.diff_model = get_model(model_class=model_class, config=config,
                        tokenizer=args.tokenizer, mode='diff').to(args.device)

# Attach the routes, then start serving.
app = create_app()
app.run()

## Set commit configure
Now run `commit configure` on your local computer, passing the ngrok URL of the server as the endpoint.
```shell
$ commit configure --endpoint http://********.ngrok.io
```