In [2]:
from getpass import getpass
import os

# 🔐 Ask for your token (secure input)
token = getpass('Enter your GitHub token: ')

# Store token in environment variable
os.environ['GITHUB_TOKEN'] = token

# ✅ Use environment variable correctly in clone command
!git clone https://Shogofa-Nawrozy:${GITHUB_TOKEN}@github.com/Shogofa-Nawrozy/persian-nlp-llm-platform.git

# ✅ Change to repo directory
%cd persian-nlp-llm-platform


Enter your GitHub token: ··········
Cloning into 'persian-nlp-llm-platform'...
remote: Enumerating objects: 303, done.[K
remote: Counting objects: 100% (303/303), done.[K
remote: Compressing objects: 100% (224/224), done.[K
remote: Total 303 (delta 119), reused 229 (delta 65), pack-reused 0 (from 0)[K
Receiving objects: 100% (303/303), 7.54 MiB | 22.19 MiB/s, done.
Resolving deltas: 100% (119/119), done.
/content/persian-nlp-llm-platform


In [3]:
# Install ngrok
!pip install flask ngrok transformers sentencepiece stanza
!wget https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
!unzip -o ngrok-stable-linux-amd64.zip


Collecting ngrok
  Downloading ngrok-1.4.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (19 kB)
Collecting stanza
  Downloading stanza-1.10.1-py3-none-any.whl.metadata (13 kB)
Collecting emoji (from stanza)
  Downloading emoji-2.14.1-py3-none-any.whl.metadata (5.7 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.3.0->stanza)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.3.0->stanza)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.3.0->stanza)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.3.0->stanza)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.

In [4]:
!./ngrok authtoken 2yhL1SV2u2kmKmZ8iNhoWk982IO_6UTrrGpiDN52UnbEpz3wr

Authtoken saved to configuration file: /root/.ngrok2/ngrok.yml


In [5]:
!pip install pyngrok


Collecting pyngrok
  Downloading pyngrok-7.2.11-py3-none-any.whl.metadata (9.4 kB)
Downloading pyngrok-7.2.11-py3-none-any.whl (25 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.2.11


In [6]:
import os
from getpass import getpass

from pyngrok import ngrok

# Read the ngrok authtoken from the NGROK_AUTHTOKEN environment variable, or
# prompt for it, instead of hardcoding the secret in the notebook source.
# NOTE(review): the token that used to be written here was committed in plain
# text and should be treated as compromised and rotated.
ngrok_auth_token = os.environ.get("NGROK_AUTHTOKEN") or getpass("Enter your ngrok authtoken: ")
ngrok.set_auth_token(ngrok_auth_token)




In [12]:
from pyngrok import ngrok
from flask import Flask, request, jsonify
from nlp.pos_tagger import pos_tag_text
from nlp.summarizer import summarize
from nlp.translator import translate_fa_to_en
from nlp.vocab_trainer import find_usages


app = Flask(__name__)

# ngrok.connect() returns an NgrokTunnel object rather than a string; keep only
# its public URL so the value can be stored in config and interpolated into
# links (e.g. the audio URLs built by the /vocab-usage route).
public_url = ngrok.connect(5000).public_url
app.config['BASE_API'] = public_url
print("API is live at:", public_url)

@app.route('/')
def home():
    """Root endpoint: respond with a fixed plain-text status message."""
    status_message = "Persian NLP API running!"
    return status_message

def generate_explanation(tags):
    """Build a one-sentence English explanation from tagged tokens.

    Args:
        tags: Iterable of token dicts. Each token is looked up by its
            'deprel' and 'pos' keys and contributes its 'text' value.
            Tokens that lack a key are skipped for that lookup instead of
            raising KeyError. ``None`` is treated as an empty sequence.

    Returns:
        str: "The sentence is about <subject> performing the action
        '<verb>'[ on <object>]." The subject defaults to 'someone', the verb
        to 'did something', and the object clause is omitted when no token
        has deprel == 'obj'.
    """
    # Materialise once so a generator argument survives the three lookups below.
    tokens = list(tags or [])

    def first_text(key, wanted, default):
        # Text of the first token whose `key` equals `wanted`, else `default`.
        return next(
            (tok.get('text', default) for tok in tokens if tok.get(key) == wanted),
            default,
        )

    subj = first_text('deprel', 'nsubj', 'someone')
    verb = first_text('pos', 'VERB', 'did something')
    obj = first_text('deprel', 'obj', '')

    object_clause = f" on {obj}" if obj else ""
    return f"The sentence is about {subj} performing the action '{verb}'{object_clause}."

@app.route("/pos-tag", methods=["POST"])
def pos_tag():
    """Tag the 'text' field of the JSON body and explain the sentence.

    Expects a JSON object with a 'text' key. Responds with JSON
    {"tags": <result of pos_tag_text>, "explanation": <str>}.
    Returns HTTP 400 with an 'error' message when the body is not a JSON
    object or has no 'text' key, instead of failing with an unhandled KeyError.
    """
    # silent=True yields None for a missing or malformed JSON body, so the
    # validation below can answer with a clear 400 response.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict) or "text" not in payload:
        return jsonify({"error": "Request body must be a JSON object with a 'text' field."}), 400

    tags = pos_tag_text(payload["text"])
    explanation = generate_explanation(tags)
    return jsonify({"tags": tags, "explanation": explanation})

@app.route('/vocab-usage', methods=['POST'])
def vocab_usage_route():
    """Look up usage data for the word given in the JSON body's 'text' field.

    The word is stripped and lower-cased before the lookup. Responds with the
    matching entry (Persian form, transliteration, examples, phonetic, audio
    URL, usages), or with a placeholder entry of the same shape when the word
    is unknown. A missing, null or non-string 'text' is treated as an unknown
    word rather than causing a server error.
    """
    payload = request.get_json(silent=True)
    raw_text = payload.get('text', '') if isinstance(payload, dict) else ''
    # Coerce defensively: null or numeric values would otherwise break .strip().
    word = str(raw_text if raw_text is not None else '').strip().lower()

    # ngrok.connect() may hand back an NgrokTunnel object; use its URL string
    # (or the value itself when it is already a string) to build audio links.
    base_url = getattr(public_url, 'public_url', public_url)

    # TODO: Replace this with a real lookup (JSON file or DB)
    # Example dynamic dictionary for demonstration
    vocab_dict = {
        "work": {
            "persian": "کار",
            "transliteration": "kār",
            "example_en": "He goes to work every day.",
            "example_fa": "او هر روز به سرِ کار می‌رود.",
            "phonetic": "/wɜːrk/",
            "audio": f"{base_url}/static/audio/work.mp3",
            "usages": [
                {"pos": "Noun", "sentence_en": "This is hard work.", "sentence_fa": "این یک کار سخت است."},
                {"pos": "Verb", "sentence_en": "I work from home.", "sentence_fa": "من از خانه کار می‌کنم."},
                {"pos": "Adjective", "sentence_en": "Work shoes are heavy.", "sentence_fa": "کفش‌های کاری سنگین هستند."}
            ]
        },
        "مکتب": {
            "persian": "مکتب",
            "transliteration": "maktab",
            "example_en": "He studies in a traditional maktab.",
            "example_fa": "او در یک مکتب سنتی درس می‌خواند.",
            "phonetic": "/mak.tab/",
            "audio": f"{base_url}/static/audio/مکتب.mp3",
            "usages": [
                {"pos": "Noun", "sentence_en": "The boy walks to maktab.", "sentence_fa": "پسر به مکتب می‌رود."}
            ]
        }
    }

    data = vocab_dict.get(word)
    if data:
        return jsonify(data)

    # Unknown word: return a placeholder with the same keys as a real entry.
    return jsonify({
        "persian": "—",
        "transliteration": "—",
        "example_en": f"No example found for '{word}'",
        "example_fa": "—",
        "phonetic": "—",
        "audio": "",
        "usages": []
    })



@app.route('/translate', methods=['POST'])
def translation_route():
    """Translate the JSON body's 'text' field with translate_fa_to_en.

    A missing 'text' key defaults to the empty string. Responds with JSON
    {'translation': <result>}.
    """
    payload = request.json
    source_text = payload.get('text', '')
    return jsonify({'translation': translate_fa_to_en(source_text)})


@app.route('/summarize', methods=['POST'])
def do_summarize():
    """Summarize the JSON body's 'text' field with summarize.

    A missing 'text' key defaults to the empty string. Responds with JSON
    {'summary': <result>}.
    """
    input_text = request.get_json().get('text', '')
    return jsonify({'summary': summarize(input_text)})

# Start Flask's built-in server on port 5000 — the same port that was passed to
# ngrok.connect() earlier in this cell, so the public tunnel reaches this app.
app.run(port=5000)


config.json:   0%|          | 0.00/907 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/82.0 [00:00<?, ?B/s]

TypeError: not a string