In [1]:
from flask import Flask, request, render_template, jsonify
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.tag import pos_tag
from nltk.chunk import ne_chunk
from sklearn.feature_extraction.text import TfidfVectorizer
from transformers import pipeline
import os

app = Flask(__name__)

# Load NLTK data
import nltk
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('maxent_ne_chunker')
nltk.download('words')

# Define a function to extract topics using TF-IDF
def extract_topics(text):
    vectorizer = TfidfVectorizer(stop_words='english', max_features=100)
    tfidf_matrix = vectorizer.fit_transform([text])
    feature_names = vectorizer.get_feature_names_out()
    topics = [term for term in feature_names]
    return topics

# Define a function for translation
def translate_text(text, target_language='fr'):
    translator = pipeline('translation_en_to_fr', model='Helsinki-NLP/opus-mt-en-fr')
    translation = translator(text)[0]['translation_text']
    return translation

@app.route('/')
def index():
    return render_template('index.html')

@app.route('/process_text', methods=['POST'])
def process_text():
    data = request.form if request.form else request.json

    # Get text input from the user
    text = data.get('text')

    # Tokenization
    sentences = sent_tokenize(text)
    words = [word_tokenize(sentence) for sentence in sentences]

    # Part-of-Speech Tagging
    pos_tags = [pos_tag(sentence) for sentence in words]

    # Named Entity Recognition
    ner_tags = [ne_chunk(pos_tag(sentence)) for sentence in words]

    # Topic Extraction
    topics = extract_topics(text)

    # Translation
    translation = translate_text(text)

    # Construct response JSON
    response = {
        'pos_tags': pos_tags,
        'ner_tags': ner_tags,
        'topics': topics,
        'translation': translation
    }
    return jsonify(response)

if __name__ == '__main__':
    app.run(debug=True)


None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Utilisateur\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\Utilisateur\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     C:\Users\Utilisateur\AppData\Roaming\nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!
[nltk_data] Downloading package words to
[nltk_data]     C:\Users\Utilisateur\AppData\Roaming\nltk_data...


 * Serving Flask app '__main__'
 * Debug mode: on


[nltk_data]   Unzipping corpora\words.zip.
 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
 * Restarting with watchdog (windowsapi)


SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
