In [13]:
import re
from collections import Counter
from functools import lru_cache
from urllib.parse import quote

import requests
import spacy
from bs4 import BeautifulSoup
from flask import Flask, request, jsonify

app = Flask(__name__)

In [8]:
@lru_cache(maxsize=1)
def _get_nlp():
    """Load the spaCy English pipeline once and reuse it across requests."""
    return spacy.load("en_core_web_sm")


@app.route('/is_person_name', methods=['GET'])
def is_person_name():
    """Report whether the ``txt`` query parameter contains a person name.

    The text is run through the spaCy ``en_core_web_sm`` pipeline and the
    response is ``{'is_person_name': True}`` as soon as any recognised entity
    carries the ``PERSON`` label, ``{'is_person_name': False}`` otherwise.

    Returns:
        A JSON response. On a missing/blank ``txt`` parameter the body is
        ``{'error': ...}`` with HTTP 400; on an unexpected failure the body is
        ``{'error': ...}`` with HTTP 500.
    """
    # request.args.get() already yields the raw string; indexing it with
    # ['text'] (as the previous version did) raises TypeError on every call.
    text = request.args.get('txt')
    if not text or not text.strip():
        return jsonify({'error': "Missing required query parameter 'txt'"}), 400

    try:
        doc = _get_nlp()(text)
    except Exception as e:
        # Top-level request boundary: log the traceback, report the message.
        app.logger.exception("is_person_name failed")
        return jsonify({'error': str(e)}), 500

    found_person = any(ent.label_ == "PERSON" for ent in doc.ents)
    return jsonify({'is_person_name': found_person})


def count_occurrences(soup, target_word, vicinity_words=None):
    """Count a target word and a set of context words in a parsed page.

    The text is taken from the ``div`` whose id is ``mw-content-text``; when
    that element is absent the whole document's text is used instead of
    failing. All matching is case-insensitive.

    Args:
        soup: Parsed document exposing ``find()`` and ``get_text()``
            (a ``BeautifulSoup`` object in this file).
        target_word: Word or phrase to count. Counted as a plain substring,
            so it also matches inside longer words. Empty or ``None`` counts
            as zero occurrences.
        vicinity_words: Optional iterable of context words to count as whole
            words. Defaults to ``("book", "novel", "author")``.

    Returns:
        A ``(target_word_count, vicinity_counts)`` tuple where
        ``vicinity_counts`` is a ``Counter`` keyed by each vicinity word.
    """
    if vicinity_words is None:
        vicinity_words = ("book", "novel", "author")

    content = soup.find('div', {'id': 'mw-content-text'})
    if content is None:
        # No dedicated content container: fall back to the full document
        # rather than raising AttributeError on None.
        content = soup
    text = content.get_text().lower()

    # str.count("") returns len(text) + 1, which is never a meaningful count.
    needle = target_word.lower() if target_word else ""
    target_word_count = text.count(needle) if needle else 0

    vicinity_counts = Counter()
    for vicinity_word in vicinity_words:
        # Escape so caller-supplied words cannot be read as regex syntax.
        pattern = re.compile(rf'\b{re.escape(vicinity_word.lower())}\b')
        vicinity_counts[vicinity_word] = len(pattern.findall(text))

    return target_word_count, vicinity_counts

In [None]:
@app.route('/search_wikipedia', methods=['GET'])
def search_wikipedia():
    """Fetch the English Wikipedia page for ``txt`` and return word counts.

    The ``txt`` query parameter is used both as the page title to fetch and
    as the target word passed to ``count_occurrences``.

    Returns:
        A JSON response. On success the body is
        ``{'result': {'word', 'target_word_count', 'vicinity_counts'}}``.
        A missing/blank ``txt`` yields ``{'error': ...}`` with HTTP 400, a
        non-200 reply from Wikipedia yields ``{'error': ...}`` with HTTP 502,
        and any unexpected failure yields ``{'error': ...}`` with HTTP 500.
    """
    word = request.args.get('txt')
    if not word or not word.strip():
        # Without this guard the request would go to ".../wiki/None".
        return jsonify({'error': "Missing required query parameter 'txt'"}), 400

    try:
        # Percent-encode the title so characters such as '?', '#' or spaces
        # cannot alter the requested URL.
        search_url = f'https://en.wikipedia.org/wiki/{quote(word)}'
        # A timeout keeps a stalled upstream from hanging the worker forever.
        response = requests.get(search_url, timeout=10)

        if response.status_code != 200:
            return jsonify({'error': f"Failed to retrieve data. Status code: {response.status_code}"}), 502

        # A BeautifulSoup object is always truthy, so the former `if soup:`
        # check and its fallback branch could never take the else path.
        soup = BeautifulSoup(response.text, 'html.parser')
        target_word_count, vicinity_counts = count_occurrences(soup, word)

        result = {
            'word': word,
            'target_word_count': target_word_count,
            'vicinity_counts': dict(vicinity_counts),
        }
        return jsonify({'result': result})
    except Exception as e:
        # Top-level request boundary: log the traceback, report the message.
        app.logger.exception("search_wikipedia failed")
        return jsonify({'error': str(e)}), 500