In [14]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import sys
import warnings
import csv
import json
import flask_cors, flask
from flask import Flask, request, jsonify, render_template
from flask_cors import CORS

warnings.filterwarnings('ignore')

In [15]:
app = Flask(__name__)
CORS(app)

class Node:
    def __init__(self, key, value):
        self.key = key
        self.value = value
        self.next = None

class LinkedList:
    def __init__(self):
        self.head = None

    def insert(self, key, value):
        new_node = Node(key, value)
        new_node.next = self.head
        self.head = new_node

    def search(self, key):
        current = self.head
        while current:
            if current.key == key:
                return current.value
            current = current.next
        return None

class HashTable:
    def __init__(self, size):
        self.size = size
        self.table = [None] * size

    def hash_function(self, key):
        return hash(key) % self.size

    def insert(self, key, value):
        index = self.hash_function(key)
        if self.table[index] is None:
            self.table[index] = LinkedList()
        self.table[index].insert(key, value)

    def search(self, key):
        index = self.hash_function(key)
        if self.table[index] is not None:
            return self.table[index].search(key)
        return None
    
def build_forward_index(data):
    forward_index = HashTable(size=100)
    for article_id, words in data["title"].items():
        forward_index.insert(article_id, words)
    return forward_index

def build_inverted_index(data):
    inverted_index = HashTable(size=100)
    for article_id, words in data["content"].items():
        for word in words:
            if inverted_index.search(word):
                inverted_index.search(word).append(article_id)
            else:
                inverted_index.insert(word, [article_id])
    return inverted_index

def remove_duplicates(input_list):
    seen = set()
    result = []
    for item in input_list:
        if item not in seen:
            seen.add(item)
            result.append(item)
    return result

In [16]:
def load_data_from_json(file_path):
    with open(file_path, 'r') as file:
        data = json.load(file)
    return data

json_file_path = "Files\cleaned.json"
json_data = load_data_from_json(json_file_path)

forward_index = build_forward_index(json_data)
inverted_index = build_inverted_index(json_data)

In [17]:
@app.route("/search_1", methods=["GET"], endpoint='single_word_search')
def single_word_search():
    word = request.args.get('word')
    return jsonify(remove_duplicates(inverted_index.search(word)))

""" @app.route("/search", methods=["GET"])
def multi_word_search():
    query = request.args.get('query')
    result = set()
    words = query.split()
    if words:
        result = set(inverted_index.search(words[0])) if inverted_index.search(words[0]) else set()
        for word in words[1:]:
            current_result = inverted_index.search(word)
            if current_result:
                result.intersection_update(current_result)
    return jsonify(list(result)) """


def rank_results(data, results):
    word_frequency = {}
    for article_id in results:
        for word in data["content"][article_id]:
            word_frequency[word] = word_frequency.get(word, 0) + 1
    return sorted(results, key=lambda x: sum(word_frequency[word] for word in data["content"][x]), reverse=True)

def display_results(data, results):
    response = {}
    for article_id in results:
        response[f"Article {article_id}"] = data['content'][article_id]
    return response

def display_results_endpoint():
    try:
        request_data = request.json
        data = request_data.get("data")
        results = request_data.get("results")

        if data is None or results is None:
            return jsonify({"error": "Invalid request format"}), 400

        response = display_results(data, results)
        return jsonify(response)
    except Exception as e:
        return jsonify({"error": str(e)}), 500


""" @app.route("/add", methods=["GET"])
def add_content(data, new_article):
    article_id = str(len(data["index"]))
    data["index"][article_id] = len(data["index"])
    data["source"][article_id] = new_article[0]
    data["title"][article_id] = new_article[1]
    data["content"][article_id] = new_article[2]

    return data """

if __name__ == "__main__":
    app.run(debug=False)

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
127.0.0.1 - - [02/Dec/2023 12:57:32] "GET /search_1?word=for HTTP/1.1" 200 -
127.0.0.1 - - [02/Dec/2023 12:57:41] "GET /search_1?word=ronaldo HTTP/1.1" 200 -
[2023-12-02 12:57:54,739] ERROR in app: Exception on /search_1 [GET]
Traceback (most recent call last):
  File "c:\Users\haris\miniconda3\envs\miniconda-py3-tf2.0\lib\site-packages\flask\app.py", line 2529, in wsgi_app
    response = self.full_dispatch_request()
  File "c:\Users\haris\miniconda3\envs\miniconda-py3-tf2.0\lib\site-packages\flask\app.py", line 1825, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "c:\Users\haris\miniconda3\envs\miniconda-py3-tf2.0\lib\site-packages\flask_cors\extension.py", line 176, in wrapped_function
    return cors_after_request(app.make_response(f(*args, **kwargs)))
  File "c:\Users\haris\miniconda3\envs\miniconda-py3-tf2.0\lib\site-packages\flask\app.py", line 1823, in full_dispatch_request
    rv = self.dispatch_re