In [None]:
import os

import torch
from transformers import PegasusForConditionalGeneration, PegasusTokenizer
from transformers import pipeline
from transformers import T5Tokenizer, T5ForConditionalGeneration
# Extractive question-answering pipeline built on the DistilBERT SQuAD checkpoint.
# The length limit was previously passed as `max_anwer_len` (misspelled), so the
# intended limit of 100 was never supplied under the parameter's real name.
qa = pipeline(
    "question-answering",
    model='distilbert-base-cased-distilled-squad',
    tokenizer='distilbert-base-cased-distilled-squad',
    max_answer_len=100,
)

In [None]:
# Load the Pegasus summarization checkpoint and its tokenizer, and report
# which torch device was detected.
model_name = 'tuner007/pegasus_summarizer'
if torch.cuda.is_available():
    torch_device = 'cuda'
else:
    torch_device = 'cpu'
print(torch_device)
tokenizer = PegasusTokenizer.from_pretrained(model_name)
summary_model = PegasusForConditionalGeneration.from_pretrained(model_name)

In [None]:
# Load the Pegasus summarizer used by get_summary().
model_name = 'tuner007/pegasus_summarizer'
torch_device = 'cuda' if torch.cuda.is_available() else 'cpu'
tokenizer = PegasusTokenizer.from_pretrained(model_name)
# Move the model to the detected device instead of a hard-coded "cuda",
# which raised on machines without a CUDA device even though the fallback
# to 'cpu' had just been computed.
model = PegasusForConditionalGeneration.from_pretrained(model_name).to(torch_device)



In [None]:
from llama_cpp import Llama

print("loading model...")
# Download model here https://huggingface.co/TheBloke/stable-vicuna-13B-GGML/tree/main
# NOTE(review): the link above points at stable-vicuna while the default file
# name below is gpt4-x-vicuna — confirm which checkpoint is intended.
# The path can be overridden with the LLAMA_MODEL_PATH environment variable so
# the notebook is not tied to one machine; the default is the original location.
llama_model_path = os.environ.get(
    "LLAMA_MODEL_PATH",
    "F:/Dokumente/AI/Mosaic/models/gpt4-x-vicuna-13B.ggmlv3.q4_0.bin",
)
llm = Llama(model_path=llama_model_path)
print("model loaded")

In [None]:
# Load the FLAN-T5 (large) tokenizer and model used by get_answer().
# NOTE(review): this rebinds the global `tokenizer`, which earlier cells bound to
# the Pegasus tokenizer; any code reading `tokenizer` after this cell has run
# receives the T5 tokenizer.
tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-large")
languageModel = T5ForConditionalGeneration.from_pretrained("google/flan-t5-large")

In [107]:
from googlesearch import search
import requests
from bs4 import BeautifulSoup
import wikipedia

# Cache for the tokenizer that belongs to the summarization model.
_summary_tokenizer = None


def _get_summary_tokenizer():
    """Return the Pegasus tokenizer matching the global summarization ``model``.

    The notebook's global ``tokenizer`` is rebound to a T5 tokenizer by the
    cell that loads FLAN-T5, so it cannot be relied on here. The tokenizer is
    loaded once from the same checkpoint as ``model`` and then reused.
    """
    global _summary_tokenizer
    if _summary_tokenizer is None:
        _summary_tokenizer = PegasusTokenizer.from_pretrained(model.name_or_path)
    return _summary_tokenizer


def get_summary(input_text):
    """Summarize ``input_text`` with the global Pegasus ``model``.

    Args:
        input_text: Text to summarize. It is truncated to 1024 tokens
            before generation.

    Returns:
        The decoded summary string (first and only returned sequence),
        with special tokens removed.
    """
    summ_tokenizer = _get_summary_tokenizer()
    # Send the inputs to wherever the model actually lives so the two can
    # never end up on different devices.
    batch = summ_tokenizer(
        [input_text], truncation=True, max_length=1024, return_tensors="pt"
    ).to(model.device)
    # NOTE(review): `temperature` presumably has no effect here because sampling
    # is not enabled (beam search only) — confirm before tuning it.
    gen_out = model.generate(
        **batch,
        num_beams=5,
        num_return_sequences=1,
        temperature=1.5,
        min_length=100,
        max_length=512,
        early_stopping=True,
    )
    return summ_tokenizer.batch_decode(gen_out, skip_special_tokens=True)[0]

def searchByQuerry(query):
    """Return the result of ``googlesearch.search`` for ``query``, asking for 3 results."""
    return search(query, num_results=3)

from bs4 import BeautifulSoup
import requests

def searchWithEngine(query, timeout=10):
    """Search Bing for ``query`` and return the URLs of the organic results.

    Args:
        query: Free-text search query. It is lower-cased and question marks
            are removed before the request is sent.
        timeout: Seconds to wait for Bing before ``requests`` raises.

    Returns:
        A list of result URLs (strings) in the order they appear on the page.
        The list is empty when no ``li.b_algo`` entries are found.
    """
    # Normalise the query: lower-case it and strip question marks.
    query = query.lower().replace("?", "")

    # Let requests build the query string so spaces, '&', '#', '+' etc. are
    # percent-encoded instead of corrupting the URL.
    response = requests.get(
        "https://www.bing.com/search", params={"q": query}, timeout=timeout
    )

    soup = BeautifulSoup(response.content, "html.parser")
    links = []
    # Each organic Bing result is an <li class="b_algo"> element.
    for result in soup.find_all("li", class_="b_algo"):
        anchor = result.find("a")
        # Skip malformed entries instead of raising on a missing anchor/href.
        if anchor is None or not anchor.get("href"):
            continue
        links.append(anchor["href"])

    return links


def searchResultToText(result):
    """Download the page at URL ``result`` and return the text of its ``<p>`` tags.

    Each paragraph's text is followed by a newline; a page without
    paragraphs yields an empty string.
    """
    response = requests.get(result)
    document = BeautifulSoup(response.content, 'html.parser')
    return "".join(paragraph.text + "\n" for paragraph in document.find_all('p'))

def get_answer(question, context):
    """Answer ``question`` from ``context`` with the global FLAN-T5 ``languageModel``.

    Args:
        question: The question text.
        context: Supporting text inserted into the prompt after the question.

    Returns:
        The decoded first generated sequence. Special tokens are not stripped
        here. If tokenization, generation or decoding raises, the exception is
        printed and the string ``"No answer found"`` is returned.
    """
    languageModel.to(torch_device)
    print("Generating answer...")
    input_text = "Q: %s context: %s" % (question, context)
    try:
        # Tokenization and generation are inside the try block so that a
        # failure there also hits the fallback below and the cache cleanup in
        # `finally`; previously only decode() was guarded.
        input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(torch_device)
        answer = languageModel.generate(
            input_ids, max_length=1000, num_beams=4, early_stopping=True, min_length=5
        )
        return tokenizer.decode(answer[0])
    except Exception as e:
        print(e)
        return "No answer found"
    finally:
        # Runs on both the success and the failure path.
        torch.cuda.empty_cache()

    

def outputToText(question, answer):
    """Ask the global ``llm`` to phrase ``answer`` as a sentence answering ``question``.

    Args:
        question: The original question text.
        answer: The short answer; it is converted with ``str()``.

    Returns:
        The generated continuation with the echoed prompt and any leading
        whitespace removed.
    """
    prompt = (
        "Question: " + question + "; Answer: " + str(answer)
        + "; Write the sentnence using the answer:"
    )
    output = llm(
        prompt,
        echo=True,
        stop="\n",
        temperature=0.6,
        max_tokens=100,
    )
    text = output["choices"][0]["text"]
    # echo=True was requested, so the prompt is expected at the start of the
    # text. Cut it off by length instead of splitting on "the answer: ": the
    # split raised IndexError when the completion did not start with a space
    # and picked the wrong segment when the question or answer contained
    # that phrase.
    if text.startswith(prompt):
        text = text[len(prompt):]
    return text.lstrip()
    




In [108]:
import time
import os
# max_split_size_mb takes a plain integer number of megabytes; the previous
# value "500mb" carried a unit suffix that is not part of the documented format.
# NOTE(review): this is set after earlier cells have already moved models to the
# GPU — confirm the allocator still picks it up, or set it before importing torch.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:500"

def querry(text, summary):
    """Search the web for ``text`` and return a summary or an answer.

    The first two URLs returned by ``searchWithEngine`` are downloaded and
    their paragraph text is concatenated into one context string.

    Args:
        text: The user's question / search query.
        summary: If truthy, return ``get_summary(context)``; otherwise return
            ``get_answer(text, context)``.

    Returns:
        The string produced by ``get_summary`` or ``get_answer``.
    """
    torch.cuda.memory.empty_cache()
    # NOTE(review): the purpose of this fixed 3 s pause is not visible here — confirm.
    time.sleep(3)
    query = text
    print("Bang wird durchsucht...")

    pages = []
    # Only the first two search hits are used as context.
    for link in searchWithEngine(query)[:2]:
        try:
            pages.append(searchResultToText(link))
        except requests.RequestException as err:
            # One unreachable or invalid result should not abort the whole
            # query; report it and continue with the remaining pages.
            print(err)

    # Collapse all line breaks so the models receive one flat string.
    context = "".join(pages).replace("\n\n", "\n").replace("\n", " ")

    print(summary)
    if summary:
        return get_summary(context)
    return get_answer(query, context)
    
    
    
# if __name__ == "__main__":
#     print(request("When was the first computer invented?") )


In [None]:
# Smoke test: run the question-answering path (no summary) for a sample query.
print(querry(text="Capital of Germany", summary=False))

In [109]:
from flask import Flask, jsonify, request, make_response
from flask_cors import CORS

# Create the Flask application object that the route below is registered on.
app = Flask(__name__)
# Apply flask_cors' CORS wrapper to the app; no options are passed, so its
# defaults apply (presumably allowing cross-origin requests — confirm the policy).
CORS(app)


@app.route('/question')
def question():
    """Handle ``GET /question``.

    Query parameters:
        q: The question to answer (required).
        s: Optional flag; if the parameter is present at all, a summary is
           produced instead of an answer.

    Returns:
        A JSON message when ``q`` is missing or empty, otherwise a 200
        response whose JSON body is the answer string with the ``<pad>`` and
        ``</s>`` tokens removed.
    """
    question_text = request.args.get('q')
    # Treat an empty `q` like a missing one instead of running a search for "".
    if not question_text:
        return jsonify(message='No question provided')

    # args.get() returns None for an absent key rather than raising, so no
    # try/except is needed; the mere presence of `s` enables summary mode.
    makeSummary = request.args.get('s') is not None

    answer = querry(question_text, makeSummary)
    # Strip the T5 special tokens that get_answer() leaves in the decoded text.
    answer = answer.replace("<pad>", "").replace("</s>", "")
    return make_response(jsonify(answer), 200)



if __name__ == '__main__':
    # Start Flask's built-in server with default settings; the captured output
    # of this cell shows it serving on http://127.0.0.1:5000 with debug mode off.
    app.run()
    



 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit


Bang wird durchsucht...
False
Generating answer...


127.0.0.1 - - [27/May/2023 10:30:37] "GET /question?q=When%20has%20the%20first%20computer%20been%20build HTTP/1.1" 200 -


Bang wird durchsucht...
False
Generating answer...


127.0.0.1 - - [27/May/2023 10:35:27] "GET /question?q=When%20was%20the%20first%20Computer%20build HTTP/1.1" 200 -


Bang wird durchsucht...
False
Generating answer...


127.0.0.1 - - [27/May/2023 10:36:11] "GET /question?q=When%20was%20the%20first%20Computer%20build HTTP/1.1" 200 -


Bang wird durchsucht...
False
Generating answer...


127.0.0.1 - - [27/May/2023 10:36:33] "GET /question?q=When%20was%20Volkswagen%20founded HTTP/1.1" 200 -


Bang wird durchsucht...
False
Generating answer...


127.0.0.1 - - [27/May/2023 10:37:26] "GET /question?q=When%20was%20Volkswagen%20founded HTTP/1.1" 200 -


Bang wird durchsucht...
False
Generating answer...


127.0.0.1 - - [27/May/2023 10:37:53] "GET /question?q=When%20was%20Volk HTTP/1.1" 200 -


Bang wird durchsucht...
False
Generating answer...


127.0.0.1 - - [27/May/2023 10:38:09] "GET /question?q=When%20was%20Volkswagen%20founded HTTP/1.1" 200 -


Bang wird durchsucht...
False
Generating answer...


127.0.0.1 - - [27/May/2023 10:39:08] "GET /question?q=When%20was%20Volkswagen%20founded HTTP/1.1" 200 -


Bang wird durchsucht...
False
Generating answer...


127.0.0.1 - - [27/May/2023 10:39:35] "GET /question?q=When%20was%20Volkswagen%20founded? HTTP/1.1" 200 -


Bang wird durchsucht...
False
Generating answer...


127.0.0.1 - - [27/May/2023 10:40:06] "GET /question?q=When%20was%20Volkswagen%20founded? HTTP/1.1" 200 -
