In [None]:
# from flask import Flask, request, jsonify
# from flask_cors import CORS
import requests
import os
from newsplease import NewsPlease
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import OpenAIEmbeddings
from langchain_community.document_loaders import UnstructuredExcelLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain_core.output_parsers import StrOutputParser
from dotenv import load_dotenv

# Load variables from a local .env file BEFORE building any OpenAI-backed
# object, so that credentials kept there (e.g. OPENAI_API_KEY) are already in
# the process environment when the clients below are constructed.
load_dotenv()

# Directory the notebook is run from; prompt and training files are looked up
# relative to it.
basepath = os.path.abspath(os.curdir)

# Shared building blocks reused by every chain created further down.
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.6, max_tokens=4096)
embeddings = OpenAIEmbeddings()
text_splitter = RecursiveCharacterTextSplitter()
output_parser = StrOutputParser()

# Imports End here


In [None]:
# Emits a single empty line (the printed string literal is empty).
print("")

In [None]:

def read_prompt(chain_name,basepath=basepath):
    '''Return the prompt text stored on disk for ``chain_name``.

    The file read is ``<basepath>/prompts/<chain_name>-prompt.txt``, decoded
    as UTF-8.

    Args:
        chain_name: Name of the chain, used as the file-name prefix.
        basepath: Directory that contains the ``prompts`` folder.

    Returns:
        The full contents of the prompt file as a string.

    Raises:
        OSError: If the prompt file cannot be opened.
    '''
    # os.path.join picks the platform's separator, so this is no longer
    # limited to Windows-style backslash paths.
    prompt_path = os.path.join(basepath, 'prompts', chain_name + '-prompt.txt')
    with open(prompt_path, encoding='utf-8') as fh:
        return fh.read()

def create_chain(chain_name,sample):
    '''Build the chain for ``chain_name``.

    Args:
        chain_name: Chain identifier. Its first four characters select the
            training-file type when ``sample`` is truthy: ``'logi'`` loads
            ``Training/<chain_name>-train.xlsx``; ``'stat'`` and ``'fact'``
            load ``Training/<chain_name>-train.txt``.
        sample: Falsy -> plain ``prompt | llm | output_parser`` pipeline.
            Truthy -> retrieval chain whose retriever is a FAISS index built
            from the chain's training file.

    Returns:
        The constructed chain object.

    Raises:
        ValueError: If ``sample`` is truthy and the prefix of ``chain_name``
            is not one of ``'logi'``, ``'stat'`` or ``'fact'``.
    '''
    prompt = ChatPromptTemplate.from_messages(
        [("system", read_prompt(chain_name)), ("user", "{input}")]
    )
    if not sample:
        return prompt | llm | output_parser

    training_dir = os.path.join(basepath, 'Training')
    family = chain_name[:4]
    if family == 'logi':
        loader = UnstructuredExcelLoader(
            os.path.join(training_dir, chain_name + "-train.xlsx")
        )
    elif family in ('stat', 'fact'):
        loader = TextLoader(os.path.join(training_dir, chain_name + "-train.txt"))
    else:
        # Previously this fell through and failed later with an unbound
        # ``loader``; fail early with a message that names the culprit.
        raise ValueError(
            "No training loader is defined for chain " + repr(chain_name)
            + "; expected a name starting with 'logi', 'stat' or 'fact'."
        )

    documents = text_splitter.split_documents(loader.load())
    vector = FAISS.from_documents(documents, embeddings)
    # NOTE(review): the stuff-documents chain presumably needs the prompt file
    # to contain a ``{context}`` placeholder -- confirm against the prompts.
    document_chain = create_stuff_documents_chain(llm, prompt)
    return create_retrieval_chain(vector.as_retriever(), document_chain)


def init_all_chains():
    '''Create one chain per prompt file found in ``<basepath>/prompts``.

    The chain name is taken to be the first eight characters of each file
    name. ``'logi-con'`` and ``'logi-par'`` are built without training
    context; every other chain is built with it.

    Returns:
        dict mapping chain name -> chain object.
    '''
    no_context_chains = ('logi-con', 'logi-par')
    # os.path.join keeps the lookup working on non-Windows platforms too.
    prompt_files = os.listdir(os.path.join(basepath, 'prompts'))

    chains = {}
    # dict.fromkeys de-duplicates while keeping listing order, so two files
    # sharing a prefix do not trigger the same chain build twice.
    for chain_name in dict.fromkeys(file_name[:8] for file_name in prompt_files):
        if chain_name in no_context_chains:
            chains[chain_name] = create_chain(chain_name,sample=False)
            print(chain_name,' Success without context')
        else:
            chains[chain_name] = create_chain(chain_name,sample=True)
            print(chain_name,' Success with context')

    print('completed initializing all chains')
    return chains



# Build every chain once when this cell runs; `invoke` and `invoke_no_sample`
# look chains up by name in this dict.
chains = init_all_chains()


In [None]:
# Core Codes
def get_articles(urls, timeout=5):
    '''Scrape the main text of each article in ``urls``.

    A URL is skipped (with a printed notice) when the HTTP request fails or
    returns a non-OK status, or when no main text could be extracted.

    Args:
        urls: Sequence of article URLs.
        timeout: Seconds allowed for both the reachability check and the
            scrape of each URL.

    Returns:
        List of main-text strings, one per successfully scraped URL, in
        input order.
    '''
    articles = []
    # enumerate gives the true 1-based position; ``urls.index(url)`` reported
    # the first occurrence for duplicated URLs.
    for position, url in enumerate(urls, start=1):
        failure_notice = "Cannot scrape URL "+str(position)+" due to copyright issues."
        try:
            reachable = requests.get(url, timeout=timeout).ok
        except requests.RequestException:
            # Connection errors and timeouts are treated like a non-OK reply.
            reachable = False
        if not reachable:
            print(failure_notice)
            continue
        raw_article = NewsPlease.from_url(url,timeout=timeout)
        # The scraper may hand back an object without usable text (or no
        # article object at all); getattr covers both cases.
        main_text = getattr(raw_article, 'maintext', None)
        if main_text is None:
            print(failure_notice)
            continue
        articles.append(main_text)
    print("\nScraped "+str(len(articles))+" out of "+str(len(urls))+" URLs.\n\nProcessing scraped articles...")
    return articles
def invoke(chain,input,chains=chains):
    '''Run the chain registered under ``chain`` and return its ``'answer'`` entry.

    The chain is invoked with ``{"input": input}``; a confirmation line is
    printed once the call returns.
    '''
    selected = chains[chain]
    result = selected.invoke({"input": input})
    print(chain," ran successfully")
    answer = result['answer']
    return answer
def invoke_no_sample(chain,input,chains=chains):
    '''Run the chain registered under ``chain`` and return its response as-is.

    The chain is invoked with ``{"input": input}``; a confirmation line is
    printed once the call returns.
    '''
    selected = chains[chain]
    result = selected.invoke({"input": input})
    print(chain," ran successfully")
    return result

In [None]:



# Logical chain codes



def check_validity(response):
    '''Decide whether a validation response judges the text as valid.

    Only the last blank-line-separated section of ``response`` is inspected,
    case-insensitively.

    Returns:
        ``False`` if that section contains "invalid" or "not valid",
        ``True`` if it otherwise contains "valid", and ``False`` when no
        verdict word is present.
    '''
    verdict = response.split('\n\n')[-1].lower()
    # Negative phrasings must be tested individually and first: both contain
    # the substring "valid". (`("invalid" or "not valid") in s` only ever
    # tested "invalid".)
    if "invalid" in verdict or "not valid" in verdict:
        return False
    return "valid" in verdict


def process_final(final_mcq):
    '''Extract the MCQ text from an improvement response.

    The response is split on blank lines. For every section whose first ten
    characters read "output mcq" (case-insensitive), the text after the first
    twelve characters is taken; the last such section wins. An empty string
    is returned when no section matches.
    '''
    extracted = [
        section[12:]
        for section in final_mcq.split('\n\n')
        if section[:10].lower() == "output mcq"
    ]
    return extracted[-1] if extracted else ''


In [None]:
# Article URLs fed to the scraper.
urls = [
    "https://www.edie.net/carbon-majors-worlds-biggest-emitters-grew-their-carbon-footprint-since-paris-agreement-study-finds/",
    "https://www.earth.com/news/using-ai-writing-illustration-drastically-reduces-carbon-footprint/",
]

In [None]:
# Scrape every URL in `urls`; `res` holds the main text of each article that
# could be scraped.
res = get_articles(urls)

-------------

In [None]:
# NOTE(review): this repeats the scrape from the previous code cell and
# overwrites `res`; presumably one of the two cells is redundant -- confirm
# before removing.
res = get_articles(urls)

In [None]:
def preprocess_articles_logi(articles):
    '''Condense, paraphrase and validate articles for the logical-MCQ flow.

    For each article the 'logi-con' chain output is passed to 'logi-par';
    the paraphrase is split on blank lines and every paragraph longer than
    100 characters is sent to 'logi-val'. A paragraph is kept only when the
    third blank-line-separated section of the validation response says
    "valid" without also saying "invalid" or "not valid".

    Args:
        articles: Iterable of article texts.

    Returns:
        Tuple ``(valid_paragraphs, rel, par)``: the accepted paragraphs, the
        per-article 'logi-con' outputs, and the per-article 'logi-par'
        outputs.
    '''
    valid_paragraphs = []
    rel = []
    par = []
    for article in articles:
        relevant_text = invoke_no_sample('logi-con', article)
        rel.append(relevant_text)
        paraphrased_text = invoke_no_sample('logi-par',relevant_text)
        par.append(paraphrased_text)
        long_paragraphs = [pl for pl in paraphrased_text.split("\n\n") if len(pl)>100]
        for paragraph in long_paragraphs:
            sections = invoke('logi-val',paragraph).split("\n\n")
            # A response with fewer than three sections carries no verdict in
            # the expected position; treat it as unclear rather than crashing.
            verdict = sections[2].lower() if len(sections) > 2 else ""
            # Both negative phrasings contain "valid", so rule them out
            # explicitly (`("invalid" or "not valid") in s` only tested
            # "invalid").
            is_negative = "invalid" in verdict or "not valid" in verdict
            if "valid" in verdict and not is_negative:
                valid_paragraphs.append(paragraph)
    return valid_paragraphs,rel,par

In [None]:
# Run the condense -> paraphrase -> validate pass over the scraped articles.
# `rel` and `par` keep the per-article intermediate outputs for inspection.
valid_paragraphs,rel,par = preprocess_articles_logi(res)

In [None]:
def logical_chain(valid_paragraphs):
    '''Turn validated paragraphs into final MCQ strings.

    Each paragraph goes through three chains in turn:

    1. 'logi-que' -- only the sections starting with "source" or "question"
       (case-insensitive) are kept.
    2. 'logi-opt' -- a source section is joined with the options section of
       the response.
    3. 'logi-imp' -- every section starting with "output mcq" contributes
       the text after its first twelve characters.

    Paragraphs whose intermediate responses lack the expected sections are
    skipped with a printed notice.

    Args:
        valid_paragraphs: Iterable of paragraph strings.

    Returns:
        List of MCQ strings.
    '''
    final_mcqs = []
    for p in valid_paragraphs:
        #Question
        ques_response = invoke('logi-que', p)
        # Build a filtered copy: removing items from the list being iterated
        # skipped the element following every removal.
        kept_sections = [
            section for section in ques_response.split("\n\n")
            if section[:6].lower()=="source" or section[:8].lower()=="question"
        ]
        if not kept_sections:
            print("logi-que returned no source/question section; skipping paragraph")
            continue
        source_text = kept_sections[0]
        final_ques = "\n\n".join(kept_sections)
        #Options
        opt_sections = invoke('logi-opt',final_ques).split("\n\n")
        # If the response repeats the source first, the options sit in the
        # third section; otherwise in the second.
        if opt_sections[0][:6].lower()=="source":
            lead_text, options_index = opt_sections[0], 2
        else:
            lead_text, options_index = source_text, 1
        if len(opt_sections) <= options_index:
            print("logi-opt returned no options section; skipping paragraph")
            continue
        final_opt = lead_text + "\n\n" + opt_sections[options_index]
        #Improvement
        mcq_response = invoke('logi-imp', final_opt)
        for section in mcq_response.split("\n\n"):
            if section[:10].lower()=="output mcq":
                final_mcqs.append(section[12:])
    return final_mcqs

In [None]:
# Generate the final MCQ strings from the paragraphs that passed validation.
final_resultant = logical_chain(valid_paragraphs)

In [None]:
# Show the list of generated MCQ strings.
print(final_resultant)

In [None]:
# Write the installed package versions reported by `pip freeze` to requirements.txt.
# NOTE(review): `!pip` runs whichever pip is first on the shell PATH, which may
# not be the kernel's environment -- confirm, or consider `%pip`.
!pip freeze > requirements.txt