In [2]:
import os
import yfinance as yf
from bs4 import BeautifulSoup
import requests
from datetime import datetime, timedelta
# from langchain_milvus import Milvus
from langchain.vectorstores import Milvus
from langchain.embeddings import OpenAIEmbeddings
# from langchain.chains import RetrievalQAChain
from langchain.chains.qa_with_sources.retrieval import RetrievalQAWithSourcesChain
from langchain.chat_models import AzureChatOpenAI
from langchain_openai import AzureOpenAIEmbeddings
from transformers import pipeline
import numpy as np
import statsmodels.api as sm

  from .autonotebook import tqdm as notebook_tqdm
None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.


In [3]:
from langchain.vectorstores import Qdrant
from qdrant_client import QdrantClient
from qdrant_client import models
from qdrant_client.models import PointStruct, VectorParams, Distance, HnswConfig,HnswConfigDiff

In [4]:
# Used by get_stock_data, which subtracts it from the annualized mean return,
# so the value is treated as an annual rate (0.02 = 2%).
RISK_FREE_RATE: float = 0.02  # Risk-free rate for Sharpe ratio

In [9]:
# Qdrant client backed by the local path "financial_analysis".
qdrant_client = QdrantClient(path="financial_analysis")

# Every run starts from an empty collection: drop the previous copy if present.
_collection = "financial_documents"
_stale = qdrant_client.collection_exists(collection_name=_collection)
if _stale:
    qdrant_client.delete_collection(collection_name=_collection)

# 1536 is the vector size the original author assumed for the embedding model;
# cosine distance is used for similarity.
_vector_layout = VectorParams(distance=Distance.COSINE, size=1536)
qdrant_client.create_collection(collection_name=_collection, vectors_config=_vector_layout)

True

In [11]:
# Azure OpenAI configuration.
# SECURITY: the API key used to be hard-coded in this cell. A key that has been
# saved in a notebook must be treated as leaked and rotated. It is now read from
# the AZURE_OPENAI_API_KEY environment variable instead.
if not os.environ.get("AZURE_OPENAI_API_KEY"):
    raise RuntimeError(
        "AZURE_OPENAI_API_KEY is not set; export it before running this notebook."
    )
OPENAI_API_KEY = os.environ["AZURE_OPENAI_API_KEY"]  # name kept for backward compatibility

# Endpoint and API version keep their previous values unless the environment
# already provides different ones.
os.environ.setdefault("AZURE_OPENAI_ENDPOINT", "https://cpe-clx-openai.openai.azure.com/")
os.environ.setdefault("OPENAI_API_VERSION", "2023-05-15")

# NOTE(review): presumably a workaround for duplicate OpenMP runtime errors — confirm it is still needed.
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

# Chat model used by the retrieval QA chain.
azure_llm = AzureChatOpenAI(
    model="cpe-clx-gpt4o",
    azure_deployment="cpe-clx-gpt4o",
    api_key=os.environ["AZURE_OPENAI_API_KEY"],
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    api_version=os.environ["OPENAI_API_VERSION"],
)

# Embedding model used to vectorize documents and queries.
embed_model = AzureOpenAIEmbeddings(
    model="text-embedding-ada-002",
    api_key=os.environ["AZURE_OPENAI_API_KEY"],
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    api_version=os.environ["OPENAI_API_VERSION"],
    azure_deployment="cpe-clx-embedding",
)

# Settings.llm = azure_llm
# Settings.embed_model = embed_model

  warn_deprecated(


In [12]:
# Wrap the Qdrant collection as a LangChain vector store.
# Pass the Embeddings object itself: the deprecated `embedding_function=` hook is
# invoked with one string at a time (per text and per query), which does not match
# `embed_documents`, a list-of-texts API.
vectorstore = Qdrant(client=qdrant_client, collection_name="financial_documents", embeddings=embed_model)

  warn_deprecated(


In [13]:
# Function to parse and vectorize financial documents
def process_financial_documents(folder_path):
    """Read each company's text files and index them in the module-level ``vectorstore``.

    ``folder_path`` must contain one sub-directory per company; the sub-directory
    name is used as the company name. Every regular file inside a company folder
    is read as UTF-8 text. Nested directories are skipped.

    Args:
        folder_path: Directory whose immediate sub-directories are company folders.

    Returns:
        dict mapping company name -> list of document texts read for that company
        (an empty list when the company folder holds no files).

    Raises:
        OSError: if ``folder_path`` or a file in it cannot be read.
        UnicodeDecodeError: if a file is not valid UTF-8.
    """
    company_vectors = {}
    # Sorted for a deterministic processing order across platforms.
    for company_name in sorted(os.listdir(folder_path)):
        company_folder = os.path.join(folder_path, company_name)
        if not os.path.isdir(company_folder):
            continue

        documents = []
        metadatas = []
        for file_name in sorted(os.listdir(company_folder)):
            file_path = os.path.join(company_folder, file_name)
            if not os.path.isfile(file_path):
                # open() on a directory would raise; only regular files are documents.
                continue
            with open(file_path, 'r', encoding='utf-8') as f:
                documents.append(f.read())
            # `source` is the metadata field the with-sources QA chain reports back;
            # `company` allows filtering by company later.
            metadatas.append({"company": company_name, "source": file_path})

        if documents:
            # The inputs are raw strings, so use add_texts (add_documents expects
            # Document objects) with one metadata dict per text.
            vectorstore.add_texts(texts=documents, metadatas=metadatas)
        company_vectors[company_name] = documents
    return company_vectors


In [14]:
# Scraping stock prices, volatility, and PE ratio from Yahoo Finance
def get_stock_data(company_name, num_months, benchmark_symbol="^GSPC"):
    """Fetch price history and headlines for a ticker and derive risk metrics.

    Args:
        company_name: Yahoo Finance ticker symbol (e.g. "ADS.DE").
        num_months: Look-back window, approximated as ``30 * num_months`` days.
        benchmark_symbol: Ticker of the index used as the market benchmark in the
            regression. Defaults to "^GSPC", the value that was hard-coded before.

    Returns:
        dict with keys:
            "volatility"   - annualized standard deviation of daily returns (x sqrt(252))
            "pe_ratio"     - ``trailingPE`` from the ticker info, or None when absent
            "beta"         - slope of the OLS regression of stock on benchmark returns
            "alpha"        - intercept of that regression (per-day, not annualized)
            "r_squared"    - R-squared of that regression
            "std_dev"      - standard deviation of daily returns
            "sharpe_ratio" - (annualized mean return - RISK_FREE_RATE) / annualized volatility
            "news_texts"   - text of every <h3> element on the ticker's Yahoo news page

    Raises:
        ValueError: if stock and benchmark share fewer than three trading days.
        requests.RequestException: if the news page cannot be fetched within the timeout.
    """
    stock = yf.Ticker(company_name)
    benchmark = yf.Ticker(benchmark_symbol)

    end_date = datetime.now()
    start_date = end_date - timedelta(days=30 * num_months)

    stock_hist = stock.history(start=start_date, end=end_date)
    benchmark_hist = benchmark.history(start=start_date, end=end_date)

    def _daily_returns(history):
        # Daily close-to-close returns, indexed by calendar date without a timezone.
        returns = history['Close'].pct_change().dropna()
        index = returns.index
        if hasattr(index, "tz_localize"):
            # Exchanges in different timezones stamp the same trading day with
            # different instants, so a timezone-aware inner join can come back empty.
            # Keep the local calendar date only.
            if index.tz is not None:
                index = index.tz_localize(None)
            returns.index = index.normalize()
        return returns

    stock_returns = _daily_returns(stock_hist)
    benchmark_returns = _daily_returns(benchmark_hist)

    # Keep only the trading days present in both series.
    stock_returns, benchmark_returns = stock_returns.align(benchmark_returns, join='inner')
    if len(stock_returns) < 3:
        raise ValueError(
            f"Not enough overlapping trading days between {company_name} and "
            f"{benchmark_symbol} to compute risk metrics"
        )

    std_dev = stock_returns.std()  # Standard deviation of daily returns
    volatility = std_dev * np.sqrt(252)  # Annualized volatility

    # `info` triggers a remote lookup, so read it once.
    pe_ratio = stock.info.get('trailingPE')

    # Regress stock returns on benchmark returns: intercept = alpha, slope = beta.
    X = sm.add_constant(benchmark_returns)
    model = sm.OLS(stock_returns, X).fit()
    # params is label-indexed; use explicit positional access (constant first).
    alpha = model.params.iloc[0]
    beta = model.params.iloc[1]
    r_squared = model.rsquared

    # Sharpe ratio: numerator and denominator must both be annualized.
    mean_return = stock_returns.mean() * 252  # Annualized return
    sharpe_ratio = (mean_return - RISK_FREE_RATE) / volatility

    # Scrape headlines for sentiment analysis; the timeout keeps a stalled request from hanging.
    news_url = f"https://finance.yahoo.com/quote/{company_name}/news"
    news_content = requests.get(news_url, timeout=10).text
    soup = BeautifulSoup(news_content, 'html.parser')
    news_texts = [article.get_text() for article in soup.find_all('h3')]

    return {
        "volatility": volatility,
        "pe_ratio": pe_ratio,
        "beta": beta,
        "alpha": alpha,
        "r_squared": r_squared,
        "std_dev": std_dev,
        "sharpe_ratio": sharpe_ratio,
        "news_texts": news_texts
    }


In [15]:
# Perform sentiment analysis using HuggingFace transformers
def get_sentiment(news_texts):
    """Return the net sentiment of a collection of headlines.

    The score is (#POSITIVE - #NEGATIVE) / #headlines, so it lies in [-1, 1].

    Args:
        news_texts: List of headline strings. May be empty.

    Returns:
        float net sentiment score; 0.0 (neutral) when there are no headlines,
        instead of dividing by zero.
    """
    if not news_texts:
        return 0.0

    # Building the pipeline is expensive; create it once and reuse it across calls.
    classifier = getattr(get_sentiment, "_classifier", None)
    if classifier is None:
        classifier = pipeline("sentiment-analysis")
        get_sentiment._classifier = classifier

    sentiments = classifier(list(news_texts))
    positive_sentiment = sum(1 for s in sentiments if s['label'] == 'POSITIVE')
    negative_sentiment = sum(1 for s in sentiments if s['label'] == 'NEGATIVE')
    return (positive_sentiment - negative_sentiment) / len(sentiments)

In [17]:
# Standard QA chain using LangChain RetrievalQAChain
def risk_analysis(company_name, question):
    """Answer ``question`` from the indexed documents and print a risk summary.

    Combines three inputs: a retrieval-QA answer over the vector store, market
    metrics from ``get_stock_data`` (6-month window), and a headline sentiment
    score from ``get_sentiment``. The metrics, a High/Low risk label and a
    Buy / Hold-Sell suggestion are printed to stdout.

    Args:
        company_name: Ticker symbol passed to ``get_stock_data``.
        question: Natural-language question sent to the QA chain.

    Returns:
        The QA chain's result dict (includes the ``answer`` and ``sources`` keys).
    """
    # NOTE(review): the retriever is built without any filter, so retrieval is not
    # restricted to documents of `company_name`.
    retriever = vectorstore.as_retriever()

    qa_chain = RetrievalQAWithSourcesChain.from_chain_type(
        llm=azure_llm,
        retriever=retriever,
        chain_type="stuff"
    )

    # This chain has two output keys (answer, sources); `.run()` only supports
    # single-output chains, so call it with an explicit input dict instead.
    context = qa_chain.invoke({"question": question})

    # Get stock data
    stock_data = get_stock_data(company_name, 6)  # Assume 6 months for example
    sentiment_score = get_sentiment(stock_data["news_texts"])

    # Final risk analysis
    risk = "High" if stock_data["volatility"] > 0.3 or sentiment_score < 0 else "Low"

    # The P/E ratio can be None (not reported); comparing None with a number raises.
    # A non-positive P/E is not treated as "cheap".
    pe_ratio = stock_data["pe_ratio"]
    attractive_pe = pe_ratio is not None and 0 < pe_ratio < 15
    strategy = "Buy" if attractive_pe and sentiment_score > 0 else "Hold/Sell"

    # Placeholder only: a random number, not a real trustworthiness measurement.
    trustworthiness_score = np.random.uniform(0.7, 0.9)

    # Print financial metrics and analysis
    print(f"Company: {company_name}")
    print(f"Volatility: {stock_data['volatility']:.2f}")
    print(f"PE Ratio: {stock_data['pe_ratio']}")
    print(f"Beta: {stock_data['beta']:.2f}")
    print(f"Alpha: {stock_data['alpha']:.4f}")
    print(f"R-squared: {stock_data['r_squared']:.2f}")
    print(f"Standard Deviation: {stock_data['std_dev']:.4f}")
    print(f"Sharpe Ratio: {stock_data['sharpe_ratio']:.2f}")
    print(f"Sentiment Score: {sentiment_score:.2f}")
    print(f"Risk: {risk}")
    print(f"Strategy: {strategy}")
    print(f"Trustworthiness Score: {trustworthiness_score:.2f}")

    return context


In [20]:
# Main function to process the folder of financial documents
def main(folder_path=None, question="What is the company's financial health?"):
    """Index the financial documents and run the risk analysis for every company found.

    Args:
        folder_path: Directory containing one sub-directory per company. When
            omitted, the FINANCIAL_DOCS_DIR environment variable is used, falling
            back to the path that was previously hard-coded.
        question: Question asked about each company.
    """
    if folder_path is None:
        folder_path = os.environ.get(
            "FINANCIAL_DOCS_DIR",
            "C:\\Users\\CQTF47\\Desktop\\IU Masters\\Thesis\\Data\\ADS.DE",
        )

    # Process financial documents and store in vector DB (Qdrant)
    company_vectors = process_financial_documents(folder_path)
    if not company_vectors:
        # Company names come from sub-directory names, so a folder holding only
        # files yields nothing to analyse.
        print(f"No company sub-directories found in {folder_path}")

    for company_name in company_vectors:
        # Perform risk analysis and trading strategy generation
        response = risk_analysis(company_name, question)
        print(f"Generated Response for {company_name}: {response}")

if __name__ == "__main__":
    main()

In [19]:
# Runs the whole pipeline again; the cell that defines main() already ends with
# the same guard, so this cell duplicates that invocation.
if __name__ == "__main__":
    main()

In [1]:
import yfinance as yf

In [6]:
# NOTE(review): the variable is named `msft`, but the ticker requested is ADS.DE.
msft = yf.Ticker("ADS.DE")

In [7]:
# Display the info dict for the ticker (address, industry, business summary, ...).
msft.info

{'address1': 'Adi-Dassler-Strasse 1',
 'city': 'Herzogenaurach',
 'zip': '91074',
 'country': 'Germany',
 'phone': '49 91 32 84 0',
 'fax': '49 91 32 84 0',
 'website': 'https://www.adidas-group.com',
 'industry': 'Footwear & Accessories',
 'industryKey': 'footwear-accessories',
 'industryDisp': 'Footwear & Accessories',
 'sector': 'Consumer Cyclical',
 'sectorKey': 'consumer-cyclical',
 'sectorDisp': 'Consumer Cyclical',
 'longBusinessSummary': 'adidas AG, together with its subsidiaries, designs, develops, produces, and markets athletic and sports lifestyle products in Europe, the Middle East, Africa, North America, Greater China, the Asia-Pacific, and Latin America. It offers footwear, apparel, and accessories and gear, such as bags and balls under the adidas brand; golf footwear and apparel under the adidas Golf brand; and outdoor footwear under the Five Ten brand. It sells its products through its own retail stores; mono-branded franchise stores and shop-in-shops; and wholesale and

In [40]:
# Benchmark used by the exploratory cells below: ^GDAXI
# (get_stock_data defaults to ^GSPC instead).
benchmark = yf.Ticker("^GDAXI")

In [17]:
# Price history with the library's default arguments.
# NOTE(review): this cell's execution count (17) predates the `benchmark`
# assignment (40), and the returns printed from `hist` carry a -04:00 offset while
# later benchmark output shows +02:00. It was likely run against a different
# `benchmark` binding; re-run the cells in order to confirm.
hist = benchmark.history()

In [19]:
# Day-over-day percentage change of the closing price, with NaN rows dropped.
hist['Close'].pct_change().dropna()

Date
2024-09-19 00:00:00-04:00    0.016977
2024-09-20 00:00:00-04:00   -0.001941
2024-09-23 00:00:00-04:00    0.002809
2024-09-24 00:00:00-04:00    0.002511
2024-09-25 00:00:00-04:00   -0.001861
2024-09-26 00:00:00-04:00    0.004039
2024-09-27 00:00:00-04:00   -0.001253
2024-09-30 00:00:00-04:00    0.004237
2024-10-01 00:00:00-04:00   -0.009324
2024-10-02 00:00:00-04:00    0.000138
2024-10-03 00:00:00-04:00   -0.001681
2024-10-04 00:00:00-04:00    0.008970
2024-10-07 00:00:00-04:00   -0.009586
2024-10-08 00:00:00-04:00    0.009689
2024-10-09 00:00:00-04:00    0.007113
2024-10-10 00:00:00-04:00   -0.002070
2024-10-11 00:00:00-04:00    0.006052
2024-10-14 00:00:00-04:00    0.007708
2024-10-15 00:00:00-04:00   -0.007609
2024-10-16 00:00:00-04:00    0.004679
2024-10-17 00:00:00-04:00   -0.000171
Name: Close, dtype: float64

In [21]:
from datetime import datetime, timedelta

In [41]:
# Exploratory re-run of the return calculation for ADS.DE over 30 * 6 days,
# mirroring the steps inside get_stock_data. Relies on `benchmark` having been
# assigned in an earlier cell. The names defined here (stock, stock_hist,
# stock_returns, benchmark_returns) are reused by the cells that follow.
stock = yf.Ticker("ADS.DE")

end_date = datetime.now()
start_date = end_date - timedelta(days=30 * 6)

stock_hist = stock.history(start=start_date, end=end_date)
benchmark_hist = benchmark.history(start=start_date, end=end_date)

# Daily close-to-close returns for both series.
stock_returns = stock_hist['Close'].pct_change().dropna()
benchmark_returns = benchmark_hist['Close'].pct_change().dropna()

In [42]:
# Inspect the stock's daily returns before alignment with the benchmark.
stock_returns

Date
2024-04-23 00:00:00+02:00    0.018320
2024-04-24 00:00:00+02:00   -0.000878
2024-04-25 00:00:00+02:00   -0.005709
2024-04-26 00:00:00+02:00    0.019876
2024-04-29 00:00:00+02:00    0.006063
                               ...   
2024-10-14 00:00:00+02:00    0.013699
2024-10-15 00:00:00+02:00    0.012247
2024-10-16 00:00:00+02:00   -0.062578
2024-10-17 00:00:00+02:00    0.002670
2024-10-18 00:00:00+02:00    0.022636
Name: Close, Length: 128, dtype: float64

In [43]:
# Keep only the dates present in both series (inner join on the index).
# Both names are rebound in place, so the pre-alignment series are no longer available.
stock_returns, benchmark_returns = stock_returns.align(benchmark_returns, join='inner')

In [44]:
# Inspect the stock returns after alignment (length is unchanged at 128 in the output).
stock_returns

Date
2024-04-23 00:00:00+02:00    0.018320
2024-04-24 00:00:00+02:00   -0.000878
2024-04-25 00:00:00+02:00   -0.005709
2024-04-26 00:00:00+02:00    0.019876
2024-04-29 00:00:00+02:00    0.006063
                               ...   
2024-10-14 00:00:00+02:00    0.013699
2024-10-15 00:00:00+02:00    0.012247
2024-10-16 00:00:00+02:00   -0.062578
2024-10-17 00:00:00+02:00    0.002670
2024-10-18 00:00:00+02:00    0.022636
Name: Close, Length: 128, dtype: float64

In [45]:
# Inspect the benchmark returns after alignment.
benchmark_returns

Date
2024-04-23 00:00:00+02:00    0.015500
2024-04-24 00:00:00+02:00   -0.002699
2024-04-25 00:00:00+02:00   -0.009477
2024-04-26 00:00:00+02:00    0.013603
2024-04-29 00:00:00+02:00   -0.002351
                               ...   
2024-10-14 00:00:00+02:00    0.006940
2024-10-15 00:00:00+02:00   -0.001133
2024-10-16 00:00:00+02:00   -0.002739
2024-10-17 00:00:00+02:00    0.007749
2024-10-18 00:00:00+02:00    0.003579
Name: Close, Length: 128, dtype: float64

In [65]:
# News items returned for the ticker.
# NOTE(review): the titles in the captured output (e.g. "Puzzle solutions ...",
# "Independent Bank Corp ... Earnings Call") do not look specific to ADS.DE —
# check relevance before using this feed for sentiment.
stock.news

[{'uuid': '47bd0936-53ae-350f-8471-d5e4650ad977',
  'title': 'Puzzle solutions for Saturday, Oct. 19, 2024',
  'publisher': 'USA TODAY',
  'link': 'https://finance.yahoo.com/m/47bd0936-53ae-350f-8471-d5e4650ad977/puzzle-solutions-for.html',
  'providerPublishTime': 1729318588,
  'type': 'STORY',
  'thumbnail': {'resolutions': [{'url': 'https://s.yimg.com/uu/api/res/1.2/DDamgETuQHGcax70n5MdnQ--~B/aD0zNjQ4O3c9NTQ3MjthcHBpZD15dGFjaHlvbg--/https://media.zenfs.com/en/usa_today_news_641/332db63d06bfb18cbc483f6b7e9849c6',
     'width': 5472,
     'height': 3648,
     'tag': 'original'},
    {'url': 'https://s.yimg.com/uu/api/res/1.2/PLq7ni1G6TPB1YgemBgwig--~B/Zmk9ZmlsbDtoPTE0MDtweW9mZj0wO3c9MTQwO2FwcGlkPXl0YWNoeW9u/https://media.zenfs.com/en/usa_today_news_641/332db63d06bfb18cbc483f6b7e9849c6',
     'width': 140,
     'height': 140,
     'tag': '140x140'}]}},
 {'uuid': 'b7c0b3bb-141e-30e4-b8cb-357166d1c438',
  'title': 'Q3 2024 Independent Bank Corp (Massachusetts) Earnings Call',
  'publishe

In [89]:
# Income statement for the ticker; the columns are period-end dates (see the next cell's output).
df = stock.income_stmt

In [90]:
# Column labels before conversion: a DatetimeIndex of period-end dates.
df.columns

DatetimeIndex(['2023-12-31', '2022-12-31', '2021-12-31', '2020-12-31'], dtype='datetime64[ns]', freq=None)

In [91]:
# Convert the datetime column labels to plain strings, in place on `df`.
# Presumably done so to_json() emits readable date keys — the later JSON output
# shows keys such as "2023-12-31".
df.columns = df.columns.astype(str)

In [86]:
# stock.income_stmt.columns = stock.income_stmt.columns.strftime('%Y-%m-%d')

In [92]:
# Column labels after conversion: a plain object Index of date strings.
df.columns

Index(['2023-12-31', '2022-12-31', '2021-12-31', '2020-12-31'], dtype='object')

In [93]:
# Serialize the income statement to a JSON string keyed by period, then by line item.
df.to_json()

'{"2023-12-31":{"Tax Effect Of Unusual Items":-1800000.0,"Tax Rate For Calcs":0.15,"Normalized EBITDA":1449000000.0,"Total Unusual Items":-12000000.0,"Total Unusual Items Excluding Goodwill":-12000000.0,"Net Income From Continuing Operation Net Minority Interest":-119000000.0,"Reconciled Depreciation":1212000000.0,"Reconciled Cost Of Revenue":11244000000.0,"EBITDA":1437000000.0,"EBIT":225000000.0,"Net Interest Income":-83000000.0,"Interest Expense":160000000.0,"Interest Income":39000000.0,"Normalized Income":-108800000.0,"Net Income From Continuing And Discontinued Operation":-75000000.0,"Total Expenses":21148000000.0,"Rent Expense Supplemental":142000000.0,"Total Operating Income As Reported":268000000.0,"Diluted Average Shares":178557615.0,"Basic Average Shares":178543596.0,"Diluted EPS":-0.42,"Basic EPS":-0.42,"Diluted NI Availto Com Stockholders":-75000000.0,"Net Income Common Stockholders":-75000000.0,"Otherunder Preferred Stock Dividend":0.0,"Net Income":-75000000.0,"Minority Int

In [69]:
# Serialize the price history as a JSON array with one object per row, and print it.
# NOTE: with orient='records' the index is not included, so the printed rows carry no dates.
stock_hist_json = stock_hist.to_json(orient='records', date_format='iso')
print(stock_hist_json)

[{"Open":226.1106323162,"High":226.8085015863,"Low":221.9234014832,"Close":223.1197509766,"Volume":440247,"Dividends":0.0,"Stock Splits":0.0},{"Open":224.3160962352,"High":227.2072753906,"Low":223.4188379352,"Close":227.2072753906,"Volume":429712,"Dividends":0.0,"Stock Splits":0.0},{"Open":229.3009008449,"High":230.4972503285,"Low":226.1106304843,"Close":227.0078887939,"Volume":482904,"Dividends":0.0,"Stock Splits":0.0},{"Open":226.9082024677,"High":227.3069805592,"Low":223.8176190155,"Close":225.7118377686,"Volume":348040,"Dividends":0.0,"Stock Splits":0.0},{"Open":227.4066887541,"High":230.1981658936,"Low":224.1167161684,"Close":230.1981658936,"Volume":370777,"Dividends":0.0,"Stock Splits":0.0},{"Open":230.3975683374,"High":233.1890454549,"Low":229.8990881076,"Close":231.5939178467,"Volume":386905,"Dividends":0.0,"Stock Splits":0.0},{"Open":232.1920806354,"High":232.1920806354,"Low":224.216397107,"Close":225.7118377686,"Volume":536674,"Dividends":0.0,"Stock Splits":0.0},{"Open":227.3

In [94]:
# Corporate actions for the ticker: a table with Dividends and Stock Splits columns.
stock.actions

Unnamed: 0_level_0,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-05-17 00:00:00+02:00,0.7,0.0


In [96]:
# Dividend amounts as a bare array (dates dropped by .values).
stock.dividends.values

array([0.7])

In [97]:
# Capital gains series for the ticker; empty in the captured output.
stock.capital_gains

Series([], dtype: object)

In [98]:
# Quarterly income statement: one column per quarter-end date, one row per line item.
stock.quarterly_income_stmt

Unnamed: 0,2024-06-30,2024-03-31,2023-12-31,2023-09-30,2023-06-30,2023-03-31
Tax Effect Of Unusual Items,0.0,0.0,0.0,0.0,0.0,
Tax Rate For Calcs,0.305921,0.301,0.033735,0.17,0.219,
Normalized EBITDA,653000000.0,633000000.0,-171000000.0,764000000.0,492000000.0,
Net Income From Continuing Operation Net Minority Interest,196000000.0,171000000.0,-421000000.0,249000000.0,85000000.0,
Reconciled Depreciation,287000000.0,273000000.0,310000000.0,323000000.0,298000000.0,
Reconciled Cost Of Revenue,2863000000.0,2662000000.0,2665000000.0,3044000000.0,2625000000.0,
EBITDA,653000000.0,633000000.0,-171000000.0,764000000.0,492000000.0,
EBIT,366000000.0,360000000.0,-481000000.0,441000000.0,194000000.0,
Net Interest Income,-42000000.0,-91000000.0,82000000.0,-84000000.0,-52000000.0,
Interest Expense,62000000.0,115000000.0,-66000000.0,115000000.0,71000000.0,


In [48]:
import requests
from bs4 import BeautifulSoup

def extract_h3_headings(url):
    """Download ``url`` and return the text of every ``<h3>`` element it contains."""
    page = requests.get(url)
    parsed = BeautifulSoup(page.content, 'html.parser')
    titles = []
    for tag in parsed.find_all('h3'):
        titles.append(tag.get_text())
    return titles

# Example usage: scrape the Yahoo Finance lookup page for ADS.DE.
# The captured output mixes site navigation labels (' News', ' Life', ...) with
# headlines, none of which are filtered here.
url = "https://finance.yahoo.com/lookup/?s=ADS.DE"
headings = extract_h3_headings(url)
print(headings)

[' News', ' Life', ' Entertainment', '  Finance', ' Sports', 'New on Yahoo', 'Nasdaq pops, Netflix stock soaring on Q3 earnings results', 'Netflix earnings, S&P 500 record, CVS CEO changeup: 3 Things', 'American Express earnings, housing, Fedspeak: What to Watch', "Chip stocks rally as TSMC forecasts strong AI demand for 'many years'", "Disney joins Peacock, Max, and others in raising prices. Here's what it means for your subscription costs.", "Regions Financial's quarterly profit falls on weaker interest income", 'Two Dow Jones Giants Report Mixed Earnings; One Stock Is Basing', 'Intuitive Surgical Eyes Breakout On Massive Earnings Beat, But 2025 Launch Remains Key', 'American Express profit rises in Q3 as card members continue to spend', 'Sector Update: Consumer Stocks Mixed Pre-Bell Friday', 'American Express Results Show Affluent Cardmembers Strike ‘Cautious’ Tone', 'Schlumberger Q3 Earnings: Mixed Results As Middle East Shines And Latin America Falters', 'Procter & Gamble Posts Mi

In [63]:
def extract_h3_headings(url, min_words=5, timeout=10):
    """Download ``url`` and return its ``<h3>`` texts that look like headlines.

    This redefines the earlier ``extract_h3_headings`` and adds a word-count
    filter that drops short headings such as navigation labels.

    Args:
        url: Page to fetch.
        min_words: Minimum number of whitespace-separated words a heading must
            have to be kept. The default of 5 matches the previous fixed rule
            (more than 4 words).
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        list of heading strings, in document order.

    Raises:
        requests.RequestException: on connection errors or timeout.
    """
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.content, 'html.parser')
    h3_headings = [h3.get_text() for h3 in soup.find_all('h3')]
    return [heading for heading in h3_headings if len(heading.split()) >= min_words]

# Example usage: the ticker's news page is queried instead of the lookup page.
# The captured output for this URL is an empty list, i.e. no <h3> headings
# passed the filter in the HTML that was returned.
# url = "https://finance.yahoo.com/lookup/?s=ADS.DE"
url = "https://finance.yahoo.com/quote/ADS.DE/news/"
headings = extract_h3_headings(url)
print(headings)

[]


In [64]:
def extract_h3_headings_with_class(url):
    """Download ``url`` and return the text of ``<h3>`` elements whose class is 'clamp  yf-y1ahm5'."""
    page = requests.get(url)
    parsed = BeautifulSoup(page.content, 'html.parser')
    matching_tags = parsed.find_all('h3', class_='clamp  yf-y1ahm5')
    texts = []
    for tag in matching_tags:
        texts.append(tag.get_text())
    return texts

# Example usage. Relies on `url` still being bound from the previous cell.
# The captured output is an empty list.
headings_with_class = extract_h3_headings_with_class(url)
print(headings_with_class)

[]


In [50]:
from llama_index.prompts import Prompt


ModuleNotFoundError: No module named 'llama_index.prompts'

In [51]:
from llama_index.core.prompts import Prompt


In [59]:
from llama_index.llms.azure_openai import AzureOpenAI

In [62]:
from ragas.integrations.langchain import EvaluatorChain
from ragas.metrics import faithfulness, answer_relevancy, context_recall, context_precision

