In [2]:
import os
import pandas as pd
import requests
from datetime import datetime
from dotenv import load_dotenv

# تحميل مفاتيح API من ملف .env
load_dotenv()

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
ALPHA_API_KEY = os.getenv("ALPHAVANTAGE_API_KEY")
FMP_API_KEY = os.getenv("FMP_API_KEY")
OER_API_KEY = os.getenv("EXCHANGERATES_API_KEY")

In [3]:
# تحميل البيانات المالية من ملف CSV
csv_path = 'financial_risk_analysis_large.csv'
financial_data = pd.read_csv(csv_path)
financial_data.head()

Unnamed: 0,CreditScore,AnnualIncome,LoanAmount,LoanDuration,Age,EmploymentStatus,MaritalStatus,NumberOfDependents,EducationLevel,HomeOwnershipStatus,...,JobTenure,MonthlySavings,AnnualBonuses,AnnualExpenses,MonthlyHousingCosts,MonthlyTransportationCosts,MonthlyFoodCosts,MonthlyHealthcareCosts,MonthlyEntertainmentCosts,LoanApproved
0,402,63295,18830,13,29,Self-Employed,Widowed,2,Doctorate,Other,...,24,378,3741,40058,977,412,399,136,124,0
1,735,55936,23729,1,42,Self-Employed,Divorced,3,Master,Own,...,10,575,4115,16745,695,206,898,252,131,0
2,570,62547,19660,7,54,Self-Employed,Single,3,Doctorate,Mortgage,...,16,691,4105,23273,627,266,392,73,36,0
3,406,46129,21674,23,25,Self-Employed,Divorced,3,High School,Other,...,6,452,4559,42163,397,307,250,378,-32,0
4,371,57725,12189,26,42,Employed,Widowed,4,Master,Own,...,2,690,7856,30087,723,315,114,88,68,0


In [4]:
import requests
import pandas as pd

def get_stock_data(symbols=["AAPL", "MSFT", "NVDA", "AMZN", "TSLA", "GOOGL", "META", "JPM", "WMT", "PG"]):
    all_data = {}
    for symbol in symbols:
        url = f"https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol={symbol}&apikey={ALPHA_API_KEY}&outputsize=compact"
        try:
            response = requests.get(url)
            data = response.json()
            if 'Time Series (Daily)' in data:
                df = pd.DataFrame.from_dict(data['Time Series (Daily)'], orient='index').astype(float)
                df.index = pd.to_datetime(df.index)
                df.sort_index(inplace=True)
                all_data[symbol] = df
            else:
                print(f"No data available for {symbol}")
        except Exception as e:
            print(f"Error fetching data for {symbol}: {e}")
    return all_data

# Fetch data for the ten companies
alpha_data = get_stock_data()

# Display the first few rows for each company
for symbol, df in alpha_data.items():
    print(f"\nData for {symbol}:")
    print(df.head())

No data available for AAPL
No data available for MSFT
No data available for NVDA
No data available for AMZN
No data available for TSLA
No data available for GOOGL
No data available for META
No data available for JPM
No data available for WMT
No data available for PG


In [5]:
import requests
import pandas as pd

def get_company_profile(symbols=["AAPL", "MSFT", "NVDA", "AMZN", "TSLA", "GOOGL", "META", "JPM", "WMT", "PG"]):
    all_data = {}
    for symbol in symbols:
        url = f"https://financialmodelingprep.com/api/v3/profile/{symbol}?apikey={FMP_API_KEY}"
        try:
            response = requests.get(url)
            data = response.json()
            if data and isinstance(data, list) and len(data) > 0:
                all_data[symbol] = data[0]
            else:
                print(f"No profile data available for {symbol}")
        except Exception as e:
            print(f"Error fetching profile data for {symbol}: {e}")
    return all_data

# Fetch profile data for the ten companies
fmp_data = get_company_profile()

# Display the profile data for each company
for symbol, profile in fmp_data.items():
    print(f"\nProfile for {symbol}:")
    print(pd.Series(profile).to_string())


Profile for AAPL:
symbol                                                            AAPL
price                                                           208.78
beta                                                             1.275
volAvg                                                        61422665
mktCap                                                   3118296324000
lastDiv                                                           1.01
range                                                     169.21-260.1
changes                                                          -2.48
companyName                                                 Apple Inc.
currency                                                           USD
cik                                                         0000320193
isin                                                      US0378331005
cusip                                                        037833100
exchange                                          NASDAQ G

In [6]:
def get_exchange_rate(base="USD", symbols="EUR"):
    url = f"https://openexchangerates.org/api/latest.json?app_id={OER_API_KEY}&base={base}&symbols={symbols}"
    response = requests.get(url)
    try:
        data = response.json()
        if 'error' in data:
            print("Error:", data.get('description', 'Unknown error'))
        return data
    except Exception as e:
        print("Exception:", e)
        return {}

exchange_rate_data = get_exchange_rate()
exchange_rate_data

{'disclaimer': 'Usage subject to terms: https://openexchangerates.org/terms',
 'license': 'https://openexchangerates.org/license',
 'timestamp': 1747731600,
 'base': 'USD',
 'rates': {'EUR': 0.888222}}

In [7]:
def get_worldbank_inflation_data(country="USA", indicator="FP.CPI.TOTL.ZG", start_year=2000, end_year=2024):
    url = f"http://api.worldbank.org/v2/country/{country}/indicator/{indicator}?format=json&date={start_year}:{end_year}"
    try:
        response = requests.get(url)
        response.raise_for_status()
        data = response.json()
        if data and len(data) > 1 and isinstance(data[1], list):
            df = pd.DataFrame(data[1])
            df['date'] = pd.to_datetime(df['date'])
            df = df[['date', 'value', 'country', 'indicator']].dropna()
            print(f"World Bank Data Sample:\n{df.head().to_string()}")  # طباعة عينة
            return df
        else:
            print(f"No data available for {indicator} in {country}. Response: {data}")
            return pd.DataFrame()
    except Exception as e:
        print(f"Error fetching World Bank data: {e}")
        return pd.DataFrame()

def get_eurostat_inflation_data(dataset="prc_hicp_midx", geo="EA", start_period="2010-01"):
    url = f"https://ec.europa.eu/eurostat/api/dissemination/statistics/1.0/data/{dataset}?format=JSON&geo={geo}&unit=CP00&startPeriod={start_period}"
    try:
        response = requests.get(url)
        response.raise_for_status()
        data = response.json()
        if 'value' in data and 'dimension' in data:
            dates = list(data['dimension']['time']['category']['index'].keys())
            values = [float(data['value'].get(str(i), None)) for i in range(len(dates)) if str(i) in data['value']]
            df = pd.DataFrame({
                'date': pd.to_datetime(dates),
                'value': values,
                'indicator': 'HICP',
                'region': geo
            }).dropna()
            print(f"Eurostat Data Sample:\n{df.head().to_string()}")  # طباعة عينة
            return df
        else:
            print(f"No data available for {dataset} in {geo}. Response: {data.get('error', 'No error message')}")
            return pd.DataFrame()
    except Exception as e:
        print(f"Error fetching Eurostat data: {e}")
        return pd.DataFrame()

# جمع البيانات في قاموس موحد
combined_data = {
    "csv_data": financial_data.to_dict(orient="records"),
    "stock_data_alpha": alpha_data,
    "company_profile_fmp": fmp_data,
    "exchange_rate": exchange_rate_data,
    "worldbank_inflation": get_worldbank_inflation_data().to_dict(orient="records"),
    "eurostat_inflation": get_eurostat_inflation_data().to_dict(orient="records")
}

# معاينة المفاتيح
print("Combined Data Keys:", combined_data.keys())

World Bank Data Sample:
        date     value                                 country                                                                   indicator
0 2024-01-01  2.949525  {'id': 'US', 'value': 'United States'}  {'id': 'FP.CPI.TOTL.ZG', 'value': 'Inflation, consumer prices (annual %)'}
1 2023-01-01  4.116338  {'id': 'US', 'value': 'United States'}  {'id': 'FP.CPI.TOTL.ZG', 'value': 'Inflation, consumer prices (annual %)'}
2 2022-01-01  8.002800  {'id': 'US', 'value': 'United States'}  {'id': 'FP.CPI.TOTL.ZG', 'value': 'Inflation, consumer prices (annual %)'}
3 2021-01-01  4.697859  {'id': 'US', 'value': 'United States'}  {'id': 'FP.CPI.TOTL.ZG', 'value': 'Inflation, consumer prices (annual %)'}
4 2020-01-01  1.233584  {'id': 'US', 'value': 'United States'}  {'id': 'FP.CPI.TOTL.ZG', 'value': 'Inflation, consumer prices (annual %)'}
Error fetching Eurostat data: 400 Client Error: Bad Request for url: https://ec.europa.eu/eurostat/api/dissemination/statistics/1.0/data/prc_h

In [8]:
from langchain.schema import Document

def convert_data_to_documents(combined_data):
    documents = []

    # تحويل البيانات المالية من CSV
    for row in combined_data["csv_data"][:100]:
        content = "\n".join([f"{key}: {value}" for key, value in row.items() if value is not None])
        documents.append(Document(page_content=content, metadata={"source": "csv"}))

    # بيانات Alpha Vantage
    alpha_text = str(combined_data["stock_data_alpha"])
    documents.append(Document(page_content=alpha_text, metadata={"source": "alpha_vantage"}))

    # بيانات FMP
    fmp_text = str(combined_data["company_profile_fmp"])
    documents.append(Document(page_content=fmp_text, metadata={"source": "fmp"}))

    # أسعار الصرف
    exchange_text = str(combined_data["exchange_rate"])
    documents.append(Document(page_content=exchange_text, metadata={"source": "exchange_rates"}))

    # بيانات World Bank مع سياق المخاطر
    for row in combined_data["worldbank_inflation"]:
        content = (f"Inflation Rate in {row.get('country', 'Unknown')}: {row.get('value', 'N/A')}% in {row.get('date', 'N/A').year}. "
                   f"Indicator: {row.get('indicator', 'N/A')}. "
                   f"High inflation can lead to reduced consumer purchasing power, increased borrowing costs due to central bank rate hikes, "
                   f"currency depreciation affecting exchange rates like USD/EUR, and potential economic slowdown or recession.")
        documents.append(Document(page_content=content, metadata={"source": "worldbank"}))

    # بيانات Eurostat مع سياق المخاطر
    for row in combined_data["eurostat_inflation"]:
        content = (f"Inflation Rate (HICP) in {row.get('region', 'Unknown')}: {row.get('value', 'N/A')}% in {row.get('date', 'N/A').strftime('%Y-%m')}. "
                   f"High inflation may reduce consumer spending, prompt tighter monetary policies, cause currency fluctuations in USD/EUR, "
                   f"and increase the risk of economic instability.")
        documents.append(Document(page_content=content, metadata={"source": "eurostat"}))

    return documents

documents = convert_data_to_documents(combined_data)
print(f"Total Documents: {len(documents)}")
print(f"World Bank Documents: {len([doc for doc in documents if doc.metadata['source'] == 'worldbank'])}")
print(f"Eurostat Documents: {len([doc for doc in documents if doc.metadata['source'] == 'eurostat'])}")

Total Documents: 128
World Bank Documents: 25
Eurostat Documents: 0


In [9]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings

# تقطيع النصوص
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
split_docs = text_splitter.split_documents(documents)

# إنشاء Embeddings
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

  embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)


In [10]:
from langchain.vectorstores import FAISS

# إنشاء FAISS من البيانات
vectorstore = FAISS.from_documents(split_docs, embeddings)
print("✅ Vector store created successfully.")

✅ Vector store created successfully.


In [11]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# تخصيص موجه
prompt_template = """استخدم القطع النصية التالية للإجابة على السؤال. إذا كانت القطع تحتوي على بيانات التضخم، اشرح المخاطر الاقتصادية لارتفاع التضخم، مثل انخفاض القوة الشرائية، ارتفاع أسعار الفائدة، تقلبات العملة، أو التباطؤ الاقتصادي. إذا لم يتم العثور على معلومات ذات صلة، اذكر ذلك صراحة.

السياق: {context}

السؤال: {question}

الإجابة بالعربية:"""

prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

# إعداد موديل GPT-4
llm = ChatOpenAI(model_name="gpt-4", openai_api_key=OPENAI_API_KEY)

# إنشاء السلسلة النهائية
rag_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vectorstore.as_retriever(search_kwargs={"k": 10}),
    return_source_documents=True,
    chain_type_kwargs={"prompt": prompt}
)

  llm = ChatOpenAI(model_name="gpt-4", openai_api_key=OPENAI_API_KEY)


In [12]:
question = "ما هي المخاطر الاقتصادية إذا استمر التضخم في الارتفاع؟"
response = rag_chain.invoke({"query": question})
print("Result:", response["result"])
print("\nSource Documents:")
for doc in response["source_documents"]:
    print(f"Source: {doc.metadata['source']}, Content: {doc.page_content[:200]}...")

Result: المخاطر الاقتصادية لارتفاع التضخم تشمل:

1- انخفاض القوة الشرائية للمستهلكين: بينما ترتفع الأسعار، تقل القدرة على الشراء بنفس الكمية من النقود.

2- ارتفاع أسعار الفائدة: عادةً ما ترفع البنوك المركزية أسعار الفائدة لمحاولة الحد من التضخم، مما يزيد تكاليف الاقتراض.

3- تقلبات العملة: التضخم العالي يمكن أن يؤدي إلى تقلبات في سعر الصرف، مثل تقلبات سعر صرف الدولار الأمريكي مقابل اليورو.

4- التباطؤ الاقتصادي: التضخم العالي يمكن أن يؤدي إلى تباطؤ اقتصادي أو حتى ركود.

Source Documents:
Source: worldbank, Content: Inflation Rate in {'id': 'US', 'value': 'United States'}: 4.11633838374488% in 2023. Indicator: {'id': 'FP.CPI.TOTL.ZG', 'value': 'Inflation, consumer prices (annual %)'}. High inflation can lead to r...
Source: worldbank, Content: Inflation Rate in {'id': 'US', 'value': 'United States'}: 2.85267248150138% in 2007. Indicator: {'id': 'FP.CPI.TOTL.ZG', 'value': 'Inflation, consumer prices (annual %)'}. High inflation can lead to r...
Source: worldbank, Content: Inflation Rate 