In [None]:
!pip install yfinance neo4j


In [3]:
import json
from neo4j import GraphDatabase
import yfinance as yf

In [6]:
import os

os.environ['NEO4J_BOLT_URL'] = 'neo4j+s://a1da7ed4.databases.neo4j.io'
os.environ['NEO4J_USERNAME'] = 'neo4j'
os.environ['NEO4J_PASSWORD'] = '7LDKGkEurm8Yge_dGcAn224X4om-eV9GkiaU0hQ2Oc'

bolt_url = os.environ.get('NEO4J_BOLT_URL')
username = os.environ.get('NEO4J_USERNAME')
password = os.environ.get('NEO4J_PASSWORD')

In [7]:
# Neo4j driver
driver = GraphDatabase.driver(bolt_url, auth=(username, password))

def test_connection():
    try:
        with driver.session() as session:
            greeting = session.run("RETURN 'Connection Successful' AS message").single()["message"]
            print(greeting)
    except Exception as e:
        print(f"Connection failed: {e}")

test_connection()

Connection Successful


In [28]:
def store_company_data(tx, company_symbol, historical_data):
    # Create or merge the company node
    tx.run("MERGE (c:Company {symbol: $symbol}) "
           "SET c.name = $name, c.industry = $industry, c.sector = $sector",
           symbol=company_symbol,
           name=historical_data['Name'],
           industry=historical_data['Industry'],
           sector=historical_data['Sector'])

    # nodes for historical prices as StockPrice nodes
    for date, price in historical_data['Historical Prices'].items():
        if price is not None:  # Check if price is valid
            tx.run("MERGE (p:StockPrice {date: $date}) "
                   "MERGE (c:Company {symbol: $symbol}) "
                   "MERGE (c)-[:HAS_PRICE]->(p) "
                   "SET p.price = $price",
                   date=date,
                   symbol=company_symbol,
                   price=price)

In [29]:
def fetch_and_store_data(companies):
    with driver.session() as session:
        for company in companies:
            try:
                # Fetch historical data
                stock_data = yf.Ticker(company)
                historical_data = stock_data.history(period="1y")
                # Prepare historical data for storage
                historical_prices = historical_data['Close'].to_dict()  # You can also fetch other data

                # Create a dictionary for additional company information
                company_info = {
                    'Name': stock_data.info.get('longName', ''),
                    'Industry': stock_data.info.get('industry', ''),
                    'Sector': stock_data.info.get('sector', ''),
                    'Historical Prices': historical_prices
                }

                # Store data in Neo4j
                session.write_transaction(store_company_data, company, company_info)
            except Exception as e:
                print(f"Error fetching or storing data for {company}: {e}")

In [None]:
companies = ['AAPL', 'MSFT', 'TSLA', 'GOOGL', 'AMZN', 'META', 'NFLX', 'NVDA', 'JPM', 'V']
fetch_and_store_data(companies)


In [9]:
# Function to explore data
def explore_data(query):
    with driver.session() as session:
        result = session.run(query)
        return [record for record in result]

In [10]:
count_query = "MATCH (c:Company) RETURN COUNT(c) AS companyCount"
company_count = explore_data(count_query)
print("Number of Companies:", company_count[0]['companyCount'])


Number of Companies: 10


In [11]:
list_companies_query = "MATCH (c:Company) RETURN c.name AS name, c.symbol AS symbol"
companies_list = explore_data(list_companies_query)
for company in companies_list:
    print(f"Company: {company['name']} (Symbol: {company['symbol']})")


Company: Microsoft Corporation (Symbol: MSFT)
Company: Tesla, Inc. (Symbol: TSLA)
Company: Alphabet Inc. (Symbol: GOOGL)
Company: Amazon.com, Inc. (Symbol: AMZN)
Company: Meta Platforms, Inc. (Symbol: META)
Company: Netflix, Inc. (Symbol: NFLX)
Company: NVIDIA Corporation (Symbol: NVDA)
Company: JPMorgan Chase & Co. (Symbol: JPM)
Company: Visa Inc. (Symbol: V)
Company: Apple Inc. (Symbol: AAPL)


In [None]:
!pip install transformers

In [26]:
import re
from transformers import BartForConditionalGeneration, BartTokenizer
from datetime import datetime
from neo4j import GraphDatabase

In [25]:
# pre-trained BART model and tokenizer
model_name = "facebook/bart-large"
tokenizer = BartTokenizer.from_pretrained(model_name)
model = BartForConditionalGeneration.from_pretrained(model_name)



In [29]:
# retrieve data from Neo4j
def retrieve_data(search_query):
    with driver.session() as session:
        cypher_query = """
        MATCH (c:Company)-[:HAS_PRICE]->(p:StockPrice)
        WHERE c.name CONTAINS $search_query OR c.symbol CONTAINS $search_query
        RETURN c.symbol AS CompanySymbol, c.name AS CompanyName, p.date AS Date, p.price AS Price
        ORDER BY p.date DESC
        LIMIT 10
        """
        result = session.run(cypher_query, search_query=search_query)

        retrieved_data = []
        for record in result:
            date_obj = record["Date"]  # Neo4j DateTime object
            formatted_date = date_obj.strftime('%Y-%m-%d')
            retrieved_data.append({
                'company': record['CompanyName'],
                'symbol': record['CompanySymbol'],
                'date': formatted_date,
                'price': record['Price']
            })
        return retrieved_data

In [30]:
# memory as a dictionary
chat_memory = {
    "last_company": None,  # last queried company
    "history": []          # conversation history
}

# Update chat memory
def update_memory(company_name, user_prompt, bot_response):
    chat_memory["last_company"] = company_name
    chat_memory["history"].append({"user": user_prompt, "bot": bot_response})

In [31]:
#  clean the generated response
def clean_response(response):
    cleaned_response = re.sub(r'[^\x20-\x7E]', '', response)  # Keep only printable ASCII characters
    return cleaned_response

# Generate response based on data and prompt
def generate_response(retrieved_data, prompt):
    context = "\n".join([f"On {data['date']}, {data['company']} closed at ${data['price']:.2f}." for data in retrieved_data])
    narrative = f"{prompt}\nHere’s the stock performance over the last year:\n{context}"

    inputs = tokenizer(narrative, return_tensors="pt", max_length=1024, truncation=True)
    outputs = model.generate(**inputs, max_new_tokens=50)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return clean_response(response)

In [32]:
# Recognized companies list
RECOGNIZED_COMPANIES = ["Apple", "Meta", "Google", "Microsoft", "Amazon", "Tesla", "Netflix"]

# Function to handle user query, with memory for context
def user_query(prompt):
    # Extract keywords (company names or symbols) from the prompt
    matched_companies = [company for company in RECOGNIZED_COMPANIES if company.lower() in prompt.lower()]

    # Default to the last company if none found in the prompt
    if not matched_companies:
        keywords = chat_memory["last_company"]
        if not keywords:
            return "Please specify a company."
    else:
        keywords = matched_companies[0]

    # Retrieve data and generate response
    retrieved_data = retrieve_data(keywords)
    if not retrieved_data:
        return f"No data found for the company '{keywords}'."

    response = generate_response(retrieved_data, prompt)

    # Update memory
    update_memory(keywords, prompt, response)

    return response

In [33]:
# Example usage in a conversational loop
user_input = "Show Apple stock performance on  2024"
output = user_query(user_input)
print(output)

Show Apple stock performance on  2024Heres the stock performance over the last year: fixmeOn 2024-10-31, Apple Inc. closed at $290.62.


In [34]:
# Follow-up query example using memory
user_input_followup = "What about the last six months?"
output_followup = user_query(user_input_followup)
print(output_followup)

What about the last six months?=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-On 2024-10-17, Apple Inc. closed at $281.62.
