## Install yfinance

In [1]:
!pip install yfinance pymongo pandas

Collecting yfinance
  Downloading yfinance-1.0-py2.py3-none-any.whl.metadata (6.0 kB)
Collecting pymongo
  Downloading pymongo-4.16.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl.metadata (10.0 kB)
Collecting multitasking>=0.0.7 (from yfinance)
  Downloading multitasking-0.0.12.tar.gz (19 kB)
  Preparing metadata (setup.py) ... [?25ldone
Collecting frozendict>=2.3.4 (from yfinance)
  Downloading frozendict-2.4.7-py3-none-any.whl.metadata (23 kB)
Collecting peewee>=3.16.2 (from yfinance)
  Downloading peewee-3.19.0-py3-none-any.whl.metadata (7.0 kB)
Collecting curl_cffi<0.14,>=0.7 (from yfinance)
  Downloading curl_cffi-0.13.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Collecting websockets>=13.0 (from yfinance)
  Downloading websockets-16.0-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl.metadata (6.8 kB)
Collecting dnspython<3.0.0,>=2.6.1 (from pymongo)
  Downloading dnspython-2.8.0-py3-no

## Gather data from yfinance


In [2]:
import yfinance as yf
from pymongo import MongoClient
import pandas as pd
import os

# 1. Połączenie z MongoDB (używamy nazwy usługi z docker-compose)
client = MongoClient("mongodb://mongodb:27017/") 
db = client.stock_database
collection = db.prices

# Czyścimy bazę przed nowym pobraniem (opcjonalnie)
collection.delete_many({})

assets = ['AAPL', 'NVDA', 'TSLA', 'BTC-USD', 'ETH-USD', 'SOL-USD']

# Tworzymy folder na JSONy jeśli nie istnieje
if not os.path.exists('data'):
    os.makedirs('data')

for asset in assets:
    print(f"Pobieram: {asset}...")
    
    # Pobieramy dane (skoro mamy styczeń 2026, bierzemy pełny 2025)
    ticker = yf.Ticker(asset)
    df = ticker.history(start="2025-01-01", end="2025-12-31")
    
    # Obróbka danych
    df.reset_index(inplace=True)
    df['Date'] = df['Date'].dt.strftime('%Y-%m-%d')
    df['asset'] = asset
    
    # Zapis do MongoDB
    records = df.to_dict('records')
    if records:
        collection.insert_many(records)
        
        # Zapis do JSON w folderze work/data
        df.to_json(f"data/{asset}_2025.json", orient='records', indent=4)

print("\n--- GOTOWE! Dane są w MongoDB i folderze data ---")

# Wyświetlmy próbkę danych z bazy dla testu
sample = pd.DataFrame(list(collection.find({'asset': 'AAPL'}).limit(5)))
sample

Pobieram: AAPL...
Pobieram: NVDA...
Pobieram: TSLA...
Pobieram: BTC-USD...
Pobieram: ETH-USD...
Pobieram: SOL-USD...

--- GOTOWE! Dane są w MongoDB i folderze data ---


Unnamed: 0,_id,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits,asset
0,6963dc8cd98665a62ee5a2ee,2025-01-02,247.80922,247.978468,240.731247,242.752106,55740700,0.0,0.0,AAPL
1,6963dc8cd98665a62ee5a2ef,2025-01-03,242.264297,243.080598,240.800915,242.264297,40244100,0.0,0.0,AAPL
2,6963dc8cd98665a62ee5a2f0,2025-01-06,243.210016,246.216423,242.105013,243.896912,45045600,0.0,0.0,AAPL
3,6963dc8cd98665a62ee5a2f1,2025-01-07,241.886014,244.44445,240.263363,241.119492,40856000,0.0,0.0,AAPL
4,6963dc8cd98665a62ee5a2f2,2025-01-08,240.830782,242.612732,238.969207,241.607269,37628900,0.0,0.0,AAPL


## Test Mongo data storage

In [3]:
from pymongo import MongoClient
import pandas as pd

# Połączenie
client = MongoClient("mongodb://mongodb:27017/")
db = client.stock_database
collection = db.prices

# 1. Sprawdźmy ile w ogóle mamy dokumentów w bazie
total = collection.count_documents({})
print(f"Łączna liczba wpisów w bazie: {total}")

# 2. Sprawdźmy jakie mamy instrumenty (assets)
assets_in_db = collection.distinct("asset")
print(f"Instrumenty w bazie: {assets_in_db}")

# 3. Wyciągnijmy dane dla konkretnego instrumentu (np. NVDA) i wrzućmy do tabeli
df_nvda = pd.DataFrame(list(collection.find({"asset": "NVDA"}).limit(10)))
df_nvda

Łączna liczba wpisów w bazie: 1839
Instrumenty w bazie: ['AAPL', 'BTC-USD', 'ETH-USD', 'NVDA', 'SOL-USD', 'TSLA']


Unnamed: 0,_id,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits,asset
0,6963dc8cd98665a62ee5a3e7,2025-01-02,135.962805,138.842022,134.593185,138.272171,198247200,0.0,0.0,NVDA
1,6963dc8cd98665a62ee5a3e8,2025-01-03,139.971709,144.860371,139.691787,144.430496,229322500,0.0,0.0,NVDA
2,6963dc8cd98665a62ee5a3e9,2025-01-06,148.549363,152.118394,147.779585,149.38913,265377400,0.0,0.0,NVDA
3,6963dc8cd98665a62ee5a3ea,2025-01-07,152.988143,153.088122,139.9717,140.101669,351782200,0.0,0.0,NVDA
4,6963dc8cd98665a62ee5a3eb,2025-01-08,142.541027,143.910648,137.522395,140.071701,227349900,0.0,0.0,NVDA
5,6963dc8cd98665a62ee5a3ec,2025-01-10,137.412421,139.881747,134.183308,135.872849,207602500,0.0,0.0,NVDA
6,6963dc8cd98665a62ee5a3ed,2025-01-13,129.954469,133.453512,129.474589,133.193573,204808900,0.0,0.0,NVDA
7,6963dc8cd98665a62ee5a3ee,2025-01-14,136.012804,136.342716,130.014445,131.723969,195590500,0.0,0.0,NVDA
8,6963dc8cd98665a62ee5a3ef,2025-01-15,133.613455,136.412693,131.2541,136.202759,185217300,0.0,0.0,NVDA
9,6963dc8cd98665a62ee5a3f0,2025-01-16,138.602083,138.712054,133.453498,133.533478,209235600,0.0,0.0,NVDA
