# Assessment Problems

## Problem 1: Data from yfinance


https://github.com/ranaroussi/yfinance


In [39]:
# Dates and times.
import datetime as dt

# Data frames.
import pandas as pd

# Operating system.
import os

# Yahoo finance data.
import yfinance as yf


In [40]:
# Tickers:
# A list of stock symbols used to look up data with yfinance.

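# Get data:
# get_data (defined in the next cell) is a small wrapper around yf.download that retrieves historical data for all tickers in a single call.
# Note: the link below documents the similarly named get_data function of the Refinitiv Data Library, not yfinance.
# See: https://cdn.refinitiv.com/public/rd-lib-python-doc/1.0.0.0/book/en/sections/access-layer/access/get-data-function.html
# period and interval are the two settings used to request the historical data.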

# Download data:
# This function uses the yfinance Python library to download historical stock data.
# See: https://medium.com/%40anjalivemuri97/day-4-fetching-historical-stock-data-with-yfinance-f45f3bd8b9c6
# auto_adjust=True is passed explicitly to suppress the FutureWarning.
# See: https://github.com/ranaroussi/yfinance/blob/0713d9386769b168926d3959efd8310b56a33096/yfinance/utils.py#L445-L462

# DataFrame:
# It’s widely used for data analysis, cleaning, and visualization. It supports filtering, sorting, aggregation, and analysis.
# See: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html

In [41]:
# Stock symbols whose history is requested in one download call.
tickers = ["META", "AAPL", "AMZN", "NFLX", "GOOGL"]


def get_data(tickers, period="5d", interval="1h"):
    """Download historical price data for the given tickers.

    Forwards to ``yf.download`` with the columns grouped by ticker and
    ``auto_adjust=True``.

    Args:
        tickers: Ticker symbol(s), passed straight to ``yf.download``.
        period: Forwarded as ``period`` (default ``"5d"``).
        interval: Forwarded as ``interval`` (default ``"1h"``).

    Returns:
        Whatever ``yf.download`` returns for these arguments.
    """
    return yf.download(
        tickers,
        period=period,
        interval=interval,
        group_by='ticker',
        auto_adjust=True,
    )


# Request a "1d" interval over a "5d" period for every ticker.
df = get_data(tickers, interval="1d", period="5d")

[*********************100%***********************]  5 of 5 completed


In [42]:
# Saving data to a CSV file:
# See: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_csv.html 

# Date time:
# Used to record the exact date and time
# See: https://docs.python.org/3/library/datetime.html

In [43]:
from datetime import datetime

def save_data(df, folder="data"):
    """Write ``df`` to a timestamped CSV file and return the file's path.

    The file is named ``YYYYmmdd-HHMMSS.csv`` after the current local time
    and is placed in ``folder``, which is created if it does not exist.

    Args:
        df: DataFrame to save.
        folder: Directory that receives the CSV file (default ``"data"``).

    Returns:
        The path of the CSV file that was written.
    """
    os.makedirs(folder, exist_ok=True)

    # Name the file after the current time so runs made at different
    # times end up in different files.
    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    filepath = os.path.join(folder, f"{timestamp}.csv")

    # Keep the index: index=False would discard the row labels, which for
    # the downloaded price data are the dates of each row.
    df.to_csv(filepath)

    print(f"Saved file: {filepath}")
    return filepath
# Save the downloaded frame; the returned file path is the cell's output.
save_data(df)

Saved file: data\20251203-113659.csv


'data\\20251203-113659.csv'

## Problem 2: Plotting Data

In [55]:
import datetime as dt
import matplotlib.pyplot as plt
import os
import matplotlib
matplotlib.use("Agg")

In [45]:
# Reuse `tickers` and `get_data` from Problem 1 instead of declaring them a
# second time: a duplicate definition silently shadows the first one and the
# two copies can drift apart.
df = get_data(tickers, period="5d", interval="1d")
df.head()

[*********************100%***********************]  5 of 5 completed


Ticker,GOOGL,GOOGL,GOOGL,GOOGL,GOOGL,NFLX,NFLX,NFLX,NFLX,NFLX,...,AMZN,AMZN,AMZN,AMZN,AMZN,AAPL,AAPL,AAPL,AAPL,AAPL
Price,Open,High,Low,Close,Volume,Open,High,Low,Close,Volume,...,Open,High,Low,Close,Volume,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2025-11-25,326.209991,328.829987,317.649994,323.440002,88632100,106.120003,106.300003,103.82,104.400002,35122600,...,226.380005,230.520004,223.800003,229.669998,39379300,275.269989,280.380005,275.25,276.970001,46914200
2025-11-26,320.679993,324.5,316.790009,319.950012,51373400,105.739998,106.949997,105.220001,106.139999,27951000,...,230.740005,231.75,228.770004,229.160004,38497900,276.959991,279.529999,276.630005,277.549988,33431400
2025-11-28,323.369995,326.850006,316.790009,320.179993,26018600,106.440002,107.940002,106.239998,107.580002,15021600,...,231.240005,233.289993,230.220001,233.220001,20292300,277.26001,279.0,275.98999,278.850006,20135600
2025-12-01,317.700012,319.850006,313.890015,314.890015,41183000,106.510002,109.339996,106.309998,109.129997,24873400,...,233.220001,235.800003,232.25,233.880005,42904000,278.01001,283.420013,276.140015,283.100006,46587700
2025-12-02,316.73999,318.380005,313.910004,315.809998,35801500,109.209999,109.730003,107.519997,109.349998,25734500,...,235.009995,238.970001,233.550003,234.419998,45622500,283.0,287.399994,282.630005,286.190002,53615500


In [46]:
# Show the column labels of the downloaded frame.
df.columns

MultiIndex([('GOOGL',   'Open'),
            ('GOOGL',   'High'),
            ('GOOGL',    'Low'),
            ('GOOGL',  'Close'),
            ('GOOGL', 'Volume'),
            ( 'NFLX',   'Open'),
            ( 'NFLX',   'High'),
            ( 'NFLX',    'Low'),
            ( 'NFLX',  'Close'),
            ( 'NFLX', 'Volume'),
            ( 'META',   'Open'),
            ( 'META',   'High'),
            ( 'META',    'Low'),
            ( 'META',  'Close'),
            ( 'META', 'Volume'),
            ( 'AMZN',   'Open'),
            ( 'AMZN',   'High'),
            ( 'AMZN',    'Low'),
            ( 'AMZN',  'Close'),
            ( 'AMZN', 'Volume'),
            ( 'AAPL',   'Open'),
            ( 'AAPL',   'High'),
            ( 'AAPL',    'Low'),
            ( 'AAPL',  'Close'),
            ( 'AAPL', 'Volume')],
           names=['Ticker', 'Price'])

In [58]:
# Show the row index of the frame.
# NOTE(review): the recorded output lists hourly timestamps although the data
# cell above requests interval="1d" -- presumably `df` was overwritten by a
# cell run out of order; re-run the notebook top to bottom to confirm.
df.index    

DatetimeIndex(['2025-11-25 14:30:00+00:00', '2025-11-25 15:30:00+00:00',
               '2025-11-25 16:30:00+00:00', '2025-11-25 17:30:00+00:00',
               '2025-11-25 18:30:00+00:00', '2025-11-25 19:30:00+00:00',
               '2025-11-25 20:30:00+00:00', '2025-11-26 14:30:00+00:00',
               '2025-11-26 15:30:00+00:00', '2025-11-26 16:30:00+00:00',
               '2025-11-26 17:30:00+00:00', '2025-11-26 18:30:00+00:00',
               '2025-11-26 19:30:00+00:00', '2025-11-26 20:30:00+00:00',
               '2025-11-28 14:30:00+00:00', '2025-11-28 15:30:00+00:00',
               '2025-11-28 16:30:00+00:00', '2025-12-01 14:30:00+00:00',
               '2025-12-01 15:30:00+00:00', '2025-12-01 16:30:00+00:00',
               '2025-12-01 17:30:00+00:00', '2025-12-01 18:30:00+00:00',
               '2025-12-01 19:30:00+00:00', '2025-12-01 20:30:00+00:00',
               '2025-12-02 14:30:00+00:00', '2025-12-02 15:30:00+00:00',
               '2025-12-02 16:30:00+00:00', '2025-1

In [60]:
# Select the "Close" column of every ticker by level NAME instead of by tuple
# position. get_data() uses group_by='ticker', so the columns are
# (Ticker, Price) pairs and a lookup such as ('Close', 'AMZN') raises KeyError
# on a fresh kernel. Selecting on the "Price" level avoids depending on the
# order of the two levels.
close_prices = df.xs("Close", axis=1, level="Price")[
    ["AMZN", "META", "GOOGL", "AAPL", "NFLX"]  # same order as before
]

ax = close_prices.plot(figsize=(12, 6))
ax.set(title="Closing prices", ylabel="Close")
ax


<Axes: xlabel='Datetime'>

In [61]:
# Write the current figure to a timestamped PNG in the plots folder.
plots_folder = "plots"
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
plot_filename = os.path.join(plots_folder, f"{timestamp}.png")

# Create the target folder if needed, then save and release the figure.
os.makedirs(plots_folder, exist_ok=True)
plt.savefig(plot_filename, dpi=300)
plt.close()

print(f"Saved plot: {plot_filename}")

Saved plot: plots\20251203-120044.png


## Problem 3: Script

In [49]:
#! /usr/bin/env python

# Dates and time
import datetime as dt

# Operating system (used to create the output folder)
import os

# Yahoo Finance data
import yfinance as yf

# Get data
df = yf.download(["META", "AAPL", "AMZN", "NFLX", "GOOGL"], period="5d", interval="1h", auto_adjust=True)

# Current date and time
now = dt.datetime.now()

# Make sure the output folder exists: to_csv does not create missing
# directories, so the script would fail when "data" is absent.
os.makedirs("data", exist_ok=True)

# File name
filename = "data/" + now.strftime("%Y%m%d-%H%M%S") + ".csv"

# Save data as CSV file
df.to_csv(filename)

[*********************100%***********************]  5 of 5 completed


## Problem 4: Automation

# ...description (to be added)

## END