### Steps
1) Gather data: choose an asset and download its daily end-of-day prices for the chosen time period
2) Measure **Price Differences (+/-)** & **Price Returns (%)** over time
3) For both differences & returns, fit a **Bell Curve (Gaussian)**
4) Compare **Real Data** to the **Model** (using histogram / bar chart)
5) **Simulate** new price paths with the Bachelier model (normally distributed price differences) and the Samuelson model (normally distributed returns)
6) Plot the simulated paths next to the real price history and compare the two models

In [47]:
# import libraries
import yfinance as yf 
from dotenv import load_dotenv
import os
import numpy as np
import pandas as pd
import requests
from sklearn.model_selection import train_test_split

# Load settings from a local .env file (python-dotenv), then read the API key
# from the process environment; API_KEY is None when the variable is not set.
load_dotenv()
API_KEY = os.environ.get('API_KEY')

def load_data(ticker):
    """Download compact price data for `ticker` from the SimFin API and save it as CSV.

    The request is authorised with the module-level `API_KEY`. The first
    element of the JSON response is expected to carry a `columns` list and a
    `data` list of rows, which are turned into a DataFrame and written to
    `data/raw/{ticker}-price-data.csv` (the directory is created if missing).

    Parameters
    ----------
    ticker : str
        Ticker symbol sent as the `ticker` query parameter, e.g. 'MSFT'.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status (bad key, rate limit, ...).
    ValueError
        If the response body is not a non-empty list of company records.
    """
    url = "https://backend.simfin.com/api/v3/companies/prices/compact"
    headers = {
        "accept": "application/json",
        "authorization": API_KEY
    }

    # Send the ticker through `params` so it is URL-encoded, and bound the wait
    # so a stalled connection cannot hang the notebook kernel indefinitely.
    response = requests.get(
        url,
        headers = headers,
        params = {"ticker": ticker},
        timeout = 30,
    )
    # Fail here with the real HTTP error instead of a confusing KeyError/IndexError
    # when the error payload is indexed below.
    response.raise_for_status()
    payload = response.json()

    if not isinstance(payload, list) or not payload:
        raise ValueError(f"No price data returned for ticker {ticker!r}")
    json_data = payload[0]

    df = pd.DataFrame(json_data['data'], columns = json_data['columns'])
    # A fresh checkout may not have the output folder yet.
    os.makedirs('data/raw', exist_ok = True)
    df.to_csv(f'data/raw/{ticker}-price-data.csv', index = False)

def clean_data(ticker, raw_data):
    """Reduce a raw price CSV to date + closing price and save the processed copy.

    Parameters
    ----------
    ticker : str
        Ticker symbol; only used to build the output file name.
    raw_data : str or path-like
        Path of the raw CSV to read. It must contain the columns
        'Date' and 'Last Closing Price'.

    Raises
    ------
    KeyError
        If either required column is missing from the raw file.
    ValueError
        If a 'Date' value cannot be parsed as a date.

    Side effects
    ------------
    Writes `data/processed/{ticker}-price-data.csv`, creating the directory
    if it does not exist.
    """
    df = pd.read_csv(raw_data)
    # Work on an explicit copy so the assignment below does not write into a
    # slice of the full raw frame.
    df = df[['Date', 'Last Closing Price']].copy()
    # The parsed dates must be assigned back; calling pd.to_datetime on its own
    # leaves the column unchanged.
    df['Date'] = pd.to_datetime(df['Date'])
    os.makedirs('data/processed', exist_ok = True)
    df.to_csv(f'data/processed/{ticker}-price-data.csv', index = False)

def train_test_split(data):
    """Placeholder, presumably for a train/test split of `data` (not implemented).

    The body is empty, so every call returns None and `data` is not used.

    NOTE(review): this definition rebinds the name `train_test_split` that the
    notebook also imports from `sklearn.model_selection`; once this cell has
    run, the sklearn helper is no longer reachable under that name. Consider
    renaming this function or dropping the import.
    """
    return None


In [48]:
# Download the raw MSFT prices to data/raw/, then write the cleaned
# Date / Last Closing Price table derived from that raw file to data/processed/.
# Note: load_data performs a network request every time this cell is run.
load_data('MSFT')
clean_data('MSFT', 'data/raw/MSFT-price-data.csv')

In [67]:
# Read the processed MSFT prices and key the rows by their 'Date' column
# (the dates stay as the strings stored in the CSV; they are not parsed here).
df = pd.read_csv('data/processed/MSFT-price-data.csv').set_index(['Date'])

# Keep the first 100 rows as a working sample and display it.
sample = df.head(100)
sample

Unnamed: 0_level_0,Last Closing Price
Date,Unnamed: 1_level_1
2018-05-22,97.50
2018-05-23,98.66
2018-05-24,98.31
2018-05-25,98.36
2018-05-29,98.01
...,...
2018-10-05,112.13
2018-10-08,110.85
2018-10-09,112.26
2018-10-10,106.16
