## 1.1_Enrico_Raw_Stocks.ipynb
- Loads daily stock prices using the SimFin API.

- Saves the raw data to `data/RAW/raw_stock_prices.csv`.

- Reads the CSV back in and performs a basic inspection (`head()`, `describe()`, `isna().mean()`).

- Focuses on a single stock (CRWD) for example analysis.

In [10]:
import pandas as pd
import simfin as sf
from simfin.names import CLOSE, OPEN, HIGH, LOW, VOLUME, TICKER
import os
from dotenv import load_dotenv

# Pull variables from a local .env file (if any) into the process environment
load_dotenv()

# SimFin API key, looked up under the 'API_KEY' environment variable;
# this is None when the variable is not defined
SIMFIN_API_KEY = os.environ.get('API_KEY')

def load_data(data_dir="data/RAW/", filename="raw_stock_prices.csv"):
    """
    Loads daily stock prices for all US companies from SimFin
    and saves the raw data as a CSV file for later processing.

    Parameters
    ----------
    data_dir : str, optional
        Directory handed to ``sf.set_data_dir`` and also used as the
        folder for the exported CSV. Defaults to "data/RAW/".
    filename : str, optional
        Name of the CSV file written inside ``data_dir``.
        Defaults to "raw_stock_prices.csv".

    Returns
    -------
    None
        The result is written to disk; progress is reported via print().
    """

    # The key comes from the module-level SIMFIN_API_KEY (read from the
    # environment). Warn instead of raising: presumably a dataset that is
    # already cached on disk can still be loaded without a key - TODO confirm.
    if not SIMFIN_API_KEY:
        print("⚠️ API_KEY is not set - SimFin may reject the download request.")
    sf.set_api_key(SIMFIN_API_KEY)

    # Make sure the target directory exists before anything is written to it
    os.makedirs(data_dir, exist_ok=True)
    sf.set_data_dir(data_dir)

    # Load daily stock prices for all US companies
    print("📥 Downloading US stock market data...")
    df_prices = sf.load_shareprices(market="us", variant="daily")

    # Reset index so the index levels become normal columns in the CSV
    df_prices = df_prices.reset_index()

    # Build the CSV path from the parameters instead of repeating a literal
    save_path = os.path.join(data_dir, filename)

    # Save raw data to CSV (index=False: the index was already reset above)
    df_prices.to_csv(save_path, index=False)

    print(f"✅ Data saved to: {save_path}")

# Run the download-and-export step defined above; it prints its progress
# and writes the raw CSV to disk
load_data()

📥 Downloading US stock market data...
Dataset "us-shareprices-daily" on disk (0 days old).
- Loading from disk ... 

  df = pd.read_csv(path, sep=';', header=0,


Done!
✅ Data saved to: data/RAW/raw_stock_prices.csv


In [2]:
# Relative path of the raw CSV - the same location load_data() saves to.
# A relative path (instead of a user-specific absolute one) keeps the
# notebook runnable on any machine.
file_path = "data/RAW/raw_stock_prices.csv"

# Read the saved CSV file into a DataFrame
df_prices = pd.read_csv(file_path)

# Display the first few rows to confirm it loaded correctly
df_prices.head()

Unnamed: 0,Ticker,Date,SimFinId,Open,High,Low,Close,Adj. Close,Volume,Dividend,Shares Outstanding
0,A,2019-04-15,45846,81.0,81.13,79.91,80.4,77.22,1627268,,317515869.0
1,A,2019-04-16,45846,80.82,80.96,77.19,77.55,74.48,3441597,,317515869.0
2,A,2019-04-17,45846,78.15,78.32,74.46,75.43,72.44,4471971,,317515869.0
3,A,2019-04-18,45846,75.73,76.54,75.31,76.17,73.16,2874195,,317515869.0
4,A,2019-04-22,45846,75.93,76.72,75.13,75.57,72.58,2016043,,317515869.0


In [3]:
# Restrict the price table to a single example ticker (CRWD)
crwd_rows = df_prices["Ticker"].eq("CRWD")
df_crwd = df_prices.loc[crwd_rows]

# Preview the first rows of the CRWD subset
df_crwd.head()

Unnamed: 0,Ticker,Date,SimFinId,Open,High,Low,Close,Adj. Close,Volume,Dividend,Shares Outstanding
1326081,CRWD,2019-06-12,1039026,63.5,67.0,56.0,58.0,58.0,19449162,,48127000.0
1326082,CRWD,2019-06-13,1039026,63.86,69.67,61.6,67.56,67.56,10923944,,48127000.0
1326083,CRWD,2019-06-14,1039026,69.78,70.79,63.02,64.16,64.16,6264212,,48127000.0
1326084,CRWD,2019-06-17,1039026,64.75,70.01,62.0,70.01,70.01,3247471,,48127000.0
1326085,CRWD,2019-06-18,1039026,73.0,78.7,71.5,76.5,76.5,6612626,,48127000.0


In [4]:
# Summary statistics for the full price table.
# NOTE(review): the output below shows price minima of 0 and maxima of
# 100,000,000 - these look like outliers or bad records worth checking.
df_prices.describe()

Unnamed: 0,SimFinId,Open,High,Low,Close,Adj. Close,Volume,Dividend,Shares Outstanding
count,5814288.0,5814288.0,5814288.0,5814288.0,5814288.0,5814288.0,5814288.0,35458.0,5286531.0
mean,5840181.0,16479.46,16579.38,16342.67,16435.71,16433.55,1777196.0,0.455116,603440100000.0
std,6016586.0,1232507.0,1235688.0,1228048.0,1230774.0,1230775.0,28854910.0,8.042175,61956250000000.0
min,18.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,625539.0,7.86,8.07,7.62,7.85,7.38,35745.0,0.12,18028310.0
50%,1300071.0,20.36,20.8,19.95,20.34,18.94,242437.0,0.25,48928060.0
75%,11035460.0,51.76,52.7,50.8,51.74,48.8,932963.5,0.45,130905100.0
max,18493380.0,100000000.0,100000000.0,100000000.0,100000000.0,100000000.0,18489980000.0,1500.0,6667887000000000.0


In [5]:
# Share of missing values per column over the whole dataset:
# about 99% of the Dividend values and about 9% of Shares Outstanding are null
df_prices.isna().mean()

Ticker                0.000000
Date                  0.000000
SimFinId              0.000000
Open                  0.000000
High                  0.000000
Low                   0.000000
Close                 0.000000
Adj. Close            0.000000
Volume                0.000000
Dividend              0.993902
Shares Outstanding    0.090769
dtype: float64

In [6]:
# Share of missing values per column for CRWD only:
# 100% of the Dividend column is null for this ticker
df_crwd.isna().mean()

Ticker                0.0
Date                  0.0
SimFinId              0.0
Open                  0.0
High                  0.0
Low                   0.0
Close                 0.0
Adj. Close            0.0
Volume                0.0
Dividend              1.0
Shares Outstanding    0.0
dtype: float64