# Companies Stock EDA

Load per-company CSVs from ../Data/yfinance_data, compute basic summaries, save outputs to ../outputs, and show quick plots.

In [2]:
%pip install -r ../requirements.txt

^C
Note: you may need to restart the kernel to use updated packages.


In [1]:
# Imports

import os
import glob
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

sns.set(style='whitegrid')

In [4]:
# Configure paths and ensure outputs directory
repo_root = os.path.join('..')
data_dir = os.path.join(repo_root, 'Data', 'yfinance_data')
outputs_dir = os.path.join(repo_root, 'outputs')
os.makedirs(outputs_dir, exist_ok=True)

# Find CSV files and read them into a dict keyed by ticker
files = sorted(glob.glob(os.path.join(data_dir, '*_historical_data.csv')))
frames = {}
for f in files:
    # filename pattern: TICKER_historical_data.csv
    base = os.path.basename(f)
    ticker = base.split('_')[0].upper()
    df = pd.read_csv(f)
    # normalize Date if present
    if 'Date' in df.columns:
        df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
        df = df.set_index('Date')
    frames[ticker] = df

print('Found tickers:', list(frames.keys()))

Found tickers: []


In [5]:
# Save per-ticker summaries and quick plots (Close price) to outputs/
summary_rows = []
for ticker, df in frames.items():
    rows, cols = df.shape
    # null counts and describe saved
    df.isnull().sum().to_csv(os.path.join(outputs_dir, f'{ticker}_nulls.csv'))
    df.describe().transpose().to_csv(os.path.join(outputs_dir, f'{ticker}_describe.csv'))
    summary_rows.append({'ticker': ticker, 'rows': rows, 'cols': cols})
    # save close price plot if present
    if 'Close' in df.columns:
        plt.figure(figsize=(10,3))
        df['Close'].plot(title=f'{ticker} Close Price')
        plt.tight_layout()
        plt.savefig(os.path.join(outputs_dir, f'{ticker}_close.png'))
        plt.close()

pd.DataFrame(summary_rows).to_csv(os.path.join(outputs_dir, 'stocks_shapes.csv'), index=False)
print('Wrote summaries and plots to', outputs_dir)

Wrote summaries and plots to ..\outputs


In [7]:
# Display inline examples: head and Close price comparison for available tickers
display_count = 0
for ticker, df in frames.items():
    if display_count >= 3:
        break
    print(f'--- {ticker} head ---')
    display(df.head(3))
    if 'Close' in df.columns:
        plt.figure(figsize=(8,2.5))
        df['Close'].plot(title=f'{ticker} Close (sample)')
        plt.tight_layout()
        plt.show()
    display_count += 1

In [6]:
df_apple = pd.read_csv('../Data/yfinance_data/AAPL_historical_data.csv')

df_apple.head()

FileNotFoundError: [Errno 2] No such file or directory: '../Data/yfinance_data/AAPL_historical_data.csv'

Collecting pytest (from -r ../requirements.txt (line 2))
  Using cached pytest-9.0.1-py3-none-any.whl.metadata (7.6 kB)
Collecting nltk (from -r ../requirements.txt (line 13))
  Using cached nltk-3.9.2-py3-none-any.whl.metadata (3.2 kB)
Collecting textblob (from -r ../requirements.txt (line 14))
  Using cached textblob-0.19.0-py3-none-any.whl.metadata (4.4 kB)
Collecting pynance (from -r ../requirements.txt (line 18))
  Using cached pynance-1.0.0-py3-none-any.whl.metadata (1.2 kB)
Collecting spacy (from -r ../requirements.txt (line 22))
  Using cached spacy-3.8.11-cp313-cp313-win_amd64.whl.metadata (28 kB)
Collecting gensim (from -r ../requirements.txt (line 23))
  Using cached gensim-4.4.0-cp313-cp313-win_amd64.whl.metadata (8.6 kB)
Collecting plotly (from -r ../requirements.txt (line 26))
  Using cached plotly-6.5.0-py3-none-any.whl.metadata (8.5 kB)
Collecting dash (from -r ../requirements.txt (line 27))
  Using cached dash-3.3.0-py3-none-any.whl.metadata (11 kB)
Collecting prophet 


[notice] A new release of pip is available: 24.3.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


Notes:
- Outputs are written to ../outputs (per-ticker _nulls.csv, _describe.csv, _close.png, and stocks_shapes.csv).
- Adjust the data_dir variable if your CSV files are in a different location.