#  📊 Data Ingestion and Quality Checks
# 
# **IMPORTANT**: This notebook **USES** the functions defined in the existing `src/data.py` module.
# 
#  Objectives:
# - ✅ **Tests** your existing `src/data.py` functions
# - ✅ **Visualizes** downloaded data
# - ✅ **Compares** multiple stocks
# - ✅ **Documents** data quality findings

# Import and Setup

In [None]:
import sys

# The project module `data` lives in ../src (relative to the working
# directory), so that directory must be on sys.path BEFORE the
# `from data import ...` line below runs; otherwise the import raises
# ModuleNotFoundError. The membership check keeps re-running this cell from
# appending duplicate entries.
if '../src' not in sys.path:
    sys.path.append('../src')

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from data import (
    calculate_returns,
    download_multiple_tickers,
    load_raw_data,
    validate_data_quality,
)

# Configure display: show every DataFrame column and use the seaborn-like
# matplotlib style sheet.
pd.set_option('display.max_columns', None)
plt.style.use('seaborn-v0_8')

print("✅ Imports complete - using existing src/data.py functions")

#  Download Sample Data 


In [None]:
# Symbols to download; the multi-stock comparison cell reuses this list.
tickers = ['AAPL', 'GOOGL', 'MSFT']

# NOTE(review): the positional arguments are presumably
# (tickers, start date, end date, output directory) -- confirm against the
# signature of download_multiple_tickers in src/data.py.
start_date, end_date = '2020-01-01', '2023-12-31'
raw_dir = '../data/raw'
results = download_multiple_tickers(tickers, start_date, end_date, raw_dir)

print("Download completed!")
print("Saved files:", results)

# Load and Explore

In [None]:
# Load the AAPL CSV from the raw-data directory via the project loader.
aapl_data = load_raw_data('../data/raw/AAPL.csv')

# Report the table dimensions and the span covered by its index.
print(f"AAPL data shape: {aapl_data.shape}")
first_index = aapl_data.index.min()
last_index = aapl_data.index.max()
print(f"Date range: {first_index} to {last_index}")

# Last expression of the cell: the notebook renders the first rows.
aapl_data.head()

#  Data Quality Assessment

In [None]:
# Run the project's quality validation on the AAPL frame. The result is read
# below through its 'overall' (truthy means pass) and 'issues' (iterable) keys.
quality_results = validate_data_quality(aapl_data, detailed=True)

print("Data Quality Report:")
verdict = '✅ PASS' if quality_results['overall'] else '❌ ISSUES'
print(f"Overall Quality: {verdict}")

# One bullet line per reported issue; nothing is printed when there are none.
for issue in quality_results['issues']:
    print(f"- {issue}")

#  Basic Visualizations 

In [None]:
# 2x2 grid: price and volume on the top row, returns and their histogram
# on the bottom row.
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
price_ax, volume_ax = axes[0, 0], axes[0, 1]
returns_ax, hist_ax = axes[1, 0], axes[1, 1]

close_prices = aapl_data['Close']

close_prices.plot(title='AAPL Close Price', ax=price_ax)
aapl_data['Volume'].plot(title='AAPL Volume', ax=volume_ax)

# NOTE(review): the line plot uses the project's calculate_returns while the
# histogram uses pandas' pct_change; confirm both give the same series, or
# use one of them for both panels.
calculate_returns(close_prices).plot(title='AAPL Daily Returns', ax=returns_ax)
close_prices.pct_change().hist(bins=50, ax=hist_ax)
hist_ax.set_title('Returns Distribution')

plt.tight_layout()
plt.show()

#  Multi-Stock Comparison 

In [None]:
# Collect the 'Close' column of every downloaded ticker, keyed by symbol.
all_data = {}
for ticker in tickers:
    ticker_frame = load_raw_data(f'../data/raw/{ticker}.csv')
    all_data[ticker] = ticker_frame['Close']

# One column per ticker, aligned on the shared index, drawn on a single chart.
comparison_df = pd.DataFrame(all_data)
comparison_df.plot(title='Stock Price Comparison', figsize=(12, 6))
plt.show()