# In [None]:
# Data Exploration Notebook: data_exploration.ipynb

# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the price history CSV; the 'Date' column is parsed as dates and used as the index
data = pd.read_csv(
    '../data/stock_prices.csv',
    index_col='Date',
    parse_dates=['Date'],
)

# Line chart of the 'Close' column against the frame's index
price_fig, price_ax = plt.subplots(figsize=(14, 7))
price_ax.plot(data['Close'], label='Close Price History')
price_ax.set_title('Stock Close Price History')
price_ax.set_xlabel('Date')
price_ax.set_ylabel('Close Price')
price_ax.legend()
plt.show()

# Correlation heatmap
# Only numeric columns can be correlated. Since pandas 2.0, DataFrame.corr()
# no longer drops non-numeric columns silently and raises instead, so select
# the numeric columns explicitly (this works on older pandas versions too).
numeric_data = data.select_dtypes(include='number')
plt.figure(figsize=(10, 6))
sns.heatmap(numeric_data.corr(), annot=True, cmap='coolwarm')
plt.title('Feature Correlation')
plt.show()

# Train/Test Split Visualization
# The data is indexed by date, so the split must be chronological. A shuffled
# split mixes future dates into the training set, and plotting the shuffled
# rows as lines connects points in random date order.
from sklearn.model_selection import train_test_split

# Sort by the date index so "first 80% / last 20%" really means "earlier / later".
close_prices = data[['Close']].sort_index()
# shuffle=False keeps row order: the training set is the earliest 80% of rows,
# the test set is the most recent 20%.
X_train, X_test = train_test_split(close_prices, test_size=0.2, shuffle=False)

plt.figure(figsize=(14, 7))
plt.plot(X_train.index, X_train['Close'], label='Training Data', color='blue')
plt.plot(X_test.index, X_test['Close'], label='Testing Data', color='red')
plt.title('Train/Test Split')
plt.xlabel('Date')
plt.ylabel('Close Price')
plt.legend()
plt.show()

# Preview the first five rows of the loaded frame
data.head(5)
