In [None]:
# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the raw price table. The path is relative to the working directory
# the notebook is run from.
data = pd.read_csv('../data/stock_prices.csv')

# Preview the first rows exactly as they appear in the file.
# NOTE: display() is not imported in this file; it is presumably the
# rich-output helper that Jupyter/IPython injects into the namespace.
print("First few rows of the dataset:")
display(data.head())

# Summary statistics of the columns as loaded (before any type conversion)
print("\nDataset Statistics:")
display(data.describe())

# Per-column count of missing values
print("\nMissing Values:")
print(data.isnull().sum())

# Parse 'Date' into datetime values so it can serve as a time axis
data['Date'] = pd.to_datetime(data['Date'])

# The line plots and rolling means computed later treat row order as time
# order, but nothing guarantees the CSV is stored chronologically. Sort by
# date (stable sort, so rows sharing a date keep their file order) and
# rebuild the index so positional order and index order agree.
data = data.sort_values('Date', kind='mergesort').reset_index(drop=True)

# Line chart of the closing price against the date, drawn on an explicit Axes
close_fig, close_ax = plt.subplots(figsize=(12, 6))
close_ax.plot(data['Date'], data['Close'], label='Close Price', color='blue')
close_ax.set_title('Stock Close Prices Over Time')
close_ax.set_xlabel('Date')
close_ax.set_ylabel('Close Price')
close_ax.legend()
plt.show()

# Add simple moving averages of the close price as new columns
# ('SMA_5' and 'SMA_20'); the first window-1 rows of each are NaN.
for sma_window in (5, 20):
    data[f'SMA_{sma_window}'] = data['Close'].rolling(window=sma_window).mean()

# Overlay the close price and both moving averages on a single Axes.
# Each entry is (column, legend label, line colour), drawn in this order.
sma_fig, sma_ax = plt.subplots(figsize=(12, 6))
sma_series = (
    ('Close', 'Close Price', 'blue'),
    ('SMA_5', '5-Day SMA', 'orange'),
    ('SMA_20', '20-Day SMA', 'green'),
)
for sma_column, sma_label, sma_color in sma_series:
    sma_ax.plot(data['Date'], data[sma_column], label=sma_label, color=sma_color)
sma_ax.set_title('Stock Prices with Moving Averages')
sma_ax.set_xlabel('Date')
sma_ax.set_ylabel('Price')
sma_ax.legend()
plt.show()

# Heatmap of pairwise correlations between the numeric columns.
# Only numeric columns are passed to corr(): since pandas 2.0, corr() no
# longer drops non-numeric columns by default, so the datetime 'Date'
# column (or any text column in the CSV) could make it raise.
numeric_data = data.select_dtypes(include='number')
plt.figure(figsize=(8, 6))
sns.heatmap(numeric_data.corr(), annot=True, cmap='coolwarm')
plt.title('Correlation Matrix')
plt.show()

# Histogram of 'Volume' (30 bins) with a kernel density estimate overlaid
volume_fig, volume_ax = plt.subplots(figsize=(10, 6))
sns.histplot(data['Volume'], bins=30, kde=True, color='purple', ax=volume_ax)
volume_ax.set_title('Volume Distribution')
volume_ax.set_xlabel('Volume')
volume_ax.set_ylabel('Frequency')
plt.show()

# Side-by-side box plots, one per price column
price_columns = ['Open', 'High', 'Low', 'Close']
box_fig, box_ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=data[price_columns], palette='Set2', ax=box_ax)
box_ax.set_title('Boxplot of Stock Prices')
plt.show()
