# Stock Price Prediction Data Exploration

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Configure visualization
# Matplotlib 3.6 renamed the bundled seaborn style sheets to 'seaborn-v0_8*'
# and 3.8 removed the old names, so use whichever name this install provides.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)
sns.set(font_scale=1.2)
# Set the default figure size through rcParams. Calling plt.figure() here would
# only open an empty figure; the plotting cells below create their own figures.
plt.rcParams['figure.figsize'] = (16, 8)

In [None]:
# Load raw stock data
stocks_df = pd.read_csv('../data/raw/stocks_raw.csv')
market_indices = pd.read_csv('../data/raw/market_indices.csv')

# Basic dataset information
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() appended a stray "None" line.
stocks_df.info()
print(stocks_df.describe())

In [None]:
# Count the null entries in every column and report the totals.
null_mask = stocks_df.isnull()
missing_data = null_mask.sum()
print("Missing Values:\n", missing_data)

# Missing-data strategy: discard every row that has at least one null value.
# The frame is modified in place, so stocks_df itself shrinks.
stocks_df.dropna(axis=0, how='any', inplace=True)

In [None]:
# Time series visualization
# Parse the 'Date' column into timestamps, then promote it to the row index
# (in place) so the plot below uses dates on the x-axis.
parsed_dates = pd.to_datetime(stocks_df['Date'])
stocks_df['Date'] = parsed_dates
stocks_df.set_index('Date', inplace=True)

# Draw the closing price on its own wide figure.
plt.figure(figsize=(20, 10))
close_series = stocks_df['Close']
close_series.plot()
plt.xlabel('Date')
plt.ylabel('Price')
plt.title('Stock Price Closing Trend')
plt.show()

In [None]:
# Correlation Analysis
# Restrict the calculation to numeric columns. Since pandas 2.0, corr()
# defaults to numeric_only=False and raises if a text column is present;
# passing True restores the earlier behaviour of skipping such columns.
# NOTE(review): the numeric_only keyword needs pandas >= 1.5.
correlation_matrix = stocks_df.corr(numeric_only=True)
plt.figure(figsize=(12, 10))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
plt.title('Feature Correlation Matrix')
plt.show()

In [None]:
# Advanced Feature Engineering
def create_technical_indicators(df, short_window=20, long_window=50, rsi_window=14):
    """Add moving-average and RSI columns computed from ``df['Close']``.

    The frame is modified in place and also returned, so both
    ``df = create_technical_indicators(df)`` and a bare call work.

    Args:
        df: DataFrame with a numeric ``'Close'`` column.
        short_window: Number of rows in the short simple moving average.
            The result is stored in ``MA_<short_window>`` (``MA_20`` by
            default).
        long_window: Number of rows in the long simple moving average.
            The result is stored in ``MA_<long_window>`` (``MA_50`` by
            default).
        rsi_window: Number of rows over which gains and losses are averaged
            for the Relative Strength Index, stored in ``RSI``.

    Returns:
        The same ``df`` object, with the ``MA_*`` and ``RSI`` columns added.

    Raises:
        KeyError: If ``df`` has no ``'Close'`` column.
    """
    close = df['Close']

    # Simple moving averages. rolling() emits NaN until it has seen a full
    # window, so the first ``window - 1`` rows of each column are NaN.
    for window in (short_window, long_window):
        df[f'MA_{window}'] = close.rolling(window=window).mean()

    # Relative Strength Index (RSI), using plain rolling means of the
    # row-to-row gains and losses.
    delta = close.diff()
    gain = delta.clip(lower=0)    # upward moves; downward moves become 0
    loss = -delta.clip(upper=0)   # downward moves as positive magnitudes
    avg_gain = gain.rolling(window=rsi_window).mean()
    avg_loss = loss.rolling(window=rsi_window).mean()

    # A window with no losses divides by zero: the ratio becomes infinite and
    # the RSI evaluates to 100. A completely flat window is 0/0 and stays NaN.
    relative_strength = avg_gain / avg_loss
    df['RSI'] = 100.0 - (100.0 / (1.0 + relative_strength))

    return df

# Attach the moving-average and RSI columns and keep the returned frame.
stocks_df = create_technical_indicators(df=stocks_df)

In [None]:
# Volatility Analysis
# Scale factor for annualizing; presumably rows are daily bars with 252
# trading days per year -- confirm against the data source.
TRADING_DAYS_PER_YEAR = 252

# Row-over-row percentage change of the close, stored as a new column.
daily_returns = stocks_df['Close'].pct_change()
stocks_df['Returns'] = daily_returns

# Standard deviation of the returns, scaled by sqrt(periods per year).
volatility = daily_returns.std() * np.sqrt(TRADING_DAYS_PER_YEAR)
print(f"Annualized Volatility: {volatility:.2%}")

In [None]:
# Write the processed frame (index included) to the processed-data folder.
processed_path = '../data/processed/processed_stocks.csv'
stocks_df.to_csv(processed_path)
print("Data exploration and preprocessing completed.")