# In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset (replace the path with your actual data file).
# pd.read_csv only parses delimited text; for a Parquet file use
# pd.read_parquet("data.parquet") instead.
data = pd.read_csv("data.csv")

# Display the first few rows.
print(data.head())

# Column dtypes and non-null counts.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would append a stray "None" line.
data.info()

# Summary statistics for numerical columns.
print(data.describe())

# --- Visualizations ---

# 1. Time Series Plot of Close Price
# Assumes the frame has a 'Date' and a 'Close' column.
plt.figure(figsize=(10, 6))
# read_csv leaves dates as plain strings unless told otherwise, and matplotlib
# plots strings as categories (one tick per row, no real time scale).
# Converting to datetime here gives a proper, readable time axis without
# modifying `data` itself; it is a no-op if the column is already datetime.
plt.plot(pd.to_datetime(data['Date']), data['Close'])
plt.xlabel("Date")
plt.ylabel("Close Price")
plt.title("Close Price Over Time")
plt.grid(True)
plt.show()


# 2. Histogram of Volume
# Draw a 30-bin histogram of the 'Volume' column with a KDE curve overlaid,
# using an explicit Axes object instead of the implicit pyplot state.
volume_fig, volume_ax = plt.subplots(figsize=(10, 6))
sns.histplot(data['Volume'], bins=30, kde=True, ax=volume_ax)
volume_ax.set_xlabel("Volume")
volume_ax.set_ylabel("Frequency")
volume_ax.set_title("Volume Distribution")
plt.show()

# 3. Boxplot of Returns
# Only drawn when the dataset actually carries a 'Returns' column; otherwise
# this section is skipped silently.
if 'Returns' in data.columns:
    returns_fig, returns_ax = plt.subplots(figsize=(8, 6))
    sns.boxplot(y=data['Returns'], ax=returns_ax)
    returns_ax.set_ylabel("Returns")
    returns_ax.set_title("Boxplot of Returns")
    plt.show()


# 4. Correlation Matrix Heatmap (for numerical columns)
# Keep only numeric columns so .corr() is not handed strings or dates.
# The "number" selector string is pandas' built-in alias for all numeric
# dtypes; it avoids depending on a numpy import (`np` is not imported here).
numeric_data = data.select_dtypes(include="number")
corr_matrix = numeric_data.corr()
plt.figure(figsize=(10, 8))
# Pin the colour scale to the full correlation range [-1, 1] so that colours
# are comparable between datasets and 0 sits at the neutral midpoint.
sns.heatmap(corr_matrix, annot=True, cmap="coolwarm", vmin=-1, vmax=1)
plt.title("Correlation Matrix")
plt.show()
