# 1. Importing Required Libraries

In [None]:
# Importing libraries
%pip install pandas numpy matplotlib seaborn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's "whitegrid" style globally so the plots created below share it.
sns.set(style="whitegrid")

# Display settings
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
# Default figure size as (width, height) for figures that do not pass their own figsize.
plt.rcParams["figure.figsize"] = (12, 6)


# 2. Loading Data

In [None]:
# Load data (adjust file_path to the location of your CSV file)
file_path = "data/input/historical_data.csv"
date_column = "Date"
price_column = "Close"

# Read the raw CSV into a DataFrame
data = pd.read_csv(file_path)

# Fail early with a clear message if a configured column is absent.
# Without this check a wrong price_column is silently ignored by rename()
# and only surfaces later as a KeyError on "close".
missing_columns = [
    column for column in (date_column, price_column) if column not in data.columns
]
if missing_columns:
    raise KeyError(
        f"Column(s) {missing_columns} not found in {file_path}; "
        f"available columns: {list(data.columns)}"
    )

# Parse the dates, index the frame by them, and normalize the price column
# name to "close", which is the name the cells below read.
data[date_column] = pd.to_datetime(data[date_column])
data.set_index(date_column, inplace=True)
data = data.rename(columns={price_column: "close"})

# Rolling means, percentage changes and drawdowns are all order-dependent, so
# make sure rows are chronological even if the file is stored newest-first.
# mergesort is stable: rows sharing a timestamp keep their file order.
data.sort_index(inplace=True, kind="mergesort")

# Display first few rows
print(data.head())


# 3. Summary Statistics

In [None]:
# Per-column descriptive statistics of the loaded data
print("Summary Statistics:")
summary_table = data.describe()
print(summary_table)

# Number of missing entries in each column
missing_values = data.isna().sum()
print("\nMissing Values:\n", missing_values)


# 4. Visualizing the Closing Price

In [None]:
# Closing price over time, drawn on an explicit Axes object
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(data.index, data["close"], label="Closing Price", color="blue")
ax.set_title("Time Series of Closing Prices")
ax.set_xlabel("Date")
ax.set_ylabel("Price")
ax.legend()
plt.show()


# 5. Rolling Statistics

In [None]:
# Rolling-window lengths, counted in rows. They are named once here so the
# computed averages and the legend labels below cannot drift apart.
# NOTE(review): the "-Day" labels assume one row per trading day — confirm.
short_window = 20
long_window = 50

# Add rolling means. The first (window - 1) rows of each column are NaN
# because a full window is not yet available.
data["short_ma"] = data["close"].rolling(window=short_window).mean()
data["long_ma"] = data["close"].rolling(window=long_window).mean()

# Plot the closing price together with both rolling means
plt.figure(figsize=(12, 6))
plt.plot(data.index, data["close"], label="Closing Price", alpha=0.6)
plt.plot(data.index, data["short_ma"], label=f"{short_window}-Day MA", color="orange")
plt.plot(data.index, data["long_ma"], label=f"{long_window}-Day MA", color="red")
plt.title("Closing Price with Moving Averages")
plt.xlabel("Date")
plt.ylabel("Price")
plt.legend()
plt.show()


# 6. Daily Returns

In [None]:
# Calculate daily returns as the fractional change from the previous row.
# Gaps in the price are forward-filled explicitly and pct_change's own
# filling is switched off (fill_method=None). This reproduces the historical
# default result while no longer depending on the implicit forward-fill
# default, which recent pandas releases deprecate and later drop.
data["daily_returns"] = data["close"].ffill().pct_change(fill_method=None)

# Plot histogram of daily returns (the first row has no previous price, so
# its NaN return is dropped before plotting)
plt.figure(figsize=(12, 6))
sns.histplot(data["daily_returns"].dropna(), kde=True, bins=50, color="green")
plt.title("Distribution of Daily Returns")
plt.xlabel("Daily Returns")
plt.ylabel("Frequency")
plt.show()


# 7. Correlation Analysis

In [None]:
# Correlation heatmap (if additional columns like volume or other features exist)
# Restrict to numeric columns first: recent pandas versions no longer drop
# non-numeric columns (e.g. a ticker symbol) inside corr() and raise instead.
# Selecting the columns explicitly behaves the same on every pandas version.
numeric_data = data.select_dtypes(include="number")
correlation_matrix = numeric_data.corr()
sns.heatmap(correlation_matrix, annot=True, cmap="coolwarm", fmt=".2f")
plt.title("Correlation Heatmap")
plt.show()


# 8. Maximum Drawdown Analysis

In [None]:
# Calculate cumulative returns and drawdowns.
# The first row's daily return is NaN (there is no previous price). Treating
# it as 0 makes the cumulative series start at 1.0, so the starting price
# counts as a peak. Without this, a decline that begins on the very first day
# is reported as zero drawdown because the running maximum starts one row late.
# Any other missing return is likewise treated as a flat (0%) day.
growth_factors = 1 + data["daily_returns"].fillna(0)
data["cumulative_returns"] = growth_factors.cumprod()

# Running peak of the cumulative series, and the fractional distance below it
# (0 at a new peak, negative while under water).
data["cumulative_max"] = data["cumulative_returns"].cummax()
data["drawdown"] = (data["cumulative_returns"] - data["cumulative_max"]) / data["cumulative_max"]

# Plot drawdowns
plt.figure(figsize=(12, 6))
plt.plot(data.index, data["drawdown"], label="Drawdown", color="red")
plt.fill_between(data.index, data["drawdown"], 0, color="red", alpha=0.3)
plt.title("Drawdown Over Time")
plt.xlabel("Date")
plt.ylabel("Drawdown")
plt.legend()
plt.show()


# 9. Save Preprocessed Data

In [None]:
# Save the enriched dataset with rolling statistics, daily returns, etc.
from pathlib import Path

output_path = "data/output/preprocessed_data.csv"

# to_csv does not create missing directories, so make sure the target folder
# exists first; exist_ok keeps re-running this cell harmless.
Path(output_path).parent.mkdir(parents=True, exist_ok=True)

data.to_csv(output_path)
print(f"Preprocessed data saved to {output_path}.")
