In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from jinja2 import Template

import kagglehub
from kagglehub import KaggleDatasetAdapter

# Name of the CSV file inside the Kaggle dataset to load
file_path = "SPY.csv"

# Load the latest version of the file as a pandas DataFrame.
# `dataset_load` supersedes the deprecated `load_dataset` alias; the
# positional arguments (adapter, dataset handle, file path) are unchanged.
df = kagglehub.dataset_load(
    KaggleDatasetAdapter.PANDAS,
    "cheddarbutler/spyhpd",
    file_path,
    # Provide any additional arguments like
    # sql_query or pandas_kwargs. See the
    # documentation for more information:
    # https://github.com/Kaggle/kagglehub/blob/main/README.md#kaggledatasetadapterpandas
)

# Parse 'Date' into datetimes; values that cannot be parsed become NaT
df["Date"] = pd.to_datetime(df["Date"], errors="coerce")

# Coerce the price/volume columns to numeric; unparseable values become NaN
for col in ["Close/Last", "Volume", "Open", "High", "Low"]:
    df[col] = pd.to_numeric(df[col], errors="coerce")

# Sort chronologically (ascending) exactly once. Row order matters for the
# pct_change / rolling / ewm calculations that follow in this cell.
df = df.sort_values(by="Date")

# Technical indicators, all derived from the closing price series.
close = df["Close/Last"]
roll10 = close.rolling(window=10)  # shared 10-row window (SMA and band width)

df["Daily_Return"] = close.pct_change() * 100  # day-over-day change, in percent
df["SMA_10"] = roll10.mean()  # 10-day simple moving average
df["EMA_10"] = close.ewm(span=10, adjust=False).mean()  # 10-day exponential moving average

# RSI: ratio of the 14-row mean gain to the 14-row mean loss, mapped to 0-100
change = close.diff()
avg_gain = (change.where(change > 0, 0)).rolling(window=14).mean()
avg_loss = (-change.where(change < 0, 0)).rolling(window=14).mean()
rel_strength = avg_gain / avg_loss
df["RSI"] = 100 - (100 / (1 + rel_strength))

# MACD: 12-span EMA minus 26-span EMA, with a 9-span EMA of that as the signal line
ema_fast = close.ewm(span=12, adjust=False).mean()
ema_slow = close.ewm(span=26, adjust=False).mean()
df["MACD"] = ema_fast - ema_slow
df["MACD_Signal"] = df["MACD"].ewm(span=9, adjust=False).mean()

# Bollinger Bands: SMA_10 plus/minus two rolling standard deviations
band_offset = roll10.std() * 2
df["Upper_Band"] = df["SMA_10"] + band_offset
df["Lower_Band"] = df["SMA_10"] - band_offset

# Descriptive statistics for every column except 'Date', rounded to two
# decimals and exported to CSV
non_date_columns = df.drop("Date", axis=1)
summary_stats = non_date_columns.describe().round(2)
summary_stats.to_csv("summary_statistics.csv")

# Rows dated within three calendar months of the most recent date
latest_date = df["Date"].max()
three_months_ago = latest_date - pd.DateOffset(months=3)
is_recent = df["Date"] >= three_months_ago
df_recent = df.loc[is_recent]

def _finish_chart(title: str, ylabel: str, filename: str, *, legend: bool = False) -> None:
    """Label, save, and close the current pyplot figure.

    Applies the decoration every chart in this report shares (title, a
    "Date" x-axis label, y-axis label, 45-degree x tick labels, grid),
    writes the figure to ``filename``, and closes it.

    Args:
        title: Chart title.
        ylabel: Y-axis label.
        filename: Path of the image file to write.
        legend: When True, draw a legend for the labelled artists.
    """
    plt.title(title)
    plt.xlabel("Date")
    plt.ylabel(ylabel)
    plt.xticks(rotation=45)
    if legend:
        plt.legend()
    plt.grid()
    # Rotated tick labels plus the axis title can extend past the default
    # bottom margin; tight_layout resizes the axes so they fit in the image.
    plt.tight_layout()
    plt.savefig(filename)
    plt.close()


# Plot: Closing Price Over Time (Full)
plt.figure(figsize=(10, 5))
plt.plot(df["Date"], df["Close/Last"], marker="o", linestyle="-", color="blue")
_finish_chart("Stock Closing Prices Over Time", "Close Price", "closing_prices.png")

# Plot: Closing Price Over the Last 3 Months
plt.figure(figsize=(10, 5))
plt.plot(df_recent["Date"], df_recent["Close/Last"], marker="o", linestyle="-", color="darkblue")
_finish_chart("Closing Prices (Last 3 Months)", "Close Price", "closing_prices_recent.png")

# Plot: Log-normalized Volume (log1p compresses the range and tolerates zeros)
plt.figure(figsize=(10, 5))
log_volume = np.log1p(df["Volume"])
plt.bar(df["Date"], log_volume, color="purple")
_finish_chart("Log-Normalized Trading Volume Over Time", "Log(Volume)", "trading_volume.png")

# Plot: RSI Indicator
plt.figure(figsize=(10, 5))
plt.plot(df["Date"], df["RSI"], color="orange")
plt.axhline(70, linestyle="--", color="red", alpha=0.7)  # Overbought level
plt.axhline(30, linestyle="--", color="green", alpha=0.7)  # Oversold level
_finish_chart("Relative Strength Index (RSI)", "RSI", "rsi.png")

# Plot: MACD Indicator
plt.figure(figsize=(10, 5))
plt.plot(df["Date"], df["MACD"], label="MACD", color="blue")
plt.plot(df["Date"], df["MACD_Signal"], label="Signal Line", color="red", linestyle="--")
plt.axhline(0, color="black", linewidth=0.5, linestyle="--")  # Zero reference line
_finish_chart("MACD Indicator", "MACD Value", "macd.png", legend=True)

# Plot: Bollinger Bands
plt.figure(figsize=(10, 5))
plt.plot(df["Date"], df["Close/Last"], color="blue", label="Closing Price")
plt.plot(df["Date"], df["Upper_Band"], color="red", linestyle="--", label="Upper Band")
plt.plot(df["Date"], df["Lower_Band"], color="green", linestyle="--", label="Lower Band")
plt.fill_between(df["Date"], df["Lower_Band"], df["Upper_Band"], color="gray", alpha=0.3)
_finish_chart("Bollinger Bands", "Price", "bollinger_bands.png", legend=True)

# Generate HTML Report
# The summary table is injected as-is: DataFrame.to_html() already returns a
# complete <table> element, so the template must not wrap it in another
# <table> (nested tables are invalid HTML). The CSS rules below still apply
# because they select on the table/th/td elements themselves.
html_template = """
<!DOCTYPE html>
<html>
<head>
    <title>Stock Data Report</title>
    <style>
        body { font-family: Arial, sans-serif; }
        h1 { text-align: center; }
        img { display: block; margin: 10px auto; max-width: 90%; }
        table { width: 80%; margin: auto; border-collapse: collapse; }
        th, td { border: 1px solid black; padding: 8px; text-align: center; }
        th { background-color: lightgray; }
    </style>
</head>
<body>
    <h1>Stock Data Analysis Report</h1>
    <h2>Summary Statistics</h2>
    {{ summary_table }}
    <h2>Charts</h2>
    <img src="closing_prices.png" alt="Closing Prices">
    <img src="closing_prices_recent.png" alt="Closing Prices (Last 3 Months)">
    <img src="trading_volume.png" alt="Log-Normalized Trading Volume">
    <img src="rsi.png" alt="Relative Strength Index">
    <img src="macd.png" alt="MACD Indicator">
    <img src="bollinger_bands.png" alt="Bollinger Bands">
</body>
</html>
"""

# Full <table>...</table> markup for the summary statistics
summary_table_html = summary_stats.to_html(classes="dataframe", border=1)

# Render the HTML report
report_html = Template(html_template).render(summary_table=summary_table_html)

# The chart images are referenced by relative path, so the report expects
# them to sit next to stock_report.html.
with open("stock_report.html", "w", encoding="utf-8") as f:
    f.write(report_html)

print("Report saved as 'stock_report.html'")


  df = kagglehub.load_dataset(
  return op(a, b)
  return op(a, b)


Report saved as 'stock_report.html'


In [None]:
# Test input (Kaggle notebook used in this example)
# Install dependencies as needed:
# pip install kagglehub[pandas-datasets]
import kagglehub
from kagglehub import KaggleDatasetAdapter

# Name of the CSV file inside the Kaggle dataset to load
file_path = "SPY.csv"

# Load the latest version of the file as a pandas DataFrame.
# `dataset_load` supersedes the deprecated `load_dataset` alias; the
# positional arguments (adapter, dataset handle, file path) are unchanged.
df = kagglehub.dataset_load(
    KaggleDatasetAdapter.PANDAS,
    "cheddarbutler/spyhpd",
    file_path,
    # Provide any additional arguments like
    # sql_query or pandas_kwargs. See the
    # documentation for more information:
    # https://github.com/Kaggle/kagglehub/blob/main/README.md#kaggledatasetadapterpandas
)

# Quick sanity check that the load produced rows
print("First 5 records:", df.head())

In [2]:
# Comprehensive Report
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from jinja2 import Template
import kagglehub
from kagglehub import KaggleDatasetAdapter

# Name of the CSV file inside the Kaggle dataset to load
file_path = "SPY.csv"

# Load the latest version of the file as a pandas DataFrame.
# `dataset_load` supersedes the deprecated `load_dataset` alias; the
# positional arguments (adapter, dataset handle, file path) are unchanged.
df = kagglehub.dataset_load(
    KaggleDatasetAdapter.PANDAS,
    "cheddarbutler/spyhpd",
    file_path,
    # Provide any additional arguments like
    # sql_query or pandas_kwargs. See the
    # documentation for more information:
    # https://github.com/Kaggle/kagglehub/blob/main/README.md#kaggledatasetadapterpandas
)
# Coerce the price/volume columns to numeric; unparseable values become NaN
numeric_columns = ("Close/Last", "Volume", "Open", "High", "Low")
for column_name in numeric_columns:
    df[column_name] = pd.to_numeric(df[column_name], errors="coerce")

# Parse 'Date' into datetimes (unparseable values become NaT), then order
# the rows chronologically, oldest first
parsed_dates = pd.to_datetime(df["Date"], errors="coerce")
df["Date"] = parsed_dates
df = df.sort_values("Date")

# Rows dated within three calendar months of the most recent date.
# Computed once; df is not modified between here and the plots that use it.
three_months_ago = df["Date"].max() - pd.DateOffset(months=3)
df_recent = df[df["Date"] >= three_months_ago]

# Summary statistics with rounded values. 'Date' is dropped first so the
# table holds only the numeric price/volume columns rather than also
# describing the datetime column.
summary_stats = df.drop(columns=["Date"]).describe().round(2)
summary_stats.to_csv("summary_statistics.csv")

# Chart 1: closing price across the whole dataset -> closing_prices.png
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(df["Date"], df["Close/Last"], color="blue", linestyle="-", marker="o")
ax.set_title("Stock Closing Prices Over Time")
ax.set_xlabel("Date")
ax.set_ylabel("Close Price")
plt.xticks(rotation=45)  # acts on the current axes, i.e. `ax`
ax.grid()
fig.savefig("closing_prices.png")
plt.close(fig)

# Chart 2: closing price for the trailing three months -> closing_prices_recent.png
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(df_recent["Date"], df_recent["Close/Last"], color="darkblue", linestyle="-", marker="o")
ax.set_title("Closing Prices (Last 3 Months)")
ax.set_xlabel("Date")
ax.set_ylabel("Close Price")
plt.xticks(rotation=45)  # acts on the current axes, i.e. `ax`
ax.grid()
fig.savefig("closing_prices_recent.png")
plt.close(fig)

# Chart 3: trading volume on a log scale -> trading_volume.png
# log1p (log(1 + x)) is used so a zero volume does not hit log(0)
log_volume = np.log1p(df["Volume"])
fig, ax = plt.subplots(figsize=(10, 5))
ax.bar(df["Date"], log_volume, color="purple")
ax.set_title("Log-Normalized Trading Volume Over Time")
ax.set_xlabel("Date")
ax.set_ylabel("Log(Volume)")
plt.xticks(rotation=45)  # acts on the current axes, i.e. `ax`
ax.grid()
fig.savefig("trading_volume.png")
plt.close(fig)

# Generate an HTML report
# The summary table is injected as-is: DataFrame.to_html() already returns a
# complete <table> element, so the template must not wrap it in another
# <table> (nested tables are invalid HTML). The CSS rules below still apply
# because they select on the table/th/td elements themselves.
html_template = """
<!DOCTYPE html>
<html>
<head>
    <title>Stock Data Report</title>
    <style>
        body { font-family: Arial, sans-serif; }
        h1 { text-align: center; }
        img { display: block; margin: 10px auto; max-width: 90%; }
        table { width: 80%; margin: auto; border-collapse: collapse; }
        th, td { border: 1px solid black; padding: 8px; text-align: center; }
        th { background-color: lightgray; }
    </style>
</head>
<body>
    <h1>Stock Data Analysis Report</h1>
    <h2>Summary Statistics</h2>
    {{ summary_table }}
    <h2>Charts</h2>
    <img src="closing_prices.png" alt="Closing Prices">
    <img src="closing_prices_recent.png" alt="Closing Prices (Last 3 Months)">
    <img src="trading_volume.png" alt="Log-Normalized Trading Volume">
</body>
</html>
"""

# Full <table>...</table> markup for the summary statistics
summary_table_html = summary_stats.to_html(classes="dataframe", border=1)

# Render the HTML report
report_html = Template(html_template).render(summary_table=summary_table_html)

# The chart images are referenced by relative path, so the report expects
# them to sit next to stock_report.html.
with open("stock_report.html", "w", encoding="utf-8") as f:
    f.write(report_html)

print("Report saved as 'stock_report.html'")


  df = kagglehub.load_dataset(


Report saved as 'stock_report.html'


In [None]:
# Rough, needs work
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from jinja2 import Template
import kagglehub
from kagglehub import KaggleDatasetAdapter

# Name of the CSV file inside the Kaggle dataset to load
file_path = "SPY.csv"

# Load the latest version of the file as a pandas DataFrame.
# `dataset_load` supersedes the deprecated `load_dataset` alias; the
# positional arguments (adapter, dataset handle, file path) are unchanged.
df = kagglehub.dataset_load(
    KaggleDatasetAdapter.PANDAS,
    "cheddarbutler/spyhpd",
    file_path,
    # Provide any additional arguments like
    # sql_query or pandas_kwargs. See the
    # documentation for more information:
    # https://github.com/Kaggle/kagglehub/blob/main/README.md#kaggledatasetadapterpandas
)

# Coerce the price/volume columns to numeric; unparseable values become NaN
for col in ["Close/Last", "Volume", "Open", "High", "Low"]:
    df[col] = pd.to_numeric(df[col], errors="coerce")

# Parse 'Date' before sorting. Sorting the raw column would order the rows
# by string comparison, which is not chronological for formats such as
# MM/DD/YYYY, and would give the plots a string axis instead of a date axis.
df["Date"] = pd.to_datetime(df["Date"], errors="coerce")

# Sort by Date for proper plotting
df = df.sort_values(by="Date")

# Generate summary statistics for the price/volume columns only; 'Date' is
# dropped so that parsing it above does not add a datetime column to the table
summary_stats = df.drop(columns=["Date"]).describe()

# Save summary statistics
summary_stats.to_csv("summary_statistics.csv")

# Chart 1: closing price across the whole dataset -> closing_prices.png
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(df["Date"], df["Close/Last"], color="blue", linestyle="-", marker="o")
ax.set_title("Stock Closing Prices Over Time")
ax.set_xlabel("Date")
ax.set_ylabel("Close Price")
plt.xticks(rotation=45)  # acts on the current axes, i.e. `ax`
ax.grid()
fig.savefig("closing_prices.png")
plt.close(fig)

# Chart 2: raw trading volume as bars -> trading_volume.png
fig, ax = plt.subplots(figsize=(10, 5))
ax.bar(df["Date"], df["Volume"], color="purple")
ax.set_title("Stock Trading Volume Over Time")
ax.set_xlabel("Date")
ax.set_ylabel("Volume")
plt.xticks(rotation=45)  # acts on the current axes, i.e. `ax`
ax.grid()
fig.savefig("trading_volume.png")
plt.close(fig)

# Chart 3: daily high and low with the range between them shaded -> highs_lows.png
fig, ax = plt.subplots(figsize=(10, 5))
dates = df["Date"]
ax.plot(dates, df["High"], label="High", color="green")
ax.plot(dates, df["Low"], label="Low", color="red")
ax.fill_between(dates, df["Low"], df["High"], alpha=0.3, color="gray")
ax.set_title("Stock Price Highs and Lows Over Time")
ax.set_xlabel("Date")
ax.set_ylabel("Price")
ax.legend()
plt.xticks(rotation=45)  # acts on the current axes, i.e. `ax`
ax.grid()
fig.savefig("highs_lows.png")
plt.close(fig)

# Generate an HTML report
# The summary table is injected as-is: DataFrame.to_html() already returns a
# complete <table> element, so the template must not wrap it in another
# <table> (nested tables are invalid HTML). The CSS rules below still apply
# because they select on the table/th/td elements themselves.
html_template = """
<!DOCTYPE html>
<html>
<head>
    <title>Stock Data Report</title>
    <style>
        body { font-family: Arial, sans-serif; }
        h1 { text-align: center; }
        img { display: block; margin: 10px auto; max-width: 90%; }
        table { width: 80%; margin: auto; border-collapse: collapse; }
        th, td { border: 1px solid black; padding: 8px; text-align: center; }
        th { background-color: lightgray; }
    </style>
</head>
<body>
    <h1>Stock Data Analysis Report</h1>
    <h2>Summary Statistics</h2>
    {{ summary_table }}
    <h2>Charts</h2>
    <img src="closing_prices.png" alt="Closing Prices">
    <img src="trading_volume.png" alt="Trading Volume">
    <img src="highs_lows.png" alt="Highs and Lows">
</body>
</html>
"""

# Full <table>...</table> markup for the summary statistics
summary_table_html = summary_stats.to_html(classes="dataframe", border=1)

# Render the HTML report
report_html = Template(html_template).render(summary_table=summary_table_html)

# The chart images are referenced by relative path, so the report expects
# them to sit next to stock_report.html.
with open("stock_report.html", "w", encoding="utf-8") as f:
    f.write(report_html)

print("Report saved as 'stock_report.html'")
