In [1]:
import os
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from scipy.optimize import minimize

In [None]:
def load_stocks(data_dir, stock_files):
    """Load per-sector stock CSV files and stack them into one long DataFrame.

    Parameters
    ----------
    data_dir : str or path-like
        Directory that contains the CSV files.
    stock_files : iterable of str
        File names, relative to ``data_dir``, to load.

    Returns
    -------
    pandas.DataFrame
        Columns ``Date``, ``Ticker``, ``Close``, ``Sector`` (in that order),
        with ``Date`` converted to timezone-aware UTC datetimes. Rows from all
        successfully read files are concatenated under a fresh integer index.
        When nothing could be loaded, an empty frame with the same columns
        (and a UTC datetime ``Date`` column) is returned.

    Notes
    -----
    Loading is best-effort: a missing file, or a file that cannot be parsed
    or lacks one of the required columns, is reported with ``print`` and
    skipped instead of aborting the whole load.
    """
    columns = ['Date', 'Ticker', 'Close', 'Sector']
    frames = []

    for file in stock_files:
        path = os.path.join(data_dir, file)

        if not os.path.isfile(path):
            print(f"File not found: {file}")
            continue

        try:
            # usecols skips parsing of unneeded columns; read_csv keeps the
            # file's own column order, so re-select to pin the expected order.
            df = pd.read_csv(path, encoding='utf-8', usecols=columns)[columns]

            # assign() returns a new frame, so nothing is written into a slice.
            df = df.assign(Date=pd.to_datetime(df['Date'], utc=True))
        except Exception as e:
            # Deliberately broad: one bad file must not stop the other sectors.
            print(f"Error reading {file}: {e}")
            continue

        frames.append(df)

    if frames:
        return pd.concat(frames, ignore_index=True)

    # Nothing loaded: keep the same schema so later sort/pivot steps see the
    # same Date dtype they would get from real data.
    return pd.DataFrame({
        'Date': pd.Series(dtype='datetime64[ns, UTC]'),
        'Ticker': pd.Series(dtype='object'),
        'Close': pd.Series(dtype='float64'),
        'Sector': pd.Series(dtype='object'),
    })

In [None]:
# Configuration
# Directory holding the per-sector CSV files. Set the STOCK_DATA_DIR environment
# variable to point at a different copy of the data; when it is unset, the
# original hard-coded location is used, so existing runs behave as before.
data_dir = os.environ.get(
    "STOCK_DATA_DIR",
    "C:/Users/Z2005081/Downloads/COPY FROM SCHOOL PC/DATA_MINNER/FULL_2020_2025_11_SECTION",
)

# One CSV file per sector, looked up inside data_dir.
stock_files = [
    "Consumer_Discretionary.csv",
    "Consumer_Staples.csv",
    "Energy.csv",
    "Financials.csv",
    "Health_Care.csv",
    "Industrials.csv",
    "Information_Technology.csv",
    "Materials.csv",
    "Real_Estate.csv",
    "Communication_Services.csv",
    "Utilities.csv",
]

In [None]:
# Read every sector file into one long frame, then order the rows by Ticker
# and, within each ticker, by Date.
combined_df = load_stocks(data_dir, stock_files).sort_values(by=['Ticker', 'Date'])

In [None]:
# Log return per ticker: ln(Close_t / Close_{t-1}), computed inside each Ticker
# group so the first row of one ticker never uses another ticker's price.
# The first row of every group has no previous close, so its LogReturn is NaN;
# those rows are kept here (no dropna is applied).
# The index is rebuilt afterwards so rows are numbered 0..n-1.
combined_df = combined_df.assign(
    LogReturn=combined_df.groupby('Ticker')['Close'].transform(
        lambda close: np.log(close / close.shift(1))
    )
).reset_index(drop=True)

In [None]:
# Show the long-format frame (including the LogReturn column) under a heading.
# sep="\n" reproduces the blank line between the heading and the table.
print("\nSample of cleaned stock log return data:\n", combined_df, sep="\n")

In [None]:
# Reshape long -> wide: one row per Date, one column per Ticker, cells hold the
# LogReturn. Rows are then ordered by Date.
ret_df = (
    combined_df
    .pivot(index='Date', columns='Ticker', values='LogReturn')
    .sort_index()
)
ret_df

# Optional styled view (disabled):
# ret_df.style.set_caption("Log Return Matrix (Date × Ticker)").background_gradient(cmap='coolwarm', axis=None)

In [None]:
# Pairwise Pearson correlation between the tickers' log-return columns
corr_matrix = ret_df.corr(method='pearson')
corr_matrix

In [None]:
# Positive semi-definiteness (PSD) check for the correlation matrix.
# numpy is already imported as `np` in the notebook's first cell, so it is not
# re-imported here.

# eigvalsh is the eigenvalue routine for symmetric matrices (works for a
# correlation or covariance matrix).
# NOTE(review): if corr_matrix contains NaN entries the eigenvalues are not
# meaningful — confirm the matrix is fully populated before trusting the result.
eigenvalues = np.linalg.eigvalsh(corr_matrix)

# PSD requires every eigenvalue to be >= 0; the -1e-8 threshold tolerates
# small negative values caused by floating-point round-off.
is_psd = np.all(eigenvalues >= -1e-8)

print("Is the correlation matrix PSD?", is_psd)


In [None]:
# Report the minimum eigenvalue found by the PSD check
print("Smallest eigenvalue:", eigenvalues.min())

In [None]:
# Cumulative log return of each ticker: running sum down the Date rows
ret_df.cumsum(axis=0)

In [None]:
# Running sum of log returns, one column per ticker
cumulative_returns = ret_df.cumsum()

# Build the interactive figure: one line trace per ticker, all sharing the
# Date index as the x axis.
fig = go.Figure()
dates = cumulative_returns.index
for ticker in cumulative_returns.columns:
    line = go.Scatter(
        x=dates,
        y=cumulative_returns[ticker],
        mode='lines',
        name=ticker,  # trace label; only visible if the legend is switched on
    )
    fig.add_trace(line)

# Titles, size in pixels, and legend visibility
layout_options = dict(
    title="Cumulative Log Returns (Interactive)",
    xaxis_title="Date",
    yaxis_title="Cumulative Return",
    width=1000,
    height=800,
    showlegend=False,  # hide the legend
)
fig.update_layout(**layout_options)

# Render the interactive plot
fig.show()


In [None]:
# Mean log return of each ticker (average over the Date rows)
ret_df.mean(axis=0)

In [None]:
# Risk of each ticker: sample standard deviation of its log returns
ret_df.std(axis=0, ddof=1)

In [None]:
# Weights: equal-weight portfolio — a vector with one entry per ticker column,
# each equal to 1 / (number of tickers), so the weights sum to 1.
W = np.ones(ret_df.shape[1]) / ret_df.shape[1]
W


In [None]:
# Expected portfolio return, written out by hand:
# multiply each ticker's mean return by its weight, then add the products up.
ret_df.mean().mul(W).sum()

In [None]:
# Expected (average) portfolio return as a dot product of mean returns and weights
ret_df.mean() @ W

In [None]:
# Covariance matrix of the tickers' log returns (sample covariance, ddof=1)
ret_df.cov(ddof=1)

In [None]:
# Risk of the portfolio: sqrt(W' * Cov * W), i.e. the portfolio standard
# deviation (the square root of the portfolio variance).
# The original note mentions "550 by 550" — presumably the size of the
# covariance matrix; TODO confirm.
(W.dot(ret_df.cov() @ W)) ** 0.5

In [None]:
# Sharpe ratio of each individual asset (risk-adjusted return):
# mean log return divided by its standard deviation; no risk-free rate is subtracted.
ret_df.mean().div(ret_df.std())

In [None]:
# Sharpe ratio of the portfolio: weighted mean return divided by the portfolio
# standard deviation sqrt(W' * Cov * W); no risk-free rate is subtracted.
(ret_df.mean() @ W) / (W.dot(ret_df.cov() @ W)) ** 0.5