In [None]:
!pip install yfinance --upgrade --quiet
!pip install matplotlib pandas --quiet
!pip install seaborn --quiet


import seaborn as sns
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt

# Sample ASX tickers, organised by the sector each one belongs to.
_TICKERS_BY_SECTOR = {
    'Mining': ['BHP.AX', 'RIO.AX', 'FMG.AX', 'NST.AX'],
    'Finance': ['CBA.AX', 'NAB.AX', 'WBC.AX', 'ANZ.AX'],
    'Retail': ['WES.AX', 'WOW.AX', 'COL.AX', 'JBH.AX'],
    'Healthcare': ['CSL.AX', 'RMD.AX', 'COH.AX', 'SHL.AX'],
    'Technology': ['XRO.AX', 'CPU.AX', 'WTC.AX', 'APX.AX'],
}

# Flatten the grouping into a ticker -> sector lookup.
# Insertion order follows the grouping above (sector by sector).
sector_map = {
    ticker: sector
    for sector, members in _TICKERS_BY_SECTOR.items()
    for ticker in members
}

# Every ticker symbol, in the same order as sector_map
tickers = [*sector_map]

# Download the price history for all tickers between the two dates in a
# single request. group_by='ticker' is what allows the per-ticker lookup
# data[ticker] below. With auto_adjust=True the 'Close' column is read
# directly (presumably already adjusted -- confirm against the yfinance docs).
data = yf.download(tickers, start="2023-06-01", end="2024-06-01", group_by='ticker', auto_adjust=True)

# Extract just the 'Close' price for each ticker, skipping (and reporting)
# any ticker that came back without usable prices.
close_by_ticker = {}

for ticker in tickers:
    try:
        ticker_close = data[ticker]['Close']
    except KeyError:
        print(f"Missing data for {ticker}")
        continue
    # A ticker whose download failed can still be present as a column that
    # holds nothing but NaN; keeping it would put NaN rows into every table
    # derived from close_prices, so treat it as missing as well.
    if ticker_close.isna().all():
        print(f"Missing data for {ticker}")
        continue
    close_by_ticker[ticker] = ticker_close

# One column per ticker that has data, in the order of `tickers`
close_prices = pd.DataFrame(close_by_ticker)

close_prices.head() # Shows first five entries

# Calculate daily returns as the percentage change between consecutive rows.
# fill_method=None stops pandas from forward-filling missing prices first:
# that implicit fill records artificial 0% returns across gaps (understating
# volatility) and relies on a default that newer pandas versions deprecate.
daily_returns = close_prices.pct_change(fill_method=None)

# Preview the results
daily_returns.head()

# Per-stock metrics over the whole period
mean_returns = daily_returns.mean()
volatility = daily_returns.std()

# Combine into one summary table, one row per ticker
stock_summary = pd.DataFrame({
    'Average Daily Return': mean_returns,
    'Volatility (Std Dev)': volatility
})

# Sort by return, highest first
stock_summary = stock_summary.sort_values(by='Average Daily Return', ascending=False)
# Add Sector column by looking each index label (ticker) up in sector_map
stock_summary['Sector'] = stock_summary.index.map(sector_map)


print(stock_summary)

#  Moving averages: 20-row and 50-row rolling means of every stock's close.
#  Each result gets its own column suffix so it cannot collide with the raw
#  price columns (or with the other window) once they are placed side by side.
ma_20 = close_prices.rolling(window=20).mean().add_suffix("_MA20")
ma_50 = close_prices.rolling(window=50).mean().add_suffix("_MA50")

#  One wide table: close prices, then the MA20 columns, then the MA50 columns,
#  joined horizontally on their shared index.
price_frames = [close_prices, ma_20, ma_50]
combined_df = pd.concat(price_frames, axis=1)

#  Save the combined table to CSV for viewing or reporting
combined_df.to_csv("all_stocks_with_moving_averages.csv")

#  Optional: look at the last few rows to verify
combined_df.tail()

## Sector-level summary, including the best/worst performing stock per sector.
returns_by_sector = stock_summary.groupby('Sector')['Average Daily Return']

# Index label (ticker) of the highest / lowest average daily return per sector
best_stocks = returns_by_sector.idxmax()
worst_stocks = returns_by_sector.idxmin()

# Sector averages of the per-stock return and volatility figures
metric_columns = ['Average Daily Return', 'Volatility (Std Dev)']
sector_summary = stock_summary.groupby('Sector')[metric_columns].mean()

# All three results come from grouping on 'Sector', so their rows are in the
# same order and the raw arrays can be attached positionally.
sector_summary['Best Stock'] = best_stocks.to_numpy()
sector_summary['Worst Stock'] = worst_stocks.to_numpy()


# Export to the CSV File.
sector_summary.to_csv('sector_summary.csv')


##### Data Visualization #####

# Bar charts of the two sector-level metrics, one figure per metric.
# Each entry: (sector_summary column, bar colour, chart title, y-axis label)
sector_bar_charts = [
    ('Volatility (Std Dev)', 'orange',
     'Volatility by Sector', 'Volatility (Standard Deviation)'),
    ('Average Daily Return', 'skyblue',
     'Average Daily Return by Sector', 'Average Daily Return'),
]

for metric, bar_color, chart_title, y_label in sector_bar_charts:
    fig, ax = plt.subplots(figsize=(10, 6))
    sector_summary[metric].plot(kind='bar', color=bar_color, ax=ax)
    ax.set_title(chart_title)
    ax.set_xlabel('Sector')
    ax.set_ylabel(y_label)
    # Dashed horizontal guide lines only, to make bar heights easier to read
    ax.grid(axis='y', linestyle='--', alpha=0.7)
    fig.tight_layout()
    plt.show()

# Pairwise correlation between the stocks' daily returns
correlation_matrix = daily_returns.corr()

# Draw the matrix as an annotated heatmap (cell values shown to two decimals)
fig, ax = plt.subplots(figsize=(12, 10))
heatmap_style = dict(annot=True, cmap='coolwarm', fmt=".2f", linewidths=0.5)
sns.heatmap(correlation_matrix, ax=ax, **heatmap_style)
ax.set_title('Stock Return Correlation Heatmap')
fig.tight_layout()
plt.show()


