<a href="https://colab.research.google.com/github/Shubhangi-Shubhra/Algorithimic-Trading/blob/main/Copy_of_Stock_Market_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#importing libraries
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from scipy import stats

In [None]:
# Define sectors and their representative ETFs
sectors = {
    'Technology': 'XLK',
    'Healthcare': 'XLV',
    'Financials': 'XLF',
    'Consumer Discretionary': 'XLY',
    'Industrials': 'XLI',
    'Energy': 'XLE',
    'Consumer Staples': 'XLP',
    'Utilities': 'XLU',
    'Real Estate': 'XLRE',
    'Materials': 'XLB',
    'Communication Services': 'XLC'
}

In [None]:
# Define major companies in each sector
major_companies = {
    'Technology': ['AAPL', 'MSFT', 'NVDA', 'ADBE', 'ORCL'],
    'Healthcare': ['JNJ', 'UNH', 'PFE', 'ABT', 'TMO'],
    'Financials': ['JPM', 'BAC', 'WFC', 'C', 'GS'],
    'Consumer Discretionary': ['AMZN', 'HD', 'NKE', 'MCD', 'SBUX'],
    'Industrials': ['BA', 'UNP', 'HON', 'UPS', 'CAT'],
    'Energy': ['XOM', 'CVX', 'COP', 'SLB', 'EOG'],
    'Consumer Staples': ['PG', 'KO', 'PEP', 'WMT', 'COST'],
    'Utilities': ['NEE', 'DUK', 'SO', 'D', 'AEP'],
    'Real Estate': ['AMT', 'PLD', 'CCI', 'EQIX', 'PSA'],
    'Materials': ['LIN', 'SHW', 'APD', 'ECL', 'NEM'],
    'Communication Services': ['GOOGL', 'FB', 'NFLX', 'CMCSA', 'VZ']
}


In [None]:
# Function to download data
def get_sector_data(sectors, start_date, end_date):
    data = yf.download(list(sectors.values()), start=start_date, end=end_date)['Adj Close']
    data.columns = sectors.keys()
    return data

# Download data
start_date = '2018-01-01'
end_date = '2023-07-30'
sector_data = get_sector_data(sectors, start_date, end_date)


[*********************100%%**********************]  11 of 11 completed


In [None]:
# Calculate daily returns
returns = sector_data.pct_change()

# 1. Cumulative Returns
cumulative_returns = (1 + returns).cumprod()

# Visualize Cumulative Returns
fig = px.line(cumulative_returns, x=cumulative_returns.index, y=cumulative_returns.columns,
              title='Cumulative Returns by Sector')
fig.show()

  v = v.dt.to_pydatetime()


In [None]:
# 2. Volatility Analysis
#Volatility is calculated as the standard deviation of daily returns, annualized by multiplying by the square root of 252

volatility = returns.std() * np.sqrt(252)  # Annualized volatility

# Visualize Volatility
fig = px.bar(x=volatility.index, y=volatility.values,
             title='Annualized Volatility by Sector')
fig.show()

In [None]:
#The Sharpe Ratio is a widely-used metric to evaluate the risk-adjusted return of an investment.
# It measures how well the return of an asset compensates an investor for the risk taken.
#Higher Sharpe ratios indicate better risk-adjusted performance.
# 3. Sharpe Ratio (assuming risk-free rate of 2%)
risk_free_rate = 0.02
sharpe_ratio = (returns.mean() * 252 - risk_free_rate) / (returns.std() * np.sqrt(252))

# Visualize Sharpe Ratio
fig = px.bar(x=sharpe_ratio.index, y=sharpe_ratio.values,
             title='Sharpe Ratio by Sector')
fig.show()



In [None]:
#Maximum Drawdown (MDD) is a risk metric that  is used to assess the worst-case loss an investor might experience during a downturn.
#MDD is important because it highlights the potential risk of substantial losses, even if the long-term returns are positive.
# 4. Maximum Drawdown
def calculate_max_drawdown(return_series):
    comp_ret = (return_series + 1).cumprod()
    peak = comp_ret.expanding(min_periods=1).max()
    dd = (comp_ret/peak) - 1
    return dd.min()

max_drawdown = returns.apply(calculate_max_drawdown)

# Visualize Maximum Drawdown
fig = px.bar(x=max_drawdown.index, y=max_drawdown.values,
             title='Maximum Drawdown by Sector')
fig.show()


In [None]:
#Rolling Correlation measures the relationship between two time series over a moving window.
# In this case, you’re calculating the correlation between the daily returns of various sectors and the S&P 500 over a 60-day rolling window.
# Correlation quantifies the degree to which two variables move in relation to each other.
#The rolling window is a fixed period over which calculations are made and then moved forward in time.
# 5. Rolling Correlation with S&P 500
sp500 = yf.download('^GSPC', start=start_date, end=end_date)['Adj Close']
sp500_returns = sp500.pct_change()

rolling_correlation = returns.rolling(window=60).corr(sp500_returns)

# Visualize Rolling Correlation
fig = px.line(rolling_correlation, x=rolling_correlation.index, y=rolling_correlation.columns,
              title='60-day Rolling Correlation with S&P 500')
fig.show()

[*********************100%%**********************]  1 of 1 completed

The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



In [None]:
#The Risk-Return Trade-off is a fundamental concept in finance that examines the relationship between the potential
#return of an investment and the associated risk.
# 6. Risk-Return Trade-off
annual_returns = returns.mean() * 252
annual_volatility = returns.std() * np.sqrt(252)

fig = px.scatter(x=annual_volatility, y=annual_returns, text=annual_volatility.index,
                 title='Risk-Return Trade-off by Sector')
fig.update_traces(textposition='top center')
fig.show()

In [None]:
# The Sector Performance Heatmap visualizes the correlation between the returns of different sectors over time.
corr_matrix = returns.corr()

fig = px.imshow(corr_matrix, text_auto=True, aspect="auto",
                title="Sector Correlation Heatmap")
fig.show()

In [None]:
# 10. Sector Rankings
def rank_sectors(data):
    return data.rank(ascending=False)

rankings = pd.DataFrame({
    'Returns': rank_sectors(annual_returns),
    'Volatility': rank_sectors(-annual_volatility),
    'Sharpe Ratio': rank_sectors(sharpe_ratio),
    'Max Drawdown': rank_sectors(-max_drawdown),

})

rankings['Overall'] = rankings.mean(axis=1)
rankings = rankings.sort_values('Overall', ascending=True)

# Visualize Rankings
fig = px.imshow(rankings, text_auto=True, aspect="auto",
                title="Sector Rankings (Lower is Better)")
fig.show()

In [None]:

# Print the safest sector based on overall ranking
safest_sector = rankings['Overall'].idxmin()
print(f"Based on our analysis, the safest sector to invest in is: {safest_sector}")

Based on our analysis, the safest sector to invest in is: Industrials


In [None]:
# 8. Conditional Value at Risk (CVaR)
def calculate_cvar(returns, level=5):
    var = np.percentile(returns, level)
    return returns[returns <= var].mean()

cvar = returns.apply(calculate_cvar)

In [None]:
# 7. Value at Risk (VaR)
def calculate_var(returns, level=5):
    return np.percentile(returns, level)

var = returns.apply(calculate_var)

In [None]:

# Additional insights
print("\nTop 3 sectors by different metrics:")
print("Highest Returns:", annual_returns.nlargest(3))
print("Lowest Volatility:", annual_volatility.nsmallest(3))
print("Highest Sharpe Ratio:", sharpe_ratio.nlargest(3))
print("Lowest Maximum Drawdown:", max_drawdown.nsmallest(3))
print("Lowest Value at Risk:", var.nlargest(3))
print("Lowest Conditional Value at Risk:", cvar.nlargest(3))


Top 3 sectors by different metrics:
Highest Returns: Energy                    0.230518
Communication Services    0.139528
Financials                0.139314
dtype: float64
Lowest Volatility: Consumer Staples    0.169663
Materials           0.189182
Real Estate         0.219520
dtype: float64
Highest Sharpe Ratio: Energy                    0.769551
Materials                 0.535034
Communication Services    0.479294
dtype: float64
Lowest Maximum Drawdown: Financials               -0.668130
Healthcare               -0.466536
Consumer Discretionary   -0.428625
dtype: float64
Lowest Value at Risk: Technology   NaN
Healthcare   NaN
Financials   NaN
dtype: float64
Lowest Conditional Value at Risk: Technology   NaN
Healthcare   NaN
Financials   NaN
dtype: float64


In [None]:
# Analyze individual stocks from top performing sectors
top_sectors = annual_returns.nlargest(3).index
top_companies = []
for sector in top_sectors:
    top_companies.extend(major_companies[sector])

In [None]:
# Download data for top companies
company_data = yf.download(top_companies, start='2018-07-30', end='2023-07-30')['Adj Close']

# Calculate 5-year returns
returns_5y = (company_data.iloc[-1] - company_data.iloc[0]) / company_data.iloc[0] * 100

# Sort and get the top performer
top_performer = returns_5y.idxmax()
top_return = returns_5y.max()

print(f"\nThe company with the highest 5-year return is: {top_performer}")
print(f"5-year return: {top_return:.2f}%")


[*********************100%%**********************]  15 of 15 completed
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['FB']: YFTzMissingError('$%ticker%: possibly delisted; No timezone found')



The company with the highest 5-year return is: GOOGL
5-year return: 115.57%


In [None]:
# Visualize top 10 performing companies
top_10 = returns_5y.nlargest(10)
fig = px.bar(x=top_10.index, y=top_10.values,
             title='Top 10 Companies by 5-Year Return')
fig.update_layout(xaxis_title='Company', yaxis_title='5-Year Return (%)')
fig.show()


In [None]:
# Detailed analysis of the top performer
top_stock = yf.Ticker(top_performer)

# Get financial data
financials = top_stock.financials
print(f"\nFinancial Summary for {top_performer}:")
print(financials)


Financial Summary for GOOGL:
                                                        2023-12-31  \
Tax Effect Of Unusual Items                           -286479000.0   
Tax Rate For Calcs                                           0.139   
Normalized EBITDA                                   100032000000.0   
Total Unusual Items                                  -2061000000.0   
Total Unusual Items Excluding Goodwill               -2061000000.0   
Net Income From Continuing Operation Net Minori...   73795000000.0   
Reconciled Depreciation                              11946000000.0   
Reconciled Cost Of Revenue                          133332000000.0   
EBITDA                                               97971000000.0   
EBIT                                                 86025000000.0   
Net Interest Income                                   3557000000.0   
Interest Expense                                       308000000.0   
Interest Income                                       386500

In [None]:
# Get business summary
print(f"\nBusiness Summary for {top_performer}:")
print(top_stock.info['longBusinessSummary'])

# Plot stock price history
history = top_stock.history(period="5y")
fig = px.line(history, x=history.index, y='Close', title=f'{top_performer} Stock Price - Last 5 Years')
fig.show()


Business Summary for GOOGL:
Alphabet Inc. offers various products and platforms in the United States, Europe, the Middle East, Africa, the Asia-Pacific, Canada, and Latin America. It operates through Google Services, Google Cloud, and Other Bets segments. The Google Services segment provides products and services, including ads, Android, Chrome, devices, Gmail, Google Drive, Google Maps, Google Photos, Google Play, Search, and YouTube. It is also involved in the sale of apps and in-app purchases and digital content in the Google Play and YouTube; and devices, as well as in the provision of YouTube consumer subscription services. The Google Cloud segment offers infrastructure, cybersecurity, databases, analytics, AI, and other services; Google Workspace that include cloud-based communication and collaboration tools for enterprises, such as Gmail, Docs, Drive, Calendar, and Meet; and other services for enterprise customers. The Other Bets segment sells healthcare-related and internet se


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



In [None]:
# Calculate and plot moving averages
history['MA50'] = history['Close'].rolling(window=50).mean()
history['MA200'] = history['Close'].rolling(window=200).mean()

fig = go.Figure()
fig.add_trace(go.Scatter(x=history.index, y=history['Close'], name='Close'))
fig.add_trace(go.Scatter(x=history.index, y=history['MA50'], name='50-day MA'))
fig.add_trace(go.Scatter(x=history.index, y=history['MA200'], name='200-day MA'))
fig.update_layout(title=f'{top_performer} Stock Price with Moving Averages', xaxis_title='Date', yaxis_title='Price')
fig.show()

In [None]:
"""
Problem Statement

Investors need to identify the most promising sectors and companies for investment. This project solves this by:

1. **Evaluating Sector Performance**: Analyzing metrics like cumulative returns, volatility, Sharpe ratio, maximum drawdown,
and risk measures (VaR and CVaR) to compare sector performance.

2. **Identifying Top Companies**: Finding the top-performing companies within the leading sectors based on 5-year returns.

3. **Detailed Analysis of Top Performers**: Providing financial summaries, business insights, and visualizing stock trends
and moving averages for the leading companies.

The project offers a comprehensive framework for informed investment decisions based on historical data and risk assessment.
"""

