In [1]:
# Data Analytics Project: Nifty 50 vs HCL Technologies

# 1. Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import yfinance as yf
import datetime as dt

In [2]:
# 2. Data Collection - Nifty 50
ticker = ['^NSEI']
start = dt.datetime(2011, 1, 1)
end = dt.datetime(2022, 12, 31)

# Fetch all price data for Nifty 50
df = yf.download(ticker, start=start, end=end)
print("Complete Nifty 50 data (first 5 rows):")
print(df.head())

# Fetch only closing prices for Nifty 50
nifty_close = yf.download(ticker, start=start, end=end)['Close']
print("\nNifty 50 Closing Prices (first 5 rows):")
print(nifty_close.head())

  df = yf.download(ticker, start=start, end=end)
[*********************100%***********************]  1 of 1 completed
  nifty_close = yf.download(ticker, start=start, end=end)['Close']
[*********************100%***********************]  1 of 1 completed

Complete Nifty 50 data (first 5 rows):
Price             Close         High          Low         Open Volume
Ticker            ^NSEI        ^NSEI        ^NSEI        ^NSEI  ^NSEI
Date                                                                 
2011-01-03  6157.600098  6178.549805  6147.200195  6177.450195      0
2011-01-04  6146.350098  6181.049805  6124.399902  6172.750000      0
2011-01-05  6079.799805  6141.350098  6062.350098  6141.350098      0
2011-01-06  6048.250000  6116.149902  6022.299805  6107.000000      0
2011-01-07  5904.600098  6051.200195  5883.600098  6030.899902      0

Nifty 50 Closing Prices (first 5 rows):
Ticker            ^NSEI
Date                   
2011-01-03  6157.600098
2011-01-04  6146.350098
2011-01-05  6079.799805
2011-01-06  6048.250000
2011-01-07  5904.600098





In [3]:
# 3. Data Collection - Multiple Indices
tickers = ["^NSEI", "^BSESN"]  # Nifty 50 and BSE Sensex
data = {}

for ticker in tickers:
    data[ticker] = yf.download(ticker, start=start, end=end)['Close']

# Combine into single DataFrame
combined_df = pd.concat(data, axis=1)
print("\nCombined Nifty and Sensex Closing Prices (first 5 rows):")
print(combined_df.head())

  data[ticker] = yf.download(ticker, start=start, end=end)['Close']
[*********************100%***********************]  1 of 1 completed
  data[ticker] = yf.download(ticker, start=start, end=end)['Close']
[*********************100%***********************]  1 of 1 completed


Combined Nifty and Sensex Closing Prices (first 5 rows):
                  ^NSEI        ^BSESN
Ticker            ^NSEI        ^BSESN
Date                                 
2011-01-03  6157.600098  20561.050781
2011-01-04  6146.350098  20498.720703
2011-01-05  6079.799805  20301.099609
2011-01-06  6048.250000  20184.740234
2011-01-07  5904.600098  19691.810547





In [4]:
# 4. Data Collection - Nifty 50 and HCL Tech
tickers = ['^NSEI', 'HCLTECH.NS']
data = {}

for ticker in tickers:
    data[ticker] = yf.download(ticker, start=start, end=end)[['Close']]

# Combine and rename columns
stock_df = pd.concat(data, axis=1)
stock_df.columns = ['Nifty', 'HCL']
print("\nNifty and HCL Tech Closing Prices (first 5 rows):")
print(stock_df.head())

  data[ticker] = yf.download(ticker, start=start, end=end)[['Close']]
[*********************100%***********************]  1 of 1 completed
  data[ticker] = yf.download(ticker, start=start, end=end)[['Close']]
[*********************100%***********************]  1 of 1 completed


Nifty and HCL Tech Closing Prices (first 5 rows):
                  Nifty        HCL
Date                              
2011-01-03  6157.600098  81.966270
2011-01-04  6146.350098  83.229660
2011-01-05  6079.799805  85.251045
2011-01-06  6048.250000  86.216667
2011-01-07  5904.600098  83.969658





In [5]:
# 5. Descriptive Statistics
print("\nDescriptive Statistics for both stocks:")
print(stock_df.describe())

# Individual statistics
nifty_stats = stock_df['Nifty'].describe()
hcl_stats = stock_df['HCL'].describe()

print("\nNifty Descriptive Stats:")
print(nifty_stats)
print("\nHCL Descriptive Stats:")
print(hcl_stats)


Descriptive Statistics for both stocks:
              Nifty          HCL
count   2939.000000  2959.000000
mean    9737.655923   409.209470
std     3793.234035   272.957945
min     4544.200195    65.996704
25%     6204.150146   235.138496
50%     8777.150391   351.997009
75%    11521.424805   464.540421
max    18812.500000  1175.340576

Nifty Descriptive Stats:
count     2939.000000
mean      9737.655923
std       3793.234035
min       4544.200195
25%       6204.150146
50%       8777.150391
75%      11521.424805
max      18812.500000
Name: Nifty, dtype: float64

HCL Descriptive Stats:
count    2959.000000
mean      409.209470
std       272.957945
min        65.996704
25%       235.138496
50%       351.997009
75%       464.540421
max      1175.340576
Name: HCL, dtype: float64


In [6]:
# 6. Random Sampling and Z-Score Analysis
# Set random seed for reproducibility
np.random.seed(42)

# Randomly select 5 data points
random_nifty = stock_df['Nifty'].sample(5)
random_hcl = stock_df['HCL'].sample(5)

print("\nRandomly selected Nifty data points:")
print(random_nifty)
print("\nRandomly selected HCL data points:")
print(random_hcl)

# Calculate mean and standard deviation
nifty_mean, nifty_std = stock_df['Nifty'].mean(), stock_df['Nifty'].std()
hcl_mean, hcl_std = stock_df['HCL'].mean(), stock_df['HCL'].std()

# Calculate Z-scores
nifty_z_scores = (random_nifty - nifty_mean) / nifty_std
hcl_z_scores = (random_hcl - hcl_mean) / hcl_std

print("\nZ-scores for randomly selected Nifty points:")
print(nifty_z_scores)
print("\nZ-scores for randomly selected HCL points:")
print(hcl_z_scores)


Randomly selected Nifty data points:
Date
2019-03-05    10987.450195
2017-11-10    10321.750000
2014-02-05     6022.399902
2019-05-15    11157.000000
2019-03-25    11354.250000
Name: Nifty, dtype: float64

Randomly selected HCL data points:
Date
2018-07-30    389.877625
2017-03-22    353.645142
2022-08-12    861.906982
2016-09-07    314.178192
2017-10-13    378.906982
Name: HCL, dtype: float64

Z-scores for randomly selected Nifty points:
Date
2019-03-05    0.329480
2017-11-10    0.153983
2014-02-05   -0.979443
2019-05-15    0.374178
2019-03-25    0.426178
Name: Nifty, dtype: float64

Z-scores for randomly selected HCL points:
Date
2018-07-30   -0.070824
2017-03-22   -0.203564
2022-08-12    1.658488
2016-09-07   -0.348154
2017-10-13   -0.111015
Name: HCL, dtype: float64


In [7]:
# 7. Z-Score Interpretation Function
def interpret_zscore(zscore, ticker):
    if zscore > 0:
        return f"{ticker} is performing well (above mean) (⇗)"
    elif zscore < 0:
        return f"{ticker} is performing poorly (below mean) (⇘)"
    else:
        return f"{ticker} is performing at average (equal to mean) (-)"

# Print interpretations with colored header
print("\n\033[42mZ-Score Interpretation for Random Points\033[0m")
for i in range(5):
    print(f"\nPoint {i+1}:")
    print(f"Nifty Z-Score: {nifty_z_scores.iloc[i]:.2f} - {interpret_zscore(nifty_z_scores.iloc[i], 'Nifty')}")
    print(f"HCL Z-Score: {hcl_z_scores.iloc[i]:.2f} - {interpret_zscore(hcl_z_scores.iloc[i], 'HCL Tech')}")
    print("###################################")


[42mZ-Score Interpretation for Random Points[0m

Point 1:
Nifty Z-Score: 0.33 - Nifty is performing well (above mean) (⇗)
HCL Z-Score: -0.07 - HCL Tech is performing poorly (below mean) (⇘)
###################################

Point 2:
Nifty Z-Score: 0.15 - Nifty is performing well (above mean) (⇗)
HCL Z-Score: -0.20 - HCL Tech is performing poorly (below mean) (⇘)
###################################

Point 3:
Nifty Z-Score: -0.98 - Nifty is performing poorly (below mean) (⇘)
HCL Z-Score: 1.66 - HCL Tech is performing well (above mean) (⇗)
###################################

Point 4:
Nifty Z-Score: 0.37 - Nifty is performing well (above mean) (⇗)
HCL Z-Score: -0.35 - HCL Tech is performing poorly (below mean) (⇘)
###################################

Point 5:
Nifty Z-Score: 0.43 - Nifty is performing well (above mean) (⇗)
HCL Z-Score: -0.11 - HCL Tech is performing poorly (below mean) (⇘)
###################################


In [8]:
# 8. Visualization (Optional - uncomment to use)
# plt.figure(figsize=(12, 6))
# plt.plot(stock_df['Nifty'], label='Nifty 50')
# plt.plot(stock_df['HCL'], label='HCL Tech')
# plt.title('Nifty 50 vs HCL Technologies (2011-2022)')
# plt.xlabel('Date')
# plt.ylabel('Price')
# plt.legend()
# plt.grid(True)
# plt.show()