In [15]:
# %matplotlib widgets

# packages and modules required 

import pandas as pd
import numpy as np

#  visualization
import plotly.graph_objs as go
from plotly.subplots import make_subplots


## DATA PREPROCESSING

### National  Stock  Exchange  (NSE)  India  publishes  the  niftystock  market  index.  The  NIFTY50  is  the  coveted  index  consisting of the top 50 companies trading on the exchange. It is one of the two main stock indices used in India. hence  indices  are  a  composition  of  many  stocks  from  different  sectors  that  collectively represent  the  state  of  the  economy.  

In [16]:
nifty50_df = pd.read_csv('../datasets/inidices/NIFTY 50.csv')

nifty50_df.columns

Index(['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Turnover'], dtype='object')

In [17]:
nifty50_df.shape

(7510, 7)

In [18]:
nifty50_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7510 entries, 0 to 7509
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Date      7510 non-null   object 
 1   Open      7510 non-null   float64
 2   High      7510 non-null   float64
 3   Low       7510 non-null   float64
 4   Close     7510 non-null   float64
 5   Volume    6097 non-null   float64
 6   Turnover  6097 non-null   float64
dtypes: float64(6), object(1)
memory usage: 410.8+ KB


### From the volume and Turnover columns we see that they have null values as Stock inception in India is on 1995-11-03 
- We consider the timeperiod 5 years from the start of the 21st century 
    - covers pre and post recession period 
    - We follow similar numerosity reduction for the other assets(gold), and for the other indices being used

In [19]:
start = nifty50_df[nifty50_df['Date']=='2005-01-03'].index[0]
nifty50_df = nifty50_df[start:]
nifty50_df.head

<bound method NDFrame.head of             Date      Open      High       Low     Close       Volume  \
3418  2005-01-03   2080.00   2118.60   2080.00   2115.00   70506865.0   
3419  2005-01-04   2116.95   2120.15   2100.55   2103.75   72718302.0   
3420  2005-01-05   2103.75   2105.10   1990.15   2032.20  109223487.0   
3421  2005-01-06   2031.55   2035.65   1984.25   1998.35   94321939.0   
3422  2005-01-07   1998.25   2021.45   1992.55   2015.50   76285818.0   
...          ...       ...       ...       ...       ...          ...   
7505  2021-06-25  15839.35  15870.80  15772.30  15860.35  314614380.0   
7506  2021-06-28  15915.35  15915.65  15792.15  15814.70  255099272.0   
7507  2021-06-29  15807.50  15835.90  15724.05  15748.45  360334724.0   
7508  2021-06-30  15776.90  15839.10  15708.75  15721.50  262386323.0   
7509  2021-07-01  15755.05  15755.55  15667.05  15680.00  224921644.0   

          Turnover  
3418  2.375100e+10  
3419  2.416130e+10  
3420  3.622020e+10  
3421  3.2

In [20]:
nifty50_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4092 entries, 3418 to 7509
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Date      4092 non-null   object 
 1   Open      4092 non-null   float64
 2   High      4092 non-null   float64
 3   Low       4092 non-null   float64
 4   Close     4092 non-null   float64
 5   Volume    4092 non-null   float64
 6   Turnover  4092 non-null   float64
dtypes: float64(6), object(1)
memory usage: 223.9+ KB


### Similarly for the other datasets we are using we go for numerosity reduction (start of time:2000-01-03) for the above mentioned reasons

In [21]:
niftyAuto_df = pd.read_csv('../datasets/inidices/NIFTY AUTO.csv')
start = niftyAuto_df[niftyAuto_df['Date']=='2005-01-03'].index[0]
niftyAuto_df = niftyAuto_df[start:]

niftyBank_df = pd.read_csv('../datasets/inidices/NIFTY BANK.csv')
start = niftyBank_df[niftyBank_df['Date']=='2005-01-03'].index[0]
niftyBank_df = niftyBank_df[start:]

niftyIT_df = pd.read_csv('../datasets/inidices/NIFTY IT.csv')
start = niftyIT_df[niftyIT_df['Date']=='2005-01-03'].index[0]
niftyIT_df = niftyIT_df[start:]

niftyFin_Services_df = pd.read_csv('../datasets/inidices/NIFTY FIN SERVICE.csv')
start = niftyFin_Services_df[niftyFin_Services_df['Date']=='2005-01-03'].index[0]
niftyFin_Services_df = niftyFin_Services_df[start:]


niftyPharma_df = pd.read_csv('../datasets/inidices/NIFTY PHARMA.csv')
start = niftyPharma_df[niftyPharma_df['Date']=='2005-01-03'].index[0]
niftyPharma_df = niftyPharma_df[start:]


print("Auto index info:\n")
niftyAuto_df.info()


Auto index info:

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4092 entries, 254 to 4345
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Date      4092 non-null   object 
 1   Open      2414 non-null   float64
 2   High      2414 non-null   float64
 3   Low       2414 non-null   float64
 4   Close     4092 non-null   float64
 5   Volume    2461 non-null   float64
 6   Turnover  2461 non-null   float64
dtypes: float64(6), object(1)
memory usage: 223.9+ KB


### The difference in non-null count between date and open indicates null values : Here numerosity reduction is preferred as at these dates(old) ,
- Data wasn't collected for 'AUTO SECTOR', for daily open/ high / low / volume. 
- So during analysis we must just ignore those tuples ( which are before 2011 - seen by inspection)
- We can prefer to use CLOSE attribute for EDA only, analysis for this data can be done only using close data again

In [22]:
print("\n\nBank index info:\n")
niftyBank_df.info()



Bank index info:

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4092 entries, 1258 to 5349
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Date      4092 non-null   object 
 1   Open      3982 non-null   float64
 2   High      3982 non-null   float64
 3   Low       3982 non-null   float64
 4   Close     4092 non-null   float64
 5   Volume    4082 non-null   float64
 6   Turnover  4082 non-null   float64
dtypes: float64(6), object(1)
memory usage: 223.9+ KB


In [23]:
print("\n\nIT index info:\n")
niftyIT_df.info()



IT index info:

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4092 entries, 2235 to 6326
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Date      4092 non-null   object 
 1   Open      4092 non-null   float64
 2   High      4092 non-null   float64
 3   Low       4092 non-null   float64
 4   Close     4092 non-null   float64
 5   Volume    4082 non-null   float64
 6   Turnover  4082 non-null   float64
dtypes: float64(6), object(1)
memory usage: 223.9+ KB


In [24]:
print("\n\nFin_Services index info:\n")
niftyFin_Services_df.info()



Fin_Services index info:

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4092 entries, 254 to 4345
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Date      4092 non-null   object 
 1   Open      2315 non-null   float64
 2   High      2315 non-null   float64
 3   Low       2315 non-null   float64
 4   Close     4092 non-null   float64
 5   Volume    2417 non-null   float64
 6   Turnover  2417 non-null   float64
dtypes: float64(6), object(1)
memory usage: 223.9+ KB


In [25]:
print("\n\nPharma index info:\n")
niftyPharma_df.info()



Pharma index info:

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4092 entries, 1007 to 5098
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Date      4092 non-null   object 
 1   Open      2583 non-null   float64
 2   High      2583 non-null   float64
 3   Low       2583 non-null   float64
 4   Close     4092 non-null   float64
 5   Volume    3959 non-null   float64
 6   Turnover  3959 non-null   float64
dtypes: float64(6), object(1)
memory usage: 223.9+ KB


### There is a similar problem with Pharma, Financial, IT and bank Sector data as well
- So we prefer to just use the close attribute again.

In [26]:
#  We have the start point from 2009-12-01
goldPrices_df = pd.read_csv('../datasets/gold_prices.csv')

# need to reverse the dataset as last row has oldest date
goldPrices_df = goldPrices_df[::-1]

print("\n\nGold prices info:\n")
goldPrices_df.info()



Gold prices info:

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1660 entries, 1659 to 0
Data columns (total 13 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Date                       1660 non-null   object 
 1   Open                       1660 non-null   float64
 2   High                       1660 non-null   float64
 3   Low                        1660 non-null   float64
 4   Close                      1660 non-null   float64
 5   WAP                        1660 non-null   float64
 6   No. of Shares              1660 non-null   float64
 7   No. of Trades              1660 non-null   float64
 8   Total Turnover             1660 non-null   float64
 9   Deliverable Quantity       1660 non-null   float64
 10  % Deli. Qty to Traded Qty  1660 non-null   float64
 11  Spread H-L                 1660 non-null   float64
 12  Spread C-O                 1660 non-null   float64
dtypes: float64(12), object(1)
m

### The data source for gold prices has given us perfectly clean data

## All data has been preprocessed, we can move to Descriptive Analytics

### DATA Attribute types:
- **The Open and Close columns** indicate the opening and closing price of the stocks on a particular day.                  *(CONTINUOUS Ratio val)*
- **The High and Low columns** provide the highest and the lowest price for the stock on a particular day, respectively.    *(CONTINUOUS Ratio val)*
- **The Volume column** tells us the total volume of stocks traded on a particular day.                                     *(CONTINUOUS Ratio val)*
- **The Turnover column** refers to the total value of stocks traded during a specific period of time.                      *(CONTINUOUS Ratio val)*
                (The time period may be annually, quarterly, monthly or daily)

### We plan to use the indices(as of now part of different dataframes) as *categorical* values : BANk , AUTO ,IT, Bullion(gold prices)

# Comparing Sector Indices with companies in that sector

## TCS and INFY vs NIFTY IT(Benchmark)

In [38]:
infy_df = pd.read_csv("../datasets/companies/INFY.csv")
start = infy_df[infy_df['Date']=='2005-01-03'].index[0]
infy_df = infy_df[start:]

tcs_df = pd.read_csv("../datasets/companies/TCS.csv")
start = tcs_df[tcs_df['Date']=='2005-01-03'].index[0]
tcs_df = tcs_df[start:]

fig = go.Figure()
        

fig.add_trace(go.Scatter(
         x=niftyIT_df['Date'],
         y=niftyIT_df['Close'],
        name='IT',
        line=dict(color='blue'),
    opacity=1))        

fig.add_trace(go.Scatter(
         x=infy_df['Date'],
         y=infy_df['Close'],
        name='Infy',
        line=dict(color='green'),
    opacity=1))

fig.add_trace(go.Scatter(
         x=tcs_df['Date'],
         y=tcs_df['Close'],
        name='TCS',
        line=dict(color='pink'),
    opacity=1))
    
fig.update_layout(title_text="IT sector comparison", xaxis_title = 'Year', yaxis_title='Value')

fig.show()

## INSIGHTS FOR ABOVE PLOT
- **Infosys** took a hit during december 2014 because it took a while to replace the CEO, and the public perception to new CEO and his ideas also weren't that good.
- **IT** Took a hit during early 2020 due to covid and work-from-home restrictions freezing companies. But it recovers quickly as new changes improve employee efficiency and hence the companies get boosted performance.
- **TCS** The stocks drop by over half in may 2018 due to shares turning into ex-bonus.

## BAJAJ-Auto and TATAMOTORS vs AUTO index(benchmark)

In [28]:
bajaj_df = pd.read_csv("../datasets/companies/BAJAJ-AUTO.csv")
start = bajaj_df[bajaj_df['Date']=='2008-05-26'].index[0]
bajaj_df = bajaj_df[start:]

tatamotors_df = pd.read_csv("../datasets/companies/TATAMOTORS.csv")
start = tatamotors_df[tatamotors_df['Date']=='2008-05-26'].index[0]
tatamotors_df = tatamotors_df[start:]

fig = go.Figure()        

fig.add_trace(go.Scatter(
         y=niftyAuto_df[niftyAuto_df['Date']>='2008-05-26']['Close'],
         x=niftyAuto_df[niftyAuto_df['Date']>='2008-05-26']['Date'],
        name='AUTO',
        line=dict(color='blue'),
    opacity=1))        

fig.add_trace(go.Scatter(
         x=bajaj_df['Date'],
         y=bajaj_df['Close'],
        name='Bajaj',
        line=dict(color='green'),
    opacity=1))

fig.add_trace(go.Scatter(
         x=tatamotors_df['Date'],
         y=tatamotors_df['Close'],
        name='Tata motors',
        line=dict(color='pink'),
    opacity=1))
    
fig.update_layout(title_text="Auto sector comparison", xaxis_title = 'Year', yaxis_title='Value')

fig.show()

## INSIGHTS FOR ABOBE PLOT
- **Bajaj** saw an all time high in August 2010 due to a new bike release that became popular
- **Tata motors** saw a big drop in September 2011 due to a shutdown of a popular model
- **AUTO** and **Bajaj** experience a drop during the start of 2020 due to covid restrictions

## PFizer and PANACEABIO vs Pharma index(benchmark) 
- the comparison can be done from 2005 
- PFIZER : Pfizer vaccine
- PANACEBIO : parent of serum ltd: Covidsheild

In [30]:
pfizer_df = pd.read_csv("../datasets/companies/PFIZER.csv")
start = pfizer_df[pfizer_df['Date']=='2005-01-03'].index[0]
pfizer_df = pfizer_df[start:]

panaceaBio_df = pd.read_csv("../datasets/companies/PANACEABIO.csv")
start = panaceaBio_df[panaceaBio_df['Date']=='2005-01-03'].index[0]
panaceaBio_df = panaceaBio_df[start:]

fig = go.Figure()
        

fig.add_trace(go.Scatter(
         y=niftyPharma_df[niftyPharma_df['Date']>='2005-01-03']['Close'],
         x=niftyPharma_df[niftyPharma_df['Date']>='2005-01-03']['Date'],
        name='Pharma',
        line=dict(color='blue'),
    opacity=1))        

fig.add_trace(go.Scatter(
         x=pfizer_df['Date'],
         y=pfizer_df['Close'],
        name='Pfizer',
        line=dict(color='green'),
    opacity=1))

fig.add_trace(go.Scatter(
         x=panaceaBio_df['Date'],
         y=panaceaBio_df['Close'],
        name='PanaceaBio',
        line=dict(color='pink'),
    opacity=1))
    
fig.update_layout(title_text="Pharma sector comparison(2005+)", xaxis_title = 'Year', yaxis_title='Value')

fig.show()

## INSIGHTS FOR ABOVE PLOT
- **PanaceaBio** has no notable changes in stocks over the covid season. 
- **Pfizer** gained a lot during the start of covid due to gaining a partial approval of its vaccine in early April 2020 and later on a full approval in August 2020
- **Pharma** also took a dip during the initial covid phase and eventually reached its new all time high over the covid season.

## Trend during Covid (2019+) for PHARMA SECTOR ( COMPARIZON OF companies with benchmark)

In [32]:

niftyPharma_covid = niftyPharma_df[niftyPharma_df['Date']>='2019-01-01']

fig = go.Figure()
        

fig.add_trace(go.Scatter(
         x=niftyPharma_covid['Date'],
         y=niftyPharma_covid['Close'],
        name='Pharma',
        line=dict(color='blue'),
    opacity=1))        

fig.add_trace(go.Scatter(
         x=pfizer_df[pfizer_df['Date']>='2019-01-01']['Date'],
         y=pfizer_df[pfizer_df['Date']>='2019-01-01']['Close'],
        name='Pfizer',
        line=dict(color='green'),
    opacity=1))

fig.add_trace(go.Scatter(
         x=panaceaBio_df[panaceaBio_df['Date']>='2019-01-01']['Date'],
         y=panaceaBio_df[panaceaBio_df['Date']>='2019-01-01']['Close'],
        name='PanaceaBio',
        line=dict(color='pink'),
    opacity=1))
    
fig.update_layout(title_text="Pharma sector comparison(COVID(2019+))", xaxis_title = 'Year', yaxis_title='Value')

fig.show()

## INSIGHTS For above plot
- We can clearly see **Pharma** as a whole rapidly increasing its stock value in this period due to covid.
- **Pfizer** has a pead in april due to the release of the vaccine.

## Bajfinance and LIC Housing Finance vs FIN Services index(benchmark)

In [33]:
bajFin_df = pd.read_csv("../datasets/companies/BAJFINANCE.csv")
start = bajFin_df[bajFin_df['Date']=='2005-01-03'].index[0]
bajFin_df = bajFin_df[start:]

lic_df = pd.read_csv("../datasets/companies/LICHSGFIN.csv")
start = lic_df[lic_df['Date']=='2005-01-03'].index[0]
lic_df = lic_df[start:]

fig = go.Figure()
        

fig.add_trace(go.Scatter(
         y=niftyFin_Services_df[niftyFin_Services_df['Date']>='2005-01-03']['Close'],
         x=niftyFin_Services_df[niftyFin_Services_df['Date']>='2005-01-03']['Date'],
        name='Fin Services',
        line=dict(color='blue'),
    opacity=1))        

fig.add_trace(go.Scatter(
         x=bajFin_df['Date'],
         y=bajFin_df['Close'],
        name='Bajaj Finance',
        line=dict(color='green'),
    opacity=1))

fig.add_trace(go.Scatter(
         x=lic_df['Date'],
         y=lic_df['Close'],
        name='Lic Housing Finance ltd',
        line=dict(color='pink'),
    opacity=1))
    
fig.update_layout(title_text="Fin-Services sector comparison", xaxis_title = 'Year', yaxis_title='Value')

fig.show()

## INSIGHTS For above plot
- **Bajaj Finance** reached an all time high during July 2016 due to hold and wait strategy to artificially inflate the stock.
- All companies drop during early covid and then regain their usual value by the end of the first lockdown.

## Sbi Bank and HDFC vs Bank index(benchmark)


In [34]:
sbi_df = pd.read_csv("../datasets/companies/SBIN.csv")
start = sbi_df[sbi_df['Date']=='2005-01-03'].index[0]
sbi_df = sbi_df[start:]

hdfc_df = pd.read_csv("../datasets/companies/HDFCBANK.csv")
start = hdfc_df[hdfc_df['Date']=='2005-01-03'].index[0]
hdfc_df = hdfc_df[start:]

fig = go.Figure()
        

fig.add_trace(go.Scatter(
         y=niftyBank_df[niftyBank_df['Date']>='2005-01-03']['Close'],
         x=niftyBank_df[niftyBank_df['Date']>='2005-01-03']['Date'],
        name='Bank',
        line=dict(color='blue'),
    opacity=1))        

fig.add_trace(go.Scatter(
         x=sbi_df['Date'],
         y=sbi_df['Close'],
        name='SBI Bank',
        line=dict(color='green'),
    opacity=1))

fig.add_trace(go.Scatter(
         x=hdfc_df['Date'],
         y=hdfc_df['Close'],
        name='HDFC Bank',
        line=dict(color='pink'),
    opacity=1))
    
fig.update_layout(title_text="Banking sector comparison", xaxis_title = 'Year', yaxis_title='Value')

fig.show()

## INSIGHTS For above plot
- **SBI Bank** subdivided its equity shares on November 2014 resulting in a sharp drop in its value.
- The sector as a whole took a hit and then reached a new all time high due to increase in online banking.

## Reliance vs ITC (FMCG sector)

In [39]:
itc_df = pd.read_csv("../datasets/companies/ITC.csv")
start = itc_df[itc_df['Date']=='2005-01-03'].index[0]
itc_df = itc_df[start:]

reliance_df = pd.read_csv("../datasets/companies/RELIANCE.csv")
start = reliance_df[reliance_df['Date']=='2005-01-03'].index[0]
reliance_df = reliance_df[start:]

fig = go.Figure()
        

fig.add_trace(go.Scatter(
         y=itc_df['Close'],
         x=itc_df['Date'],
        name='ITC',
        line=dict(color='blue'),
    opacity=1))        

fig.add_trace(go.Scatter(
         x=reliance_df['Date'],
         y=reliance_df['Close'],
        name='Reliance',
        line=dict(color='green'),
    opacity=1))


fig.update_layout(title_text="FMCG sector(Close) comparison", xaxis_title = 'Year', yaxis_title='Value')

fig.show()

## INSIGHTS For above plot
- **ITC** subdivided its stocks on August 2015 resulting in a massive drop in value.
- **Reliance** founded JIO in 2007 resulting in a spike in stock prices.

In [36]:
niftyAuto_covid = niftyAuto_df[(niftyAuto_df['Date'] >= '2019-01-01')]
niftyBank_covid = niftyBank_df[(niftyBank_df['Date'] >= '2019-01-01')]
niftyIT_covid = niftyIT_df[(niftyIT_df['Date'] >= '2019-01-01')]
niftyFin_Services_covid = niftyFin_Services_df[(niftyFin_Services_df['Date'] >= '2019-01-01')]
niftyPharma_covid = niftyPharma_df[(niftyPharma_df['Date'] >= '2019-01-01')]

fig = go.Figure()

fig = make_subplots(rows=3, cols=2,
        subplot_titles=("AUTO", " Bank", "IT", "Fin services", "Pharma"," ")
        
)

fig.add_trace(go.Scatter(
         x=niftyAuto_covid['Date'],
         y=niftyAuto_covid['Close']),
    
        row=1, col=1)        

fig.add_trace(go.Scatter(
         x=niftyBank_covid['Date'],
         y=niftyBank_covid['Close']),

        row=1, col=2)


fig.add_trace(go.Scatter(
         x=niftyIT_covid['Date'],
         y=niftyIT_covid['Close']),
        row=2, col=1)


fig.add_trace(go.Scatter(
         x=niftyFin_Services_covid['Date'],
         y=niftyFin_Services_covid['Close']),
        row=2, col=2)


fig.add_trace(go.Scatter(
         x=niftyPharma_covid['Date'],
         y=niftyPharma_covid['Close']),
        row=3,col=1)



    
fig.update_layout(title_text="Covid Season Trends of sectors")

fig.show()

## INSIGHTS For above plot
- Automobile, Banking , Financial Serivices sectors took a hit during Covid-19.
- Both Pharma and IT haven't seen this dip 
- Pharma: Recorded a high as people were bullish on the development of vaccines
- IT: Closing down of industries hasn't been a hurdle to this sector as it was possible for employess to work from home.

# Trends observed because of/during the Great Recession (2008)


In [None]:
nifty50_2008_to_2010 = nifty50_df[(nifty50_df['Date'] >= '2008-01-01') & (nifty50_df['Date'] <= '2010-12-31')]
fig = go.Figure()
fig.add_trace(go.Scatter(
         x=nifty50_2008_to_2010['Date'],
         y=nifty50_2008_to_2010['Low'],
         name='Price',
    line=dict(color='red'),
    opacity=1))
        
    
fig.update_layout(title_text="NIFTY-50 Trend (2008 - 2010)", xaxis_title = 'Year', yaxis_title='Value')

fig.show()

## We see from this figure that in general the stock market has recovered from losses around/after July 2010
- India is a developing country and did not depend only on trades, etc,.
- The housing crisis didn't affect the South Asian countries and even if it did, the sectors bounced back with rigour.

# Sector wise comparison of trend during recession

In [None]:
niftyBank_2008_to_2010 = niftyBank_df[(niftyBank_df['Date'] >= '2008-01-01') & (niftyBank_df['Date'] <= '2010-12-31')]
niftyIT_2008_to_2010 = niftyIT_df[(niftyIT_df['Date'] >= '2008-01-01') & (niftyIT_df['Date'] <= '2010-12-31')]
niftyAuto_2008_to_2010 = niftyAuto_df[(niftyAuto_df['Date'] >= '2008-01-01') & (niftyAuto_df['Date'] <= '2010-12-31')]
niftyFin_Services_2008_to_2010 = niftyFin_Services_df[(niftyFin_Services_df['Date'] >= '2008-01-01') & (niftyFin_Services_df['Date'] <= '2010-12-31')]
niftyPharma_2008_to_2010 = niftyPharma_df[(niftyPharma_df['Date'] >= '2008-01-01') & (niftyPharma_df['Date'] <= '2010-12-31')]

fig = go.Figure()
fig.add_trace(go.Scatter(
         x=niftyBank_2008_to_2010['Date'],
         y=niftyBank_2008_to_2010['Close'],
        name='Bank',
        line=dict(color='orange'),
    opacity=1))

fig.add_trace(go.Scatter(
         x=niftyIT_2008_to_2010['Date'],
         y=niftyIT_2008_to_2010['Close'],
        name='IT',
        line=dict(color='blue'),
    opacity=1))        

fig.add_trace(go.Scatter(
         x=niftyAuto_2008_to_2010['Date'],
         y=niftyAuto_2008_to_2010['Close'],
        name='Auto',
        line=dict(color='green'),
    opacity=1))

fig.add_trace(go.Scatter(
         x=niftyFin_Services_2008_to_2010['Date'],
         y=niftyFin_Services_2008_to_2010['Close'],
        name='Fin_Services',
        line=dict(color='pink'),
    opacity=1))

fig.add_trace(go.Scatter(
         x=niftyPharma_2008_to_2010['Date'],
         y=niftyPharma_2008_to_2010['Close'],
        name='Pharma',
        line=dict(color='yellow'),
    opacity=1))
    
fig.update_layout(title_text="NIFTY-50 Trend (2008 - 2010)", xaxis_title = 'Year', yaxis_title='Value')

fig.show()

## From the above plot we can clearly see that:
- All sectors got hit, but Auto sector in particular has been affected the most.
- Banking Sector has the quickest recovery.
- India being a developing country had recovered faster than developed countries which was seen from NIFTY-50 Trend fig, so all sectors have recovered by July 2010.
- What we see is PHARMA sector as well as IT Sectors have seen a positive growth in the SHARE prices Whereas Sectors like AUTO, BANK, Fin Services all out-performed IT and Pharma sector before the 3rd lockdown 
- IT sector has seen a boom because of everyone trying to connect to each other digitally and everything moving to a digital format to facilitate it.
- Pharma sector has seen a boom because of the vaccine-season as well as other essential drugs to reduce the spread of the virus.

# We can check bullish/bearish patterns to try and predict stock values on a low scale


## Assumptions
- Post covid, people avoid public transport and buy their own vehicles to commute.
- It would be considered as a good time to invest in any company in the AUTO sector.


## Chart of the AUTO Index for post covid

In [None]:
niftyAuto_recent = niftyAuto_df[niftyAuto_df['Date'] > '2020-12-31'].copy()    # all data of 2021

niftyAuto_recent['Date'] = pd.to_datetime(niftyAuto_recent['Date'])
niftyAuto_recent.set_index('Date',inplace=True)

fig = go.Figure(go.Candlestick(x=niftyAuto_recent.index, open=niftyAuto_recent.Open, high=niftyAuto_recent.High, low=niftyAuto_recent.Low, close=niftyAuto_recent.Close))
fig.show()



## Some Observations
- Clearly at the start of the year we see **Bearish** patterns(open < close price), so an investor if he/she bought shares during 1st week of January and knew that there would be a dip mid February and chose to sell. This would be a great investment.
- The investment during this period can be categorized as **Bearish** , so the expected returns from stock would be greater than FD, or real-estate or bullion(gold) during the same time-period.
- **CANDLE CHARTS** can clearly capture the fluctuations caused by external factors(covid-19) which can be seen in the above example.


## Current turnovers of different sectors

In [37]:
niftyMetal_df = pd.read_csv("../datasets/inidices/NIFTY METAL.csv")

nifty_IT_current = niftyIT_df[niftyIT_df['Date'] > '2019-12-31']['Turnover'].mean()
nifty_bank_current = niftyBank_df[niftyBank_df['Date'] > '2019-12-31']['Turnover'].mean()
nifty_auto_current = niftyAuto_df[niftyAuto_df['Date'] > '2019-12-31']['Turnover'].mean()
nifty_metal_current = niftyMetal_df[niftyMetal_df['Date'] > '2019-12-31']['Turnover'].mean()
nifty_pharma_current = niftyPharma_df[niftyPharma_df['Date'] > '2019-12-31']['Turnover'].mean()
nifty_fin_services_current = niftyFin_Services_df[niftyFin_Services_df['Date']>'2019-12-31']['Turnover'].mean()

labels = ['Auto','Bank','Pharma','Fin Services','IT']

values = [nifty_auto_current, nifty_bank_current, nifty_pharma_current,nifty_fin_services_current,nifty_IT_current]

fig = go.Figure(data=[go.Pie(labels=labels, values=values)])
fig.update_layout(title_text="Current Turnovers of Different Sectors")
fig.show()