In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from scipy.stats import f_oneway

%matplotlib inline

In [None]:
# Load the price history: parse the 'Date' column as timestamps, use it as
# the index, and order the rows by that index.
# NOTE(review): the CSV path is an absolute local path, so this cell only runs
# on a machine where that exact file exists — consider a configurable data dir.
df = (
    pd.read_csv("C:/Users/HP/Downloads/stocks.csv", parse_dates=['Date'])
    .set_index('Date')
    .sort_index()
)
df.head()


In [None]:

# Line chart of the 'Close' column against the frame's index.
fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(df['Close'], label='Nifty 50 Close Price')
ax.set(
    title='Nifty 50 Index (2000–2024)',
    xlabel='Date',
    ylabel='Index Value',
)
ax.legend()
ax.grid(True)
plt.show()

In [None]:
# Events to annotate on the chart: legend label -> date string.
events = {
    '2008 Financial Crisis': '2008-09-15',
    '2016 Demonetization': '2016-11-08',
    'COVID-19 Crash': '2020-03-23',
}

fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(df['Close'], label='Nifty 50 Close Price')

# Draw one dashed red vertical line per event; the label feeds the legend.
for event, date in events.items():
    ax.axvline(pd.to_datetime(date), color='red', linestyle='--', label=event)

ax.set(
    title='Nifty 50 with Major Economic Events',
    xlabel='Date',
    ylabel='Index Value',
)
ax.legend()
ax.grid(True)
plt.show()

In [None]:

# Tag every row with the calendar year taken from the index.
df['Year'] = df.index.year

# One sample of day-over-day percentage changes in 'Close' per year. The
# change is computed inside each year, so the first row of a year has no
# predecessor and is removed by dropna().
# NOTE(review): only years with more than 250 rows are kept — confirm this
# threshold does not also exclude complete years in this dataset.
closes_by_year = df.groupby('Year')['Close']
returns_by_year = [
    closes.pct_change().dropna()
    for _, closes in closes_by_year
    if len(closes) > 250
]

# One-way ANOVA across the yearly return samples.
anova_result = f_oneway(*returns_by_year)
anova_result


In [None]:

# Simple model using lag features: predict each row's 'Close' from the
# previous row's 'Close'.
#
# The feature is built behind a guard so this cell is idempotent. Without the
# guard, every re-run would shift the already-trimmed frame again and dropna()
# would discard one more leading row, silently changing the train/test data.
if 'Prev_Close' not in df.columns:
    df['Prev_Close'] = df['Close'].shift(1)
    # shift(1) leaves the first row without a predecessor; dropna() removes it
    # (along with any other row that has a missing value in any column).
    df = df.dropna()

X = df[['Prev_Close']]
y = df['Close']

# shuffle=False keeps the row order: the first 80% of rows are used for
# training and the last 20% for testing (no random shuffling across time).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# KNN Model
# NOTE(review): presumably KNN predicts by averaging nearby training targets,
# so it may score poorly if test-period prices fall outside the training
# range — verify before comparing it with the linear model.
knn = KNeighborsRegressor(n_neighbors=5)
knn.fit(X_train, y_train)
knn_preds = knn.predict(X_test)

# Linear Regression Model
lr = LinearRegression()
lr.fit(X_train, y_train)
lr_preds = lr.predict(X_test)

# Evaluation: R² of each model on the held-out tail of the data.
print("KNN R²:", r2_score(y_test, knn_preds))
print("LR R²:", r2_score(y_test, lr_preds))