In [None]:
# ===============================
# import libraries
# ===============================
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error


In [None]:
# ===============================
# loading NVIDIA stock data
# ===============================
# Symbol and look-back window are named here so they can be tuned in one place.
TICKER_SYMBOL = "NVDA"
HISTORY_PERIOD = "1y"  # last 1 year of data

ticker = yf.Ticker(TICKER_SYMBOL)
data = ticker.history(period=HISTORY_PERIOD)

# Stop here if nothing came back: an empty frame would otherwise only surface
# later as a confusing plotting or model-fitting error.
# NOTE(review): yfinance is expected to return an empty frame (rather than
# raise) on a failed download -- confirm against the installed version.
if data.empty:
    raise RuntimeError(
        f"No price history returned for {TICKER_SYMBOL!r} "
        f"(period={HISTORY_PERIOD!r}); check the symbol and network access."
    )


In [None]:
# ===============================
# Plotting closing price
# ===============================
# Explicit figure/axes handles instead of the implicit pyplot "current axes".
fig, ax = plt.subplots(figsize=(12, 6))
data['Close'].plot(ax=ax, title='NVIDIA Stock Closing Prices - Last 1 Year')
ax.set_xlabel("Date")
ax.set_ylabel("Price ($)")
ax.grid(True)
ax.legend(['Close Price'])
plt.show()


In [None]:
# ===============================
# cleaning the data and feature engineering
# ===============================
# NOTE: this cell rebinds `data`. The 21-row rolling mean is NaN for the first
# 20 rows and those rows are dropped below, so re-running this cell without
# re-running the loading cell trims the frame further each time.
print("Missing values before cleaning:\n", data.isnull().sum())
cleaned = data.dropna()

# feature engineering
# Every feature is computed from the cleaned frame in a single assign() call,
# which returns a new frame rather than mutating the input.
engineered = cleaned.assign(
    **{
        # day-over-day relative change of the close
        'Daily Return': cleaned['Close'].pct_change(),
        # trailing 7- and 21-row means of the close
        'MA7': cleaned['Close'].rolling(window=7).mean(),
        'MA21': cleaned['Close'].rolling(window=21).mean(),
        # intraday move, as an absolute difference and as a ratio
        'price_diff': cleaned['Close'] - cleaned['Open'],
        'close_to_open': cleaned['Close'] / cleaned['Open'],
        # day-over-day relative change of traded volume
        'volume_change': cleaned['Volume'].pct_change(),
    }
)

# pct_change and the Close/Open ratio produce +/-inf when the denominator is 0
# (e.g. a zero-volume session). dropna() does not remove inf and
# LinearRegression.fit rejects non-finite input, so map inf to NaN first.
engineered = engineered.replace([np.inf, -np.inf], np.nan)

# Remove rows with NaN values after feature engineering
data = engineered.dropna()


In [None]:
# ===============================
# preparing features and target variable for the model
# ===============================
# Label each row with the next row's close. The final row has no successor,
# so its target is NaN and the row is dropped.
data['target'] = data['Close'].shift(periods=-1)
data.dropna(axis=0, how='any', inplace=True)

# Columns fed to the model, grouped as raw prices/volume followed by the
# engineered indicators.
features = [
    'Open', 'High', 'Low', 'Close', 'Volume',
    'Daily Return', 'MA7', 'MA21',
    'price_diff', 'close_to_open', 'volume_change',
]

X, y = data[features], data['target']

# Quick sanity check of the design matrix and the labels.
print("Shape of X:", X.shape)
print("Shape of y:", y.shape)
print(X.head())
print(y.head())


In [None]:
# ===============================
# train-test split
# ===============================
# shuffle=False keeps row order, so the last 20% of rows form the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=False,
)

# ===============================
# linear regression model
# ===============================
# Fit a linear regression with default settings on the training rows.
model = LinearRegression()
model.fit(X_train, y_train)


In [None]:
# ===============================
# predicting and evaluating the model
# ===============================
# Score the held-out rows with the fitted model.
y_pred = model.predict(X_test)

# MAE and RMSE are both expressed in the same units as the target.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

print(f"📏 Mean Absolute Error (MAE): {mae}")
print(f"📏 Root Mean Squared Error (RMSE): {rmse}")

In [None]:
# ===============================
# Visualization: actual vs predicted
# ===============================
# Both series are drawn against the test set's index so they line up.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(y_test.index, y_test, label='Actual Prices', color='blue')
ax.plot(y_test.index, y_pred, label='Predicted Prices', color='red')
ax.set_title('Linear Regression: Actual vs Predicted Stock Price')
ax.set_xlabel('Date')
ax.set_ylabel('Price ($)')
ax.legend()
ax.grid(True)
plt.show()


In [None]:
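# ===============================
# saving the model (disabled)
# ===============================
# Uncomment the two lines below to write the fitted model to
# 'nvda_stock_model.pkl' with joblib.
# import joblib
# joblib.dump(model, 'nvda_stock_model.pkl')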