# In [None]:
import yfinance as yf
import pandas as pd 
import matplotlib.pyplot as plt

# Load the last year of NVIDIA (NVDA) price history via yfinance.
ticker = yf.Ticker("NVDA")
data = ticker.history(period="1y")

# Show the first few rows of the data. A bare `data.head()` expression is
# only rendered when it is the last statement of a notebook cell (and never
# in a plain script), so print it explicitly.
print(data.head())

# Plot the closing price of NVIDIA stock.
# The title is set once, via plt.title(); a `title=` passed to plot() would
# just be overwritten by it. The wording matches the period="1y" window
# requested when the data was downloaded.
data['Close'].plot(figsize=(12, 6))
plt.title("NVIDIA Stock Closing Prices - Last 1 Year")
plt.xlabel("Date")
plt.ylabel("Price ($)")
plt.grid()
plt.legend(['Close Price'])
plt.show()

# Display summary statistics of the data. Printed explicitly because a bare
# `data.describe()` expression in the middle of a cell/script is discarded.
print(data.describe())

# Report the number of missing values per column, then drop every row that
# contains at least one missing value (modifies `data` in place).
print(data.isnull().sum())
data.dropna(inplace=True)

# --- Derived features -------------------------------------------------------
# Row-over-row percentage change of the closing price.
data['Daily Return'] = data['Close'].pct_change(periods=1)

# Rolling means of the closing price over 7-row and 21-row windows.
data['MA7'] = data['Close'].rolling(7).mean()
data['MA21'] = data['Close'].rolling(21).mean()

# Same-row relationship between close and open: absolute gap and ratio.
data['price_diff'] = data['Close'].sub(data['Open'])
data['close_to_open'] = data['Close'].div(data['Open'])

# Row-over-row percentage change of the traded volume.
data['volume_change'] = data['Volume'].pct_change(periods=1)

# pct_change and the rolling windows leave NaN in the leading rows; remove
# those rows before building the target.
data.dropna(inplace=True)

# --- Prediction target ------------------------------------------------------
# Each row's target is the closing price of the following row. The last row
# has no successor, so its target is NaN and the row is dropped.
data['target'] = data['Close'].shift(periods=-1)
data.dropna(inplace=True)

# Columns used as model inputs: the raw price/volume columns plus the
# derived features computed above.
features = [
    'Open', 'High', 'Low', 'Close', 'Volume', 'Daily Return', 'MA7', 'MA21',
    'price_diff', 'close_to_open', 'volume_change',
]

# Feature matrix and target vector. The matrix must be bound to uppercase
# `X`: every statement below refers to `X`, and a lowercase `x` binding
# would make them fail with NameError.
X = data[features]
y = data['target']

# Quick sanity check of the shapes and the first rows of inputs and target.
print("Shape of X:", X.shape)
print("Shape of y:", y.shape)
print(X.head())
print(y.head())
# Splitting the data into training and testing sets