In [118]:
import pandas as pd
import numpy as np

In [119]:
pip install yfinance pandas scikit-learn matplotlib bolt-python

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.2 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [120]:
import yfinance as yf


def get_nq_data(start_date, end_date, interval='1m'):
    """Download price bars for the 'NQ=F' ticker through yfinance.

    Parameters
    ----------
    start_date, end_date
        Bounds of the requested window, forwarded unchanged to
        ``yf.download`` as ``start`` and ``end``.
    interval : str, default '1m'
        Bar size, forwarded unchanged to ``yf.download``.

    Returns
    -------
    The non-empty object returned by ``yf.download`` (a DataFrame whose
    columns are a (Price, Ticker) MultiIndex in this notebook's output).

    Raises
    ------
    ValueError
        If the download yields no rows. Failing here gives a clear message
        instead of an obscure error in a later cell (e.g. fitting a model on
        zero samples).
    """
    nq = yf.download('NQ=F', start=start_date, end=end_date, interval=interval)
    # NOTE(review): yfinance presumably returns an empty frame rather than
    # raising when the window has no data (e.g. 1-minute bars requested for
    # dates that are too old) -- confirm against the installed version.
    if nq is None or nq.empty:
        raise ValueError(
            f"No data returned for 'NQ=F' between {start_date} and {end_date} "
            f"at interval {interval!r}; check the date window and interval."
        )
    return nq

In [121]:
# To satisfy yfinance's 8-day restriction on 1-minute data, we fit this model on a maximum of 8 days of data.

In [122]:
# Fetch bars for the fixed window 2025-05-24 .. 2025-05-31 at the function's
# default interval ('1m').
data = get_nq_data(start_date='2025-05-24', end_date='2025-05-31')

[*********************100%***********************]  1 of 1 completed


In [123]:
# Show the first five rows followed by the column index of the downloaded frame.
print(data.head(), data.columns, sep='\n')

Price                         Close      High       Low      Open Volume
Ticker                         NQ=F      NQ=F      NQ=F      NQ=F   NQ=F
Datetime                                                                
2025-05-25 22:00:00+00:00  20963.75  20987.75  20954.25  20978.00      0
2025-05-25 22:01:00+00:00  20984.75  20984.75  20958.25  20964.00    144
2025-05-25 22:02:00+00:00  20982.25  20989.25  20976.50  20983.50    215
2025-05-25 22:03:00+00:00  20996.00  21006.75  20985.25  20985.75    219
2025-05-25 22:04:00+00:00  20993.50  20998.50  20984.50  20994.75    113
MultiIndex([( 'Close', 'NQ=F'),
            (  'High', 'NQ=F'),
            (   'Low', 'NQ=F'),
            (  'Open', 'NQ=F'),
            ('Volume', 'NQ=F')],
           names=['Price', 'Ticker'])


In [124]:
# Discard rows containing missing values, then move the index into a regular
# column (it appears as 'Datetime' in the preview that follows).
data = data.dropna().reset_index()
# Make sure the 'Datetime' column holds pandas datetimes.
data['Datetime'] = pd.to_datetime(data['Datetime'])

In [125]:
# Preview the first five rows after cleaning.
print(data.head(n=5))

Price                   Datetime     Close      High       Low      Open  \
Ticker                                NQ=F      NQ=F      NQ=F      NQ=F   
0      2025-05-25 22:00:00+00:00  20963.75  20987.75  20954.25  20978.00   
1      2025-05-25 22:01:00+00:00  20984.75  20984.75  20958.25  20964.00   
2      2025-05-25 22:02:00+00:00  20982.25  20989.25  20976.50  20983.50   
3      2025-05-25 22:03:00+00:00  20996.00  21006.75  20985.25  20985.75   
4      2025-05-25 22:04:00+00:00  20993.50  20998.50  20984.50  20994.75   

Price  Volume  
Ticker   NQ=F  
0           0  
1         144  
2         215  
3         219  
4         113  


In [126]:
# prev_close: the close of the preceding row.
data['prev_close'] = data['Close'].shift(1)
# momentum: change in close from the preceding row (close minus prev_close).
data['momentum'] = data['Close'].squeeze().diff()
# The first row has no predecessor, so both new columns are NaN there; drop it.
data = data.dropna()

In [127]:
# Preview the first five rows with the prev_close and momentum columns added.
print(data.head(n=5))

Price                   Datetime     Close      High       Low      Open  \
Ticker                                NQ=F      NQ=F      NQ=F      NQ=F   
1      2025-05-25 22:01:00+00:00  20984.75  20984.75  20958.25  20964.00   
2      2025-05-25 22:02:00+00:00  20982.25  20989.25  20976.50  20983.50   
3      2025-05-25 22:03:00+00:00  20996.00  21006.75  20985.25  20985.75   
4      2025-05-25 22:04:00+00:00  20993.50  20998.50  20984.50  20994.75   
5      2025-05-25 22:05:00+00:00  21000.25  21000.25  20991.75  20991.75   

Price  Volume prev_close momentum  
Ticker   NQ=F                      
1         144   20963.75    21.00  
2         215   20984.75    -2.50  
3         219   20982.25    13.75  
4         113   20996.00    -2.50  
5          58   20993.50     6.75  


In [128]:
# Model inputs: the preceding row's close and the one-row change in close.
features = ['prev_close', 'momentum']

# Target: the close one row ahead (the next minute's close).
# The final row has no following close, so it is trimmed from both X and y.
X = data[features].iloc[:-1]
y = data['Close'].shift(-1).iloc[:-1]

In [129]:
# Positional (unshuffled) 80/20 split: the first 80% of rows are used for
# training and the remaining rows for testing.
split_index = int(0.8 * len(X))
X_train = X.iloc[:split_index]
X_test = X.iloc[split_index:]
y_train = y.iloc[:split_index]
y_test = y.iloc[split_index:]

In [130]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Fit a linear regression on the training rows (fit() returns the estimator).
model = LinearRegression().fit(X_train, y_train)

# Predict the held-out rows and report the mean squared error.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f"Mean Squared Error: {mse:.6f}")

Mean Squared Error: 65.887510


In [131]:
# Each value is the MSE obtained from training the model after a new feature was added.
# The last entry corresponds to the "Mean Squared Error" printed by the training
# cell (65.887510, recorded here truncated to two decimals).
# NOTE(review): 134.99 is presumably the run before momentum was added -- confirm.
results = [134.99, 65.88]

In [None]:
#Documentation
# prev_close and momentum were the best features for this linear regression model.
# Adding any other feature only reduced the model's accuracy.
# We achieved an MSE of 65.88, which seems to be the limit of this model's predictive capability given the data at hand.
# Next step: use an XGBoost model to see whether we can achieve higher accuracy.