In [16]:
import mlflow
import mlflow.sklearn
import mlflow.xgboost
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import streamlit as st
import pickle
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from xgboost import XGBRegressor
std_scaler = pickle.load(open(r'D:\FAANG _SP\standard_scaler.pkl', 'rb'))


In [17]:
data=pd.read_csv('D:\\FAANG _SP\\FAANG - 5.csv')
data.head()

Unnamed: 0,Open,High,Low,Close,Volume,Market Cap,PE Ratio,EPS,Debt to Equity,Price to Book Ratio,Apple,Facebook,Google,Amazon,Netflix
0,1.156786,1.162679,1.117857,1.130179,293751500,3,2,3,0,1,0,1,0,0,0
1,1.139107,1.169107,1.124464,1.141786,293751500,3,2,3,0,1,0,1,0,0,0
2,1.151071,1.165179,1.14375,1.151786,293751500,3,2,3,0,1,0,1,0,0,0
3,1.154821,1.159107,1.130893,1.152679,293751500,3,2,3,0,1,0,1,0,0,0
4,1.160714,1.243393,1.15625,1.236607,293751500,3,2,3,0,1,0,1,0,0,0


In [18]:
# Test-Train Split
from sklearn.model_selection import train_test_split

X = data.drop('Close', axis=1)
y = data['Close']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [19]:
# Standard Scaler
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
X_test_scaled = scaler.transform(X_test)  # X_test must also have 14 columns
pickle.dump(scaler, open("standard_scaler.pkl", "wb"))





In [20]:
print(X_train.shape)  # Should be (n_samples, 14)
print(X_test.shape)   # Should also be (n_samples, 14)


(18444, 14)
(4611, 14)


In [21]:
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

In [22]:
models = [
("Linear Regressor", LinearRegression()),
("Decision Tree Regressor", DecisionTreeRegressor()),
("Random Forest Regressor", RandomForestRegressor()),
("XGBoost Regressor", XGBRegressor())
]

In [23]:
reports = []

for name, model in models:
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    r2 = r2_score(y_test, y_pred)
    rmse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    reports.append((name, model, rmse, mae, r2))

In [24]:
for name, model, rmse, mae, r2 in reports:
    print(f"Model: {name}")
    print(f"RMSE: {rmse}")
    print(f"MAE: {mae}")
    print(f"R2: {r2}")
    print("\n")

Model: Linear Regressor
RMSE: 0.5842477661994834
MAE: 0.35700672102228503
R2: 0.9999327529095073


Model: Decision Tree Regressor
RMSE: 1.3163834348687322
MAE: 0.504505789742274
R2: 0.9998484838777503


Model: Random Forest Regressor
RMSE: 0.7790680889008283
MAE: 0.39467434530393564
R2: 0.9999103290328091


Model: XGBoost Regressor
RMSE: 1.2047963602788145
MAE: 0.5253679162668139
R2: 0.9998613275830015




In [25]:
import subprocess
mlflow.set_tracking_uri("http://127.0.0.1:5000")
mlflow.set_experiment("FAANG Stock Closing Price Prediction")

for name, model, rmse, mae, r2 in reports:
    with mlflow.start_run(run_name=name) as run:
        mlflow.log_metric("RMSE", rmse)
        mlflow.log_metric("MAE", mae)
        mlflow.log_metric("R2", r2)

        if name == "Linear Regressor":
            mlflow.sklearn.log_model(model, "LR_model")
        elif name == "Decision Tree Regressor":
            mlflow.sklearn.log_model(model, "DT_model")
        elif name == "Random Forest Regressor":
            mlflow.sklearn.log_model(model, "RF_model")
        elif name == "XGBoost Regressor":
            mlflow.xgboost.log_model(model, "XGB_model")



üèÉ View run Linear Regressor at: http://127.0.0.1:5000/#/experiments/117605597841153976/runs/6c927e6a5ac747af93f7523ae7067c0f
üß™ View experiment at: http://127.0.0.1:5000/#/experiments/117605597841153976




üèÉ View run Decision Tree Regressor at: http://127.0.0.1:5000/#/experiments/117605597841153976/runs/55140690a67b4a5abff577b7b36e267c
üß™ View experiment at: http://127.0.0.1:5000/#/experiments/117605597841153976




üèÉ View run Random Forest Regressor at: http://127.0.0.1:5000/#/experiments/117605597841153976/runs/5d11c6bda64c4d628ac8bbef712d194c
üß™ View experiment at: http://127.0.0.1:5000/#/experiments/117605597841153976


  self.get_booster().save_model(fname)


üèÉ View run XGBoost Regressor at: http://127.0.0.1:5000/#/experiments/117605597841153976/runs/6f50703b43774a8a817f3460e39e216a
üß™ View experiment at: http://127.0.0.1:5000/#/experiments/117605597841153976


In [26]:
model_name ='Random Forest Regressor'
run_id = '7ac86e7204294684ade81bdf2df07a6f'
model_uri = f'runs:/{run_id}/RF_model'

with mlflow.start_run(run_id=run_id):
    mlflow.register_model(model_uri= model_uri , name= model_name)

Registered model 'Random Forest Regressor' already exists. Creating a new version of this model...
2025/11/27 09:56:03 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: Random Forest Regressor, version 5


üèÉ View run Random Forest Regressor at: http://127.0.0.1:5000/#/experiments/117605597841153976/runs/7ac86e7204294684ade81bdf2df07a6f
üß™ View experiment at: http://127.0.0.1:5000/#/experiments/117605597841153976


Created version '5' of model 'Random Forest Regressor'.


In [27]:
import pickle

with open("standard_scaler.pkl", "wb") as s:
    pickle.dump(scaler, s)