In [1]:
%pip install pandas 





In [2]:
%pip install scikit-learn

Note: you may need to restart the kernel to use updated packages.


In [3]:
import pandas as pd

In [5]:
data=pd.read_csv("EDA_processing.csv")

In [6]:
data

Unnamed: 0,Open,High,Low,Close,Volume,Market Cap,PE Ratio,EPS,Debt to Equity,Price to Book Ratio,...,month_8,month_9,month_10,month_11,month_12,day_of_week_Friday,day_of_week_Monday,day_of_week_Thursday,day_of_week_Tuesday,day_of_week_Wednesday
0,1.156786,1.162679,1.117857,1.130179,293751500,4,2,3,4,4,...,False,False,False,False,False,False,True,False,False,False
1,1.139107,1.169107,1.124464,1.141786,293751500,4,2,3,4,4,...,False,False,False,False,False,False,False,False,True,False
2,1.151071,1.165179,1.143750,1.151786,293751500,4,2,3,4,4,...,False,False,False,False,False,False,False,False,False,True
3,1.154821,1.159107,1.130893,1.152679,293751500,4,2,3,4,4,...,False,False,False,False,False,False,False,True,False,False
4,1.160714,1.243393,1.156250,1.236607,293751500,4,2,3,4,4,...,False,False,False,False,False,True,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23050,319.531527,323.578279,316.243249,319.843610,2299500,3,3,0,3,3,...,False,False,True,False,False,True,False,False,False,False
23051,319.531527,323.578279,316.243249,319.843610,2797100,3,3,0,3,3,...,False,False,True,False,False,False,True,False,False,False
23052,319.531527,323.578279,316.243249,319.843610,2932800,3,3,0,3,3,...,False,False,True,False,False,False,False,False,True,False
23053,319.531527,323.578279,316.243249,319.843610,2494300,3,3,0,3,3,...,False,False,True,False,False,False,False,False,False,True


In [7]:
# Test-Train Split
from sklearn.model_selection import train_test_split

X = data.drop('Close', axis=1)
y = data['Close']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [8]:
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

In [9]:
models = [
("Linear Regressor", LinearRegression()),
("Decision Tree Regressor", DecisionTreeRegressor()),
("Random Forest Regressor", RandomForestRegressor()),
("XGBoost Regressor", XGBRegressor())
]

In [10]:
reports = []

for name, model in models:
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    r2 = r2_score(y_test, y_pred)
    rmse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    reports.append((name, model, rmse, mae, r2))  

In [11]:
for name, model, rmse, mae, r2 in reports:
    print(f"Model: {name}")
    print(f"RMSE: {rmse}")
    print(f"MAE: {mae}")
    print(f"R2: {r2}")
    print("\n")

Model: Linear Regressor
RMSE: 0.5843041328410256
MAE: 0.3566446977902832
R2: 0.9999327464216903


Model: Decision Tree Regressor
RMSE: 1.352069492358508
MAE: 0.512166724093641
R2: 0.9998443764019905


Model: Random Forest Regressor
RMSE: 0.8272449586045726
MAE: 0.4014249981707064
R2: 0.9999047838608734


Model: XGBoost Regressor
RMSE: 1.3205860344281637
MAE: 0.5532602744188362
R2: 0.9998480001572995




In [12]:
%pip install mlflow


Note: you may need to restart the kernel to use updated packages.


In [13]:
import mlflow
import mlflow.sklearn
import mlflow.xgboost
import pandas as pd


In [14]:

mlflow.set_tracking_uri("http://127.0.0.1:5000")
mlflow.set_experiment("FAANG Stock Closing Price Prediction")
for name, model, rmse, mae, r2 in reports:
    with mlflow.start_run(run_name=name) as run:
        mlflow.log_metric("RMSE", rmse)
        mlflow.log_metric("MAE", mae)
        mlflow.log_metric("R2", r2)
        
        if name == "Linear Regressor":
            mlflow.sklearn.log_model(model, "LR_model")
        elif name == "Decision Tree Regressor":
            mlflow.sklearn.log_model(model, "DT_model")
        elif name == "Random Forest Regressor":
            mlflow.sklearn.log_model(model, "RF_model")
        elif name == "XGBoost Regressor":
            mlflow.xgboost.log_model(model, "XGB_model")

The git executable must be specified in one of the following ways:
    - be included in your $PATH
    - be set via $GIT_PYTHON_GIT_EXECUTABLE
    - explicitly set via git.refresh(<full-path-to-git-executable>)

All git commands will error until this is rectified.

This initial message can be silenced or aggravated in the future by setting the
$GIT_PYTHON_REFRESH environment variable. Use one of the following values:
    - quiet|q|silence|s|silent|none|n|0: for no message or exception
    - error|e|exception|raise|r|2: for a raised exception

Example:
    export GIT_PYTHON_REFRESH=quiet



🏃 View run Linear Regressor at: http://127.0.0.1:5000/#/experiments/898113296110845786/runs/976e45d14d324f6d8927f9fe4b76206d
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/898113296110845786




🏃 View run Decision Tree Regressor at: http://127.0.0.1:5000/#/experiments/898113296110845786/runs/1f28644c3c6e402a90bdaa26e2923bfc
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/898113296110845786




🏃 View run Random Forest Regressor at: http://127.0.0.1:5000/#/experiments/898113296110845786/runs/281926c9e8114f6fb081a2caedecd981
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/898113296110845786




🏃 View run XGBoost Regressor at: http://127.0.0.1:5000/#/experiments/898113296110845786/runs/75139891eeab41c5ba490a454b2182cf
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/898113296110845786


In [15]:
model_name ='Random Forest Regressor'
run_id = '281926c9e8114f6fb081a2caedecd981'
model_uri = f'runs:/{run_id}/RF_model'

with mlflow.start_run(run_id=run_id):
    mlflow.register_model(model_uri= model_uri , name= model_name)

Registered model 'Random Forest Regressor' already exists. Creating a new version of this model...
2025/01/31 17:07:53 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: Random Forest Regressor, version 2
Created version '2' of model 'Random Forest Regressor'.


🏃 View run Random Forest Regressor at: http://127.0.0.1:5000/#/experiments/898113296110845786/runs/281926c9e8114f6fb081a2caedecd981
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/898113296110845786


In [None]:
import pickle

with open("standard_scaler.pkl", "wb") as s:
    pickle.dump(scaler, s)