### HuggingFace Tutorial

In [3]:
import pandas as pd
import statsmodels.api as sm
from sklearn.model_selection import train_test_split
import joblib
from pathlib import Path
import numpy as np
import sys
import os
import warnings
from pathlib import Path
from sklearn.pipeline import make_pipeline
from transformers import AutoModel
from huggingface_hub import HfFolder
from dotenv import load_dotenv
import os



In [8]:
# Get the current working directory
current_dir = os.getcwd()

# Assuming the current directory is 'models', get the path of the 'TAQ-Query-Scripts' directory
parent_dir = os.path.dirname(current_dir)

# Add the parent directory to sys.path
sys.path.append(parent_dir)

token = os.getenv("hf_token")
user_token= os.getenv("hf_user_token")

# Use the token to authenticate
HfFolder.save_token(token)

### OLS Walkthrough

In [6]:
# Load the dataset
df_test = pd.read_csv('../sample_data/sample_features.csv')
df_test['Trade_Price'] = df_test['Trade_Price'].fillna(df_test['Trade_Price'].median())

# Selecting features for the model (adjust as necessary)
features = ['Trade_Volume', 'Bid_Price', 'Offer_Price']  # Example features
target = 'Trade_Price'

# Drop rows with missing target or features
df_test = df_test.dropna(subset=[target] + features)

# Check if the DataFrame is empty
if df_test.empty:
    print("No data available after removing missing values.")
else:
    # Define X (features) and y (target)
    X = df_test[features]
    y = df_test[target]

    # Adding a constant to the model (important for OLS regression)
    X = sm.add_constant(X)

    # Splitting the dataset into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Create the OLS model
    model = sm.OLS(y_train, X_train).fit()

    # Print model summary
    print(model.summary())

    # Save the model to a file
    model_filename = 'ols_model/ols_model.pkl'
    joblib.dump(model, model_filename)

    print(f"Model saved as {model_filename}")

                            OLS Regression Results                            
Dep. Variable:            Trade_Price   R-squared:                      -5.217
Model:                            OLS   Adj. R-squared:                 -5.232
Method:                 Least Squares   F-statistic:                    -333.6
Date:                Fri, 05 Jan 2024   Prob (F-statistic):               1.00
Time:                        16:42:56   Log-Likelihood:                 21923.
No. Observations:                 798   AIC:                        -4.384e+04
Df Residuals:                     795   BIC:                        -4.383e+04
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                   coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------
const          294.6500   5.75e-12   5.13e+13   

### Push Model to HuggingFace

For write access you will need to use a user-access token. You can create one by going to your profile settings and clicking on "New token". Make sure to copy the token after creating it, as you won't be able to access it afterwards.

In [9]:
# push the model to the hub
model_name = "ols_model"
model_path = Path('ols_model/ols_model.pkl')
model = AutoModel.from_pretrained(model_path)
model.push_to_hub(model_name, use_auth_token=user_token)



OSError: ols_model/ols_model.pkl is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'
If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`

# To do
1. add parameter estimate
2. chart with intervals/estimate and p-values


### Load Model from HuggingFace Hub

In [2]:
#model = AutoModel.from_pretrained("taqdatabase/OLS")