Question 5: Advanced Data Storage and Model Serialization

Objective: Evaluate understanding of advanced database storage mechanisms and model storage.

Task:

Design a SQL schema for storing machine learning models that can be linked to different metals and their respective trading strategies.
Serialize a simple scikit-learn model and store it in the database (hint: this could mean storing the file name of the pickled object, together with some metadata about the stored model).
Write a CRUD function to fetch this model from the database.

In [9]:
from sqlalchemy import create_engine, Column, Integer, String, ForeignKey, JSON, insert, select
from sqlalchemy.orm import sessionmaker, relationship
from sqlalchemy.ext.declarative import declarative_base
import joblib
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
import pickle

In [10]:

Base = declarative_base()


# Database schema: every stored model belongs to one metal and one trading strategy.
class Metal(Base):
    """A metal that models can be trained for; `name` is unique."""

    __tablename__ = 'metal'

    id = Column(Integer, primary_key=True)
    name = Column(String, unique=True)

    # One-to-many: all Model rows whose metal_id references this metal.
    models = relationship('Model', back_populates='metal')


class Strategy(Base):
    """A trading strategy that models can implement; `name` is unique."""

    __tablename__ = 'strategy'

    id = Column(Integer, primary_key=True)
    name = Column(String, unique=True)

    # One-to-many: all Model rows whose strategy_id references this strategy.
    models = relationship('Model', back_populates='strategy')


class Model(Base):
    """Record describing a serialized ML model kept on disk.

    The database stores where the serialized file lives (`file_path`) plus
    descriptive metadata; the estimator itself is not stored in the table.
    """

    __tablename__ = 'model'

    id = Column(Integer, primary_key=True)
    name = Column(String)
    file_path = Column(String)  # path of the serialized model file
    # Named model_metadata because `metadata` is reserved on declarative classes.
    model_metadata = Column(JSON)

    # Many-to-one: each model points at exactly one metal and one strategy.
    metal_id = Column(Integer, ForeignKey('metal.id'))
    strategy_id = Column(Integer, ForeignKey('strategy.id'))

    metal = relationship('Metal', back_populates='models')
    strategy = relationship('Strategy', back_populates='models')

# Define the database connection.
# The SQLite path is relative, so it is resolved against the current working directory.
database_uri = "sqlite:///../data/my_database.db"
engine = create_engine(database_uri, pool_pre_ping=True)

# Create the tables declared on Base (tables that already exist are left as they are)
Base.metadata.create_all(engine)

In [11]:
def _get_or_create_id(db_session, entity, name):
    """Return the primary key of the `entity` row called `name`, inserting it if absent.

    Args:
        db_session: Open SQLAlchemy session used for the lookup and the insert.
        entity: Mapped class with `id` and `name` columns (Metal or Strategy).
        name: Value of the `name` column to look up or create.

    Returns:
        The integer primary key of the existing or newly inserted row.
    """
    row = db_session.query(entity).filter(entity.name == name).first()
    if row is None:
        row = entity(name=name)
        db_session.add(row)
        # flush() sends the INSERT so the generated primary key is available
        # before the transaction is committed.
        db_session.flush()
    return row.id


# Look up the 'Copper' metal and 'LagReturnsStrategy' strategy rows, creating
# them only when missing. The `name` columns are unique, so a blind INSERT
# would fail with an integrity error whenever this cell is run a second time.
# A session is used instead of engine.execute()/select([...]), which no longer
# exist in SQLAlchemy 2.0.
setup_session = sessionmaker(bind=engine)()
try:
    copper_id = _get_or_create_id(setup_session, Metal, 'Copper')
    strategy_id = _get_or_create_id(setup_session, Strategy, 'LagReturnsStrategy')
    setup_session.commit()
except Exception:
    # Undo the partial insert before surfacing the error.
    setup_session.rollback()
    raise
finally:
    setup_session.close()

In [12]:
# Notebook echo: display the primary key retrieved for 'LagReturnsStrategy'
strategy_id

1

In [13]:

# Load the Copper price rows and derive the inputs for a simple sklearn model.
# NOTE(review): the query has no ORDER BY, so the returns below follow whatever
# row order the database yields - confirm the rows come back in date order.
query = "SELECT * FROM MetalPrices WHERE Metal = 'Copper'"

# Let pandas run the query and hand the result back as a DataFrame
df = pd.read_sql_query(query, engine)

# Log return of each row relative to the previous one: log(1 + pct_change)
df['returns'] = np.log(1 + df['Price'].pct_change())

# Drop the columns that are not needed to build the model
df = df.drop(columns=['ID', 'Date', 'Metal', 'Price'])

# The previous row's return is the single predictor
df['lag_returns'] = df['returns'].shift(1)

# Leading rows have no return / lagged return yet, so discard them
df = df.dropna()

In [14]:
# Notebook echo: preview the first rows of the returns / lag_returns frame
df.head()

Unnamed: 0,returns,lag_returns
2,-0.009499,0.002265
3,0.001467,-0.009499
4,0.001709,0.001467
5,0.004705,0.001709
6,0.000324,0.004705


In [15]:
# Basic linear regression: the previous return is used to predict the next return.
# The single feature is reshaped to a column because fit() expects a 2-D matrix.
model = LinearRegression().fit(
    df['lag_returns'].to_numpy().reshape(-1, 1),
    df['returns'],
)
model.coef_

array([-0.04622852])

In [16]:
# Serialize the fitted estimator to disk; the path is what gets recorded in the database
model_file_path = "model.pkl"
joblib.dump(value=model, filename=model_file_path)

['model.pkl']

In [17]:

# Create a session (also reused by the later cells that read the model back)
Session = sessionmaker(bind=engine)
session = Session()

# The foreign-key columns are nullable, so a failed lookup would otherwise be
# stored silently as a model linked to no metal / no strategy.
if copper_id is None or strategy_id is None:
    raise ValueError("Metal and strategy rows must exist before a model can be linked to them")

# Define metadata for the model
model_metadata = {
    "author": "Ruthvik Konduru",
    "description": "Simple linear regression model using returns shifted by 1 to predict returns",
    # Add other metadata fields as needed
}

# Register the pickled model against Copper and LagReturnsStrategy
new_model = Model(
    name="LagReturnsModel",
    file_path=model_file_path,
    model_metadata=model_metadata,
    metal_id=copper_id,
    strategy_id=strategy_id,
)
session.add(new_model)

# Commit the changes to the database. On failure, roll back so the shared
# session stays usable for the following cells, then surface the error.
try:
    session.commit()
except Exception:
    session.rollback()
    raise

In [23]:
def get_model_by_name(session, model_name):
    """Fetch the stored-model record called `model_name`.

    Args:
        session: SQLAlchemy session to run the query on.
        model_name: Value matched exactly against `Model.name`.

    Returns:
        The matching `Model` row, or None when no row has that name. The row
        holds the file path and metadata, not the estimator itself; load the
        estimator from `row.file_path`.
    """
    # Model.name is not unique, so order by primary key to make the choice
    # deterministic (lowest id wins) when several rows share a name.
    return (
        session.query(Model)
        .filter(Model.name == model_name)
        .order_by(Model.id)
        .first()
    )

In [22]:
# Fetch the model record and rebuild the estimator from its pickled file
model_row = get_model_by_name(session, "LagReturnsModel")
if model_row is None:
    # get_model_by_name returns None for an unknown name; fail with a clear
    # message instead of an AttributeError on `.file_path`.
    raise LookupError("No model named 'LagReturnsModel' found in the database")

# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code. Only load paths recorded by a trusted writer of this database.
loaded_model = joblib.load(model_row.file_path)
print(loaded_model)

LinearRegression()


In [20]:
# Close the session that the cells above used for inserting and fetching the model
session.close()