In [1]:
import pandas as pd
import numpy as np

from dateutil.relativedelta import relativedelta
import matplotlib.pyplot as plt

import os
import sys
import warnings
# Silence every warning raised in this kernel.
warnings.filterwarnings("ignore")
if len(sys.warnoptions) == 0:
    # No -W option was given to the interpreter: export the same policy through
    # PYTHONWARNINGS so it also affects subprocesses, then install the blanket filter.
    os.environ["PYTHONWARNINGS"] = "ignore"
    warnings.simplefilter("ignore")

In [2]:
import sqlalchemy
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from sqlalchemy import create_engine


def create_db(user="root", password="Chenlu1974", server="localhost", database="transact"):
    """Build the SQLAlchemy engine, session factory and declarative base for a MySQL database.

    The connection URL uses the ``mysql+pymysql`` driver. ``user`` and ``password``
    are percent-encoded before being placed in the URL, so credentials that contain
    URL-reserved characters (``@``, ``:``, ``/``, ``%`` ...) no longer corrupt it.

    Args:
        user: Database user name.
        password: Password for ``user``.
        server: Host part of the connection URL.
        database: Name of the database to connect to.

    Returns:
        tuple: ``(engine, SessionLocal, Base)`` where ``engine`` is the result of
        ``create_engine``, ``SessionLocal`` is a ``sessionmaker`` bound to that
        engine (``autocommit=False``, ``autoflush=False``) and ``Base`` is a new
        declarative base class.
    """
    # Function-scope import: only this helper needs it.
    from urllib.parse import quote

    # NOTE(review): the default password is a credential committed to source control.
    # It is kept only so existing callers keep working; prefer passing the password
    # explicitly (e.g. read from an environment variable) and rotate this one.
    SQLALCHEMY_DATABASE_URL = "mysql+pymysql://{}:{}@{}/{}".format(
        quote(str(user), safe=""),      # safe="" also encodes "/" so it cannot end the host part
        quote(str(password), safe=""),
        server,
        database,
    )
    engine = create_engine(SQLALCHEMY_DATABASE_URL)

    SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
    Base = declarative_base()

    return engine, SessionLocal, Base


# Let the environment supply the database password so the credential does not have to
# live in the notebook. The literal fallback keeps the previous behaviour when
# TRANSACT_DB_PASSWORD is not set.
# NOTE(review): remove the fallback (and rotate the password) once the variable is
# configured everywhere this notebook runs.
engine, SessionLocal, Base = create_db(
    password=os.environ.get("TRANSACT_DB_PASSWORD", "msql1234")
)

In [6]:
from collections import OrderedDict
from src.exception import CustomException
from src.utils import load_object
import joblib

def get_CLO(features):
    """Predict clicks, leads and orders for the rows of ``features``.

    Loads the trained model and the pre-fitted one-hot encoder from the
    ``artifacts`` directory, encodes the ``category`` column with the encoder,
    appends ``cost`` as the last feature column and feeds the result to the model.

    Args:
        features: DataFrame containing at least a ``category`` and a ``cost`` column.

    Returns:
        pandas.DataFrame: the model's predictions with columns ``clicks``,
        ``leads`` and ``orders`` and a fresh default index (presumably one row
        per input row -- depends on the model's ``predict`` output).
    """
    model_path = os.path.join("artifacts", "roi_trained_model.pkl")
    encoder_path = os.path.join("artifacts", "roi_onehot_encoder.pkl")

    # NOTE(review): joblib.load unpickles its input (and load_object presumably does
    # too), which can execute arbitrary code -- only load artifacts from a trusted source.
    model = load_object(file_path=model_path)
    encoder = joblib.load(encoder_path)  # pre-fitted encoder: transform only, never re-fit here

    category_encoded = encoder.transform(features[['category']])
    # An encoder configured for sparse output returns a SciPy sparse matrix, which
    # np.concatenate cannot combine with a dense array; densify it first.
    if hasattr(category_encoded, "toarray"):
        category_encoded = category_encoded.toarray()

    # Feature layout fed to the model: encoded category columns, then cost.
    X_transformed = np.concatenate([category_encoded, features[['cost']].values], axis=1)

    pred = model.predict(X_transformed)
    return pd.DataFrame(pred, columns=['clicks', 'leads', 'orders'])

# Load every campaign row. The `with` block closes the connection on exit, so no
# explicit close() is needed.
with engine.connect() as db:
	query = sqlalchemy.text(
		'''
		SELECT * FROM campaign;
		''')
	result = db.execute(query)
	# Pass the column names explicitly so they survive even when the result set is empty.
	df = pd.DataFrame(result.fetchall(), columns=list(result.keys()))

# get_CLO reads the columns `category` and `cost`.
features = df.rename(columns={'channel': 'category', 'budget': 'cost'}).loc[:, ['category', 'cost']]

CLO = get_CLO(features)

# Column-wise concat aligns on the index; both frames carry the default 0..n-1 index.
campaigns_scored = pd.concat([df, CLO], axis=1)

# Keep campaigns whose start date is within 11 months of the latest start date.
# NOTE(review): the cutoff is day-precise, so the earliest month in the output can be
# a partial month -- confirm this is intended.
window_start = campaigns_scored['start_date'].max() - relativedelta(months=11)
campaigns_recent = campaigns_scored[campaigns_scored['start_date'] >= window_start]

# Monthly totals of the predicted metrics; `df` ends up as the aggregated frame.
df = (
    campaigns_recent
    .groupby(campaigns_recent['start_date'].dt.to_period("M"))[['clicks', 'leads', 'orders']]
    .sum()
    .reset_index()
)
df['start_date'] = df['start_date'].astype(str)  # Period -> 'YYYY-MM' string

# One constant colour string per metric, repeated on every row.
df['clicksColor'] = "hsl(229, 70%, 50%)"
df['leadsColor'] = "hsl(296, 70%, 50%)"
df['ordersColor'] = "hsl(97, 70%, 50%)"

data = df.to_dict(orient='records')
data

[{'start_date': '2024-01',
  'clicks': 6581.6900000000005,
  'leads': 163.89,
  'orders': 3295.49,
  'clicksColor': 'hsl(229, 70%, 50%)',
  'leadsColor': 'hsl(296, 70%, 50%)',
  'ordersColor': 'hsl(97, 70%, 50%)'},
 {'start_date': '2024-02',
  'clicks': 13373.74,
  'leads': 323.69,
  'orders': 5250.7300000000005,
  'clicksColor': 'hsl(229, 70%, 50%)',
  'leadsColor': 'hsl(296, 70%, 50%)',
  'ordersColor': 'hsl(97, 70%, 50%)'},
 {'start_date': '2024-03',
  'clicks': 19873.94,
  'leads': 481.57,
  'orders': 8499.8,
  'clicksColor': 'hsl(229, 70%, 50%)',
  'leadsColor': 'hsl(296, 70%, 50%)',
  'ordersColor': 'hsl(97, 70%, 50%)'},
 {'start_date': '2024-04',
  'clicks': 6642.19,
  'leads': 161.57,
  'orders': 1568.96,
  'clicksColor': 'hsl(229, 70%, 50%)',
  'leadsColor': 'hsl(296, 70%, 50%)',
  'ordersColor': 'hsl(97, 70%, 50%)'},
 {'start_date': '2024-05',
  'clicks': 8942.95,
  'leads': 216.81,
  'orders': 4341.04,
  'clicksColor': 'hsl(229, 70%, 50%)',
  'leadsColor': 'hsl(296, 70%, 50%