In [None]:
!pip install -U google-cloud_aiplatform langchain langchain-google-genai langgraph langchain_experimental langchain_google_vertexai

In [None]:
from datetime import datetime
from typing import Optional, Sequence, TypedDict, Dict, List, Union, Any
from typing_extensions import Annotated

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler

from keras.models import load_model
from statsmodels.tsa.holtwinters import SimpleExpSmoothing

from langchain_core.messages import (
    BaseMessage,
    HumanMessage,
    AIMessage,
    SystemMessage,
)
from langchain_core.language_models import BaseChatModel, LLM
from langchain_core.outputs import Generation, ChatResult, ChatGeneration
from langchain_core.runnables import Runnable
from langchain_core.tools import Tool, tool
from langchain_core.utils.function_calling import convert_to_openai_tool

from langgraph.graph import StateGraph, END
from langgraph.prebuilt import create_react_agent
from langgraph.prebuilt.chat_agent_executor import (
    AgentState,
    add_messages,
    IsLastStep,
    RemainingSteps,
)

from langchain_google_vertexai import ChatVertexAI

from vertexai import init as vertexai_init


In [None]:
# Notebook-wide mutable state, initialised empty.
# `df` is (re)assigned by the `preprocess` tool via `global df`.
# NOTE(review): `df2`, `coef_global`, `XGB_MAPE` and `LSTM_MAPE` are not used in
# the visible cells — presumably placeholders for later modelling steps; confirm.
df, df2 = None, None
coef_global = list()
XGB_MAPE, LSTM_MAPE = None, None

# Input files read by `preprocess`; edit these when the data lives elsewhere.
_INTERVAL_DATA_CSV = "/content/Costco_interval_data_lstm.csv"  # PATH_CHANGE_1
_NEW_ACCOUNTS_CSV = "/content/Costco_new_accounts.csv"  # PATH_CHANGE_2

# Number of individual months-on-book buckets (MOB_0 .. MOB_23); everything
# older is accumulated in the single "MOB_>24" bucket.
_MOB_BUCKETS = 24


def _load_daily_volume(path):
    """Read the interval CSV and return summed "Call Volume" per date, sorted chronologically."""
    raw = pd.read_csv(path)
    # Parse BEFORE grouping/sorting: on raw strings the sort is lexicographic
    # (e.g. "10/1/2018" < "2/1/2018") and differently formatted spellings of the
    # same date would end up as separate rows.
    raw["Date"] = pd.to_datetime(raw["Date"], format="mixed")
    daily = raw.groupby("Date")[["Call Volume"]].sum()
    return daily.sort_index()


def _add_calendar_features(daily):
    """Return a copy of `daily` with day/month/quarter features derived from its DatetimeIndex."""
    out = daily.copy()
    out["day_of_week"] = out.index.dayofweek
    out["month"] = out.index.month
    out["quarter"] = out.index.quarter
    out["is_weekend"] = (out["day_of_week"] >= 5).astype(int)
    # Cyclical encodings so that Sunday/Monday and December/January are neighbours.
    out["day_of_week_sin"] = np.sin(2 * np.pi * out["day_of_week"] / 7)
    out["day_of_week_cos"] = np.cos(2 * np.pi * out["day_of_week"] / 7)
    out["month_sin"] = np.sin(2 * np.pi * out["month"] / 12)
    out["month_cos"] = np.cos(2 * np.pi * out["month"] / 12)
    out["week_of_month"] = (out.index.day - 1) // 7 + 1
    return out


def _add_holiday_flags(frame):
    """Return a copy of `frame` with 0/1 flags for the three hard-coded holiday lists (2018-2025)."""
    major_holidays = pd.to_datetime([
        "2018-01-01", "2018-07-04", "2018-11-22", "2018-12-25",
        "2019-01-01", "2019-07-04", "2019-11-28", "2019-12-25",
        "2020-01-01", "2020-07-04", "2020-11-26", "2020-12-25",
        "2021-01-01", "2021-07-04", "2021-11-25", "2021-12-25",
        "2022-01-01", "2022-07-04", "2022-11-24", "2022-12-25",
        "2023-01-01", "2023-07-04", "2023-11-23", "2023-12-25",
        "2024-01-01", "2024-07-04", "2024-11-28", "2024-12-25",
        "2025-01-01", "2025-07-04", "2025-11-27", "2025-12-25"
    ])
    minor_holidays = pd.to_datetime([
        "2018-02-19", "2018-10-08", "2018-12-24", "2018-12-31",
        "2019-02-18", "2019-10-14", "2019-12-24", "2019-12-31",
        "2020-02-17", "2020-10-12", "2020-12-24", "2020-12-31",
        "2021-02-15", "2021-10-11", "2021-12-24", "2021-12-31",
        "2022-02-21", "2022-10-10", "2022-12-24", "2022-12-31",
        "2023-02-20", "2023-10-09", "2023-12-24", "2023-12-31",
        "2024-02-19", "2024-10-14", "2024-12-24", "2024-12-31",
        "2025-02-17", "2025-10-13", "2025-12-24", "2025-12-31"
    ])
    positive_holidays = pd.to_datetime([
        "2018-11-23", "2018-11-26", "2019-11-29", "2019-12-02",
        "2020-11-27", "2020-11-30", "2021-11-26", "2021-11-29",
        "2022-11-25", "2022-11-28", "2023-11-24", "2023-11-27",
        "2024-11-29", "2024-12-02", "2025-11-28", "2025-12-01"
    ])
    out = frame.copy()
    out["is_major_holiday"] = out.index.isin(major_holidays).astype(int)
    out["is_minor_holiday"] = out.index.isin(minor_holidays).astype(int)
    out["is_positive_holiday"] = out.index.isin(positive_holidays).astype(int)
    return out


def _build_mob_table(new_accounts_df):
    """Build the months-on-book table (Year, Month, MOB_0..MOB_23, MOB_>24) from new-account rows.

    Row i of MOB_j holds the "New_Accounts" value of row i - j (0 when that row
    does not exist), and "MOB_>24" holds the running total of all rows up to
    i - 24. This is the closed form of the row-by-row recurrence
    MOB_j[i] = MOB_{j-1}[i-1], MOB_>24[i] = MOB_>24[i-1] + MOB_23[i-1],
    and produces numeric columns instead of object-dtype ones.
    """
    new_accounts = new_accounts_df["New_Accounts"]
    dates = pd.to_datetime(new_accounts_df["Date"])
    columns = {"Year": dates.dt.year, "Month": dates.dt.month}
    for age in range(_MOB_BUCKETS):
        # fill_value=0 only fills the rows introduced by the shift, so genuine
        # missing values in the input are still visible downstream.
        columns[f"MOB_{age}"] = new_accounts.shift(age, fill_value=0)
    # skipna=False keeps the recurrence's behaviour of propagating a missing
    # value into every later total instead of silently skipping it.
    columns["MOB_>24"] = new_accounts.cumsum(skipna=False).shift(_MOB_BUCKETS, fill_value=0)
    return pd.DataFrame(columns)


def _attach_mob_features(frame, new_accounts_df):
    """Left-join the MOB table onto `frame` by calendar year/month, keeping `frame`'s index."""
    keyed = frame.copy()
    keyed["Year"] = keyed.index.year
    keyed["Month"] = keyed.index.month
    mob_table = _build_mob_table(new_accounts_df)
    # many_to_one: each (Year, Month) may appear only once in the MOB table;
    # otherwise the join would duplicate daily rows, so fail with a clear error.
    merged = keyed.merge(mob_table, on=["Year", "Month"], how="left", validate="many_to_one")
    # merge() discards the DatetimeIndex; a left join keeps row order, so restore it.
    merged.index = keyed.index
    return merged.drop(columns=["Year", "Month"])


# Tool entry point.
#   input:   unused; it is only the argument name the tool exposes.
#   returns: a status string — the success message, or an "Error in ..." message
#            carrying the exception text. Exceptions are never raised to the caller.
#   effect:  on success the module-level `df` is replaced by the engineered frame;
#            on failure `df` is left untouched, so no half-processed frame is published.
# The docstring is deliberately left unchanged: `@tool` uses it as the tool
# description at runtime.
@tool
def preprocess(input: str) -> str:
    """
    Preprocess the dataframe before passing it to the model.
    """
    global df
    try:
        daily = _load_daily_volume(_INTERVAL_DATA_CSV)
    except Exception as e:
        return f"Error in reading and preprocessing df: {str(e)}"

    try:
        featured = _add_holiday_flags(_add_calendar_features(daily))
        new_accounts_df = pd.read_csv(_NEW_ACCOUNTS_CSV, parse_dates=["Date"])
        engineered = _attach_mob_features(featured, new_accounts_df)
    except Exception as e:
        return f"Error in feature engineering: {str(e)}"

    # Publish only once every step has succeeded.
    df = engineered
    return "✅ Preprocessing completed successfully."

# Tools the agent is allowed to call.
tools = [preprocess]

# ADD CREDENTIALS LINE HERE: assign a google-auth credentials object to
# `credentials` above this line. If nothing was assigned, fall back to None so a
# fresh kernel does not die with a NameError.
# NOTE(review): with credentials=None, ChatVertexAI is expected to resolve
# credentials from the environment (Application Default Credentials) — confirm
# for the installed langchain-google-vertexai version.
credentials = globals().get("credentials")

llm = ChatVertexAI(                            #LLM_OBJECT_CHANGE
    model="gemini-1.5-pro",
    project="agentic-454612",
    location="us-central1",
    max_output_tokens=1024,
    temperature=0.7,
    credentials=credentials,
)
bound_llm = llm.bind_tools(tools)

# Wrap the prebuilt ReAct agent as the single node of an outer graph.
agent_node = create_react_agent(bound_llm, tools=tools, state_schema=AgentState)
graph = StateGraph(AgentState)
graph.add_node("agent", agent_node)
graph.set_entry_point("agent")
# Make the termination explicit so the graph never relies on implicit
# dead-end handling of a node without outgoing edges.
graph.add_edge("agent", END)

runnable = graph.compile()
prompt = "Preprocess the dataframe 'df'"
output = runnable.invoke({"messages": [{"role": "user", "content": prompt}]})

print("\nFinal output from agent:\n")
for msg in output["messages"]:
    # Every LangChain message has a `name` attribute (often None), so testing for
    # it cannot distinguish tool results; use the message `type` instead.
    msg_type = getattr(msg, "type", None)
    if msg_type == "tool":
        print(f"[TOOL: {msg.name}] {msg.content}")
    else:
        print(f"[{str(msg_type).upper()}] {msg.content}")