<a href="https://colab.research.google.com/github/ShikharV010/gist_daily_runs/blob/main/justcall_smsapicall.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# --- Cell 1: Install necessary packages ---
!pip install pandas requests --quiet

In [None]:
# --- Cell 2: Import libraries ---
import requests
import pandas as pd
from datetime import datetime
from IPython.display import display


In [None]:
# --- Cell 3: User Inputs ---
import os

# Credentials are read from the environment so they never live in the notebook
# (or in its git history). Set them before running, e.g. via the scheduler's
# secret store or `os.environ[...]` in a private, uncommitted cell.
# NOTE: the key/secret that used to be hardcoded here were committed in plain
# text and should be treated as compromised — rotate them in JustCall.
API_KEY = os.environ.get("JUSTCALL_API_KEY", "")
API_SECRET = os.environ.get("JUSTCALL_API_SECRET", "")

# Fail fast with a clear message instead of letting every API call return 401.
if not API_KEY or not API_SECRET:
    raise RuntimeError(
        "Set the JUSTCALL_API_KEY and JUSTCALL_API_SECRET environment variables "
        "before running this notebook."
    )

# number of records per page
per_page = 50


In [None]:
# --- Cell 4: Function to fetch messages ---
def fetch_messages(api_key, api_secret, start_date=None, end_date=None, per_page=50, timeout=30):
    """Download SMS records from the JustCall v2.1 ``/texts`` endpoint.

    Pages are requested one after another, starting at page 0, until the API
    returns an empty ``data`` list, a response without ``next_page_link``, or
    a non-200 status.

    Args:
        api_key: JustCall API key (first element of the ``auth`` tuple).
        api_secret: JustCall API secret (second element of the ``auth`` tuple).
        start_date: Optional lower bound, sent as the ``start_date`` query
            parameter. Values with a ``strftime`` method (``datetime``,
            ``date``) are sent as ``YYYY-MM-DD HH:MM:SS``; anything else is
            passed through unchanged.
        end_date: Optional upper bound, sent as ``end_date`` with the same
            formatting rule as ``start_date``.
        per_page: Number of records requested per page.
        timeout: Seconds to wait for each HTTP response before ``requests``
            raises, so a stalled connection cannot hang the notebook.

    Returns:
        A list of message dicts. In each dict the nested ``sms_info`` entry is
        replaced by flat ``sms_body`` and ``is_mms`` keys, and a ``fetched_at``
        timestamp string is added. If a page fails with a non-200 status the
        error is printed and the messages collected so far are returned.
    """
    url = 'https://api.justcall.io/v2.1/texts'
    headers = {'Accept': 'application/json'}

    # Query parameters that are identical for every page are built once.
    base_params = {'per_page': per_page, 'sort': 'id'}

    # NOTE(review): a recorded run of this notebook returned messages months
    # older than the requested start, so the API may not honor `start_date` /
    # `end_date` — confirm the filter parameter names against the JustCall docs.
    for key, bound in (('start_date', start_date), ('end_date', end_date)):
        if bound:
            # str(datetime) would include microseconds; send a plain
            # second-resolution timestamp instead.
            if hasattr(bound, 'strftime'):
                bound = bound.strftime("%Y-%m-%d %H:%M:%S")
            base_params[key] = bound

    all_messages = []
    page = 0

    while True:
        params = dict(base_params, page=page)
        response = requests.get(
            url,
            headers=headers,
            params=params,
            auth=(api_key, api_secret),
            timeout=timeout,
        )

        if response.status_code != 200:
            print(f"Error {response.status_code}: {response.text}")
            break

        data = response.json()
        messages = data.get('data', [])
        if not messages:
            break

        # One timestamp per page: all rows of a page arrive in the same response.
        fetched_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        for msg in messages:
            # Flatten sms_info. `or {}` also covers a key that is present but
            # null, which would otherwise raise AttributeError on `.get`.
            sms_info = msg.pop('sms_info', None) or {}
            msg['sms_body'] = sms_info.get('body', '')
            msg['is_mms'] = sms_info.get('is_mms', '')
            msg['fetched_at'] = fetched_at

        all_messages.extend(messages)

        # Stop if no more pages
        if not data.get('next_page_link'):
            break

        page += 1

    return all_messages


In [None]:
# --- Cell 5: Get start date and end date from fetched_data ---
from datetime import datetime, timedelta
from sqlalchemy import text

# Postgres schema and table that hold the ingested JustCall messages; together
# they form the qualified table name used in the MAX(date_ingested) lookup.
# The same two names are assigned again in the Postgres-write cell.
TABLE_SCHEMA = "gist"
TABLE_NAME   = "gist_justcall_messages"

# Look-back window in days, applied when no last-ingestion timestamp is available.
default_start_days = 7  # fallback if table is empty (change as needed)

def get_last_fetched_timestamp(engine):
    """
    Look up the latest ingestion time stored in TABLE_SCHEMA.TABLE_NAME.

    Runs ``SELECT MAX(date_ingested)`` on a connection obtained from
    ``engine`` and returns the scalar result, which is None when the query
    yields no value. Any exception raised along the way is printed and
    converted into a None return.
    """
    try:
        statement = text(f"""
                SELECT MAX(date_ingested)
                FROM {TABLE_SCHEMA}.{TABLE_NAME};
            """)
        with engine.connect() as connection:
            latest = connection.execute(statement).scalar()
    except Exception as err:
        print(f"‚ö†Ô∏è Could not read last fetched timestamp: {err}")
        return None
    return latest  # may legitimately be None

# ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
# 1Ô∏è‚É£ Determine start_date
# ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
from datetime import timezone

# `engine` is only created by the Postgres-write cell further down, so on a
# fresh kernel (Restart & Run All) it does not exist yet when this cell runs.
# Treat that like an unreadable table and use the default look-back window
# instead of failing with NameError.
db_engine = globals().get("engine")
if db_engine is None:
    print("engine is not defined yet; skipping the last-fetched lookup.")
    last_fetched = None
else:
    last_fetched = get_last_fetched_timestamp(db_engine)

# Naive UTC "now": same value datetime.utcnow() produced, without relying on
# the deprecated call.
utc_now = datetime.now(timezone.utc).replace(tzinfo=None)

# 1) Determine start_date: resume from the last ingestion, else look back
#    `default_start_days` days.
if last_fetched:
    start_date = last_fetched
    print(f"‚è≥ Using last fetched timestamp from DB: {start_date}")
else:
    start_date = utc_now - timedelta(days=default_start_days)
    print(f"üü¶ Table empty or unreadable ‚Üí fallback start_date: {start_date}")

# 2) Determine end_date: always the current time.
end_date = utc_now
print(f"üèÅ end_date set to now: {end_date}")



‚è≥ Using last fetched timestamp from DB: 2025-11-19 14:51:28.406872
üèÅ end_date set to now: 2025-11-19 17:27:18.231418


  end_date = datetime.utcnow()


In [None]:
# --- Fetch messages for the start_date .. end_date window ---
messages = fetch_messages(
    API_KEY,
    API_SECRET,
    start_date=start_date,
    end_date=end_date,
    per_page=per_page,
)
print(f"Fetched {len(messages)} messages.")


Fetched 1344 messages.


In [None]:
# --- Cell 6: Convert to DataFrame and clean ---
# Build the frame unconditionally so `df` is always defined for the cells below
# (they check `df.empty`) and can never be a stale frame from an earlier run.
df = pd.DataFrame(messages)

if df.empty:
    print("No messages found for the given date range.")
else:
    # Preferred column order; any column not listed here is dropped.
    columns_order = [
        'id', 'contact_number', 'contact_name', 'contact_email', 'justcall_number',
        'justcall_line_name', 'agent_id', 'agent_name', 'agent_email',
        'sms_date', 'sms_user_date', 'sms_time', 'sms_user_time',
        'direction', 'cost_incurred', 'delivery_status', 'is_deleted',
        'medium', 'sms_body', 'is_mms', 'fetched_at'
    ]

    # Filter only existing columns (some might be missing)
    columns_order = [col for col in columns_order if col in df.columns]

    df = df[columns_order]

    # Preview only. The rows contain phone numbers and message bodies, so
    # clear this output before committing or sharing the notebook.
    display(df.head())


Unnamed: 0,id,contact_number,contact_name,contact_email,justcall_number,justcall_line_name,agent_id,agent_name,agent_email,sms_date,...,sms_time,sms_user_time,direction,cost_incurred,delivery_status,is_deleted,medium,sms_body,is_mms,fetched_at
0,489378558,18304884996,,,14302335687,21/10 texas,457737,Tommy Flores,tommy.flores@gushwork.ai,2025-11-19,...,17:25:45,09:25:45,Outgoing,0.05,delivered,False,,"Hi Jasbeen, Tommy here from Gushwork. We ran a...",no,2025-11-19 17:27:27
1,489377049,19494563476,,,19522434423,"Twin Cities, MN, US",437549,John Rei Caligtan,john.caligtan@gushwork.ai,2025-11-19,...,17:23:43,09:23:43,Outgoing,0.025,delivered,False,,Noted\n,no,2025-11-19 17:27:27
2,489376795,19494563476,,,19522434423,"Twin Cities, MN, US",354398,Swapnil Sinha,growth@gushwork.ai,2025-11-19,...,17:23:25,09:23:25,Incoming,0.008625,received,False,One-To-One via App,Can I call you later?,no,2025-11-19 17:27:27
3,489364885,15415713025,,,19893732191,21/10 Michigan,354398,Swapnil Sinha,growth@gushwork.ai,2025-11-19,...,17:08:10,09:08:10,Incoming,0.008625,received,False,One-To-One via App,You put the 25th,no,2025-11-19 17:27:27
4,489357881,16025383616,,,18722785781,"Chicago Zone, IL, US",447907,Archie Mae Ellezo,archie.mae@gushwork.ai,2025-11-19,...,17:00:38,09:00:38,Outgoing,0.025,delivered,False,,"Hi Carlos, we are on the call now. Let me know...",no,2025-11-19 17:27:27




In [None]:
# --- Cell 7: Write DataFrame to Postgres ---
import os

import sqlalchemy
from sqlalchemy import create_engine, text
from datetime import datetime, timezone

# ------------- DB config -------------
# The connection URL embeds the database password, so it is read from the
# environment instead of being stored in the notebook. The credentials that
# used to be hardcoded here were committed in plain text and should be rotated.
DB_URL_ENV_VAR = "GIST_DATABASE_URL"
db_url = os.environ.get(DB_URL_ENV_VAR)
if not db_url:
    raise RuntimeError(
        f"Set the {DB_URL_ENV_VAR} environment variable to the Postgres "
        "connection URL (postgresql://user:password@host:5432/dbname)."
    )

engine = create_engine(db_url)
TABLE_SCHEMA = "gist"
TABLE_NAME   = "gist_justcall_messages"
VIEW_NAME    = "vw_justcall_messages"

# ------------- DataFrame to insert -------------
# `not messages` short-circuits before `df` is touched, so this also works when
# the conversion cell produced no frame at all.
if not messages or df.empty:
    print("üõë No new messages to insert.")
    raise SystemExit

# Ingestion timestamp as naive UTC (the value datetime.utcnow() used to give).
df["date_ingested"] = datetime.now(timezone.utc).replace(tzinfo=None)

try:
    # Decide between "append" and "create" by asking the database whether the
    # table exists. Any other failure (connection drop, schema mismatch, ...)
    # now propagates instead of triggering a destructive table replacement.
    table_exists = sqlalchemy.inspect(engine).has_table(TABLE_NAME, schema=TABLE_SCHEMA)

    if table_exists:
        # 1) pull existing message IDs (small result set, OK for now)
        with engine.connect() as conn:
            existing_ids = {row[0] for row in conn.execute(
                text(f"SELECT id FROM {TABLE_SCHEMA}.{TABLE_NAME}")
            )}
        print(f"üì¶ existing rows in DB: {len(existing_ids)}")

        # 2) filter out rows already stored, plus repeats within this batch
        df_new = df[~df["id"].isin(existing_ids)].drop_duplicates(subset="id")
        print(f"üÜï new rows to insert: {len(df_new)}")

        # 3) append new rows
        if not df_new.empty:
            df_new.to_sql(
                name=TABLE_NAME,
                con=engine,
                schema=TABLE_SCHEMA,
                if_exists="append",
                index=False,
                method="multi"
            )
            print("‚úÖ new rows appended.")
        else:
            print("üõë nothing new to append.")
    else:
        # Table missing: create it from this batch. if_exists="fail" guarantees
        # an existing table is never dropped, even if one appears concurrently.
        print("üì≠ table absent ‚Üí creating afresh.")
        df.drop_duplicates(subset="id").to_sql(
            name=TABLE_NAME,
            con=engine,
            schema=TABLE_SCHEMA,
            if_exists="fail",
            index=False,
            method="multi"
        )
        print(f"‚úÖ table {TABLE_SCHEMA}.{TABLE_NAME} created.")

    # 4) make / refresh view
    with engine.begin() as conn:
        conn.execute(text(f"""
        CREATE OR REPLACE VIEW {TABLE_SCHEMA}.{VIEW_NAME} AS
        SELECT *
        FROM   {TABLE_SCHEMA}.{TABLE_NAME};
    """))
    print(f"ü™ü view {TABLE_SCHEMA}.{VIEW_NAME} refreshed.")
finally:
    # Release pooled connections whether or not the write succeeded.
    engine.dispose()


  df["date_ingested"] = datetime.utcnow()  # timestamp of ingestion


üì¶ existing rows in DB: 1479
üÜï new rows to insert: 18
‚úÖ new rows appended.
ü™ü view gist.vw_justcall_messages refreshed.


In [None]:
# --- Report the span of sms_date values actually present in the data ---
if messages:
    # Parse sms_date so min/max compare real dates; unparseable values
    # become NaT. The parsed column is written back onto the frame.
    parsed_dates = pd.to_datetime(df['sms_date'], errors='coerce')
    df['sms_date'] = parsed_dates

    min_date = parsed_dates.min()
    max_date = parsed_dates.max()

    first_day = min_date.date()
    last_day = max_date.date()
    print(f"Data covers from {first_day} to {last_day}")


Data covers from 2025-08-19 to 2025-11-19
