<a href="https://colab.research.google.com/github/San-dra/Digital-Marketing-Performance-Dashboard-UrbanNest/blob/main/03_realtime_daily_websessions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# --- Real-time Session Simulator: Daily Append, or Create New File and Append ---
import pandas as pd
import numpy as np
import random
from datetime import datetime

from google.auth import default
from googleapiclient.discovery import build
import gspread
import gspread_dataframe as gd
from google.colab import auth, userdata

In [None]:
# Sign the Colab user in first; the default credentials picked up afterwards
# are what the Sheets client is authorised with.
auth.authenticate_user()
creds, _ = default()
gc = gspread.authorize(creds)

# Drive v3 client, used later for listing files in the sessions folder.
drive_service = build(serviceName='drive', version='v3')

In [None]:
# --- SETTINGS ---
# Take a single timestamp so every date-derived label below agrees.
today = datetime.today()
current_date_str = f"{today:%Y-%m-%d}"
current_month_year = f"{today:%b_%Y}"  # e.g., Apr_2025
month_year = current_month_year
file_name = "sessions_" + month_year
# Folder identifier is read from Colab user secrets rather than hard-coded.
folder_id = userdata.get('WEB_SESSIONS_DATA_FOLDER')

# Load Campaigns Sheet
campaigns_spreadsheet = gc.open_by_key(userdata.get('CAMPAIGNS_SHEET_ID'))
df_campaigns = (
    gd.get_as_dataframe(campaigns_spreadsheet.sheet1)
    .dropna()  # keeps only rows that have no missing cells
)

# --- Find active campaign for a country ---
def find_campaign_id(date, country, campaigns=None):
    """Pick one campaign that is active on ``date`` and targets ``country``.

    Args:
        date: The day to look up (anything ``pd.Timestamp`` accepts). Any
            time-of-day component is ignored, so a campaign is still active
            throughout its ``end_date`` day.
        country: Country name to match against each campaign's
            ``target_countries`` value.
        campaigns: Optional DataFrame with ``campaign_id``, ``start_date``,
            ``end_date`` and ``target_countries`` columns. Defaults to the
            notebook-level ``df_campaigns``.

    Returns:
        A ``campaign_id`` chosen uniformly at random among the eligible
        campaigns, or the string ``'None'`` when no campaign matches.
    """
    if campaigns is None:
        campaigns = df_campaigns

    # Compare on calendar days: a timestamp such as 14:30 on the campaign's
    # last day would otherwise be "after" an end_date parsed as midnight.
    day = pd.Timestamp(date).normalize()

    eligible_campaigns = []
    for _, camp in campaigns.iterrows():
        starts = pd.to_datetime(camp['start_date'])
        ends = pd.to_datetime(camp['end_date'])
        if not (starts <= day <= ends):
            continue
        targets = camp['target_countries']
        # 'All' is a wildcard; otherwise this is a plain membership test
        # (a substring match when the cell holds a string).
        if targets == 'All' or country in targets:
            eligible_campaigns.append(camp['campaign_id'])

    return random.choice(eligible_campaigns) if eligible_campaigns else 'None'

In [None]:
# --- Weighted Options ---
def _weighted_pool(weights):
    """Expand a {label: weight} mapping into a flat list, repeating each label `weight` times."""
    pool = []
    for label, weight in weights.items():
        pool.extend([label] * weight)
    return pool


# Each pool repeats a label once per weight unit, in the order listed.
device_weights = _weighted_pool({'mobile': 60, 'desktop': 35, 'tablet': 5})
age_weights = _weighted_pool({'18-24': 15, '25-34': 40, '35-44': 30, '45-54': 15})
gender_weights = _weighted_pool({'female': 60, 'male': 38, 'other': 2})

country_weights = _weighted_pool({
    'Germany': 20, 'France': 18, 'Italy': 15, 'Spain': 15,
    'Netherlands': 8, 'Poland': 8, 'Sweden': 5, 'Ireland': 5,
    'Denmark': 4, 'Belgium': 4, 'Austria': 3, 'Finland': 3,
    'Portugal': 3, 'Czech Republic': 3, 'Greece': 2, 'Hungary': 2,
    'Romania': 2, 'Bulgaria': 1, 'Croatia': 1, 'Cyprus': 1, 'Estonia': 1,
    'Latvia': 1, 'Lithuania': 1, 'Luxembourg': 1, 'Malta': 1, 'Slovakia': 1, 'Slovenia': 1,
})

# (low, high) multiplier ranges keyed by traffic type.
uplift = dict(normal=(1.0, 1.2), campaign=(1.3, 2.0))

In [None]:
# --- Generate today's session data ---
# Traffic sources are drawn uniformly; defined once instead of per row.
source_mediums = [
    'google/organic', 'facebook/organic', 'facebook/paid',
    'instagram/organic', 'instagram/paid', 'email/direct',
    'direct/none', 'referral/other', 'tiktok/organic', 'youtube/organic',
]

# Daily volume: a base count scaled by a random 1.0-1.3 multiplier.
base_sessions = random.randint(450, 650)
session_count = int(base_sessions * random.uniform(1.0, 1.3))

# Draw the numeric part of each session_id without replacement so no two
# sessions generated in this batch can share an ID (independent randint draws
# can collide). IDs keep the same 7-digit "sess_NNNNNNN" shape.
session_numbers = random.sample(range(1000000, 10000000), session_count)

# NOTE(review): the `uplift` ranges are not applied anywhere in this cell. A
# per-row multiplier used to be sampled from them but was never used, so that
# dead draw has been removed. Confirm whether campaign uplift is meant to
# scale volume or revenue.
rows = []
for session_number in session_numbers:
    country = random.choice(country_weights)
    camp_id = find_campaign_id(today, country)

    rows.append({
        'session_id': f"sess_{session_number}",
        'user_id': f"user_{random.randint(10000,99999)}",
        'session_date': current_date_str,
        'source_medium': random.choice(source_mediums),
        'campaign_id': camp_id,
        'device_category': random.choice(device_weights),
        'country': country,
        'age_group': random.choice(age_weights),
        'gender': random.choice(gender_weights),
        'sessions': 1,
        'pageviews': random.randint(1, 10),
        'bounce': random.choice([True, False]),
        'session_duration_seconds': random.randint(30, 600),
        'goal_completion': random.choice([0, 1]),
        # Roughly one session in five records revenue; the rest are 0.
        'transaction_revenue': round(random.uniform(25.0, 300.0), 2) if random.random() < 0.2 else 0
    })

df_today = pd.DataFrame(rows)

In [None]:


# --- Search all existing session files ---
# Only live spreadsheets in the sessions folder are considered: without the
# `trashed = false` clause a trashed sheet for the current month could be
# picked as the append target.
query = (
    f"'{folder_id}' in parents and name contains 'sessions' "
    "and mimeType = 'application/vnd.google-apps.spreadsheet' "
    "and trashed = false"
)

# files.list is paginated; follow nextPageToken so files beyond the first page
# are not missed (a missed file could lead to a duplicate monthly sheet or a
# reused number prefix).
files = []
page_token = None
while True:
    results = drive_service.files().list(
        q=query,
        fields="nextPageToken, files(id, name)",
        pageToken=page_token,
    ).execute()
    files.extend(results.get('files', []))
    page_token = results.get('nextPageToken')
    if not page_token:
        break

# --- Search if a file for the current month exists ---
target_file = None
existing_numbers = []

for file in files:
    name = file['name']
    # If several names contain the current month label, the last one listed wins.
    if current_month_year in name:
        target_file = file
    # Extract number prefix (e.g., '040' from '040_sessions_Apr_2025')
    prefix = name.split('_')[0]
    if prefix.isdigit():
        existing_numbers.append(int(prefix))

In [None]:
# --- Decide what to do ---
if target_file:
    # Append to existing file
    sheet = gc.open_by_key(target_file['id'])
    worksheet = sheet.sheet1
    # Rows that are entirely empty are dropped before appending.
    existing_df = gd.get_as_dataframe(worksheet).dropna(how='all')
    combined_df = pd.concat([existing_df, df_today], ignore_index=True)
    # Overwrite in place instead of clearing first. combined_df contains every
    # existing row and column plus today's rows, so it covers the whole
    # previously used range, and a failed write can no longer leave the month's
    # sheet wiped.
    gd.set_with_dataframe(worksheet, combined_df)
    print(f"Appended today's data to {target_file['name']}")
else:
    # Create new file with next number (starts at 1 when no numbered file exists)
    next_number = max(existing_numbers, default=0) + 1
    new_file_name = f"{next_number:03d}_sessions_{current_month_year}"
    # Create the spreadsheet directly inside the sessions folder, so a failure
    # part-way through cannot leave an orphaned copy in the Drive root that the
    # folder-scoped search would never find.
    new_sheet = gc.create(new_file_name, folder_id=folder_id)
    worksheet = new_sheet.sheet1
    gd.set_with_dataframe(worksheet, df_today)

    print(f"Created new file {new_file_name} and added today's data.")

Appended today's data to 040_sessions_Apr_2025
