# **Prerequisites**

## Mount Google Drive


In [1]:
# Mount Google Drive at /content/drive (uses the google.colab helper, so this
# cell only works inside a Colab runtime).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Install packages


In [2]:
# Install the packages listed in the project's Extract_requirements.txt.
# -q keeps pip quiet; --no-cache-dir disables pip's download cache.
%pip install -q -r /content/drive/MyDrive/Sem-6/coding/github/fraud_detection/Extract_requirements.txt --no-cache-dir



In [3]:
project_path = "/content/drive/MyDrive/Sem-6/coding/github/fraud_detection/"
%cd $project_path
%ls /content/drive/MyDrive/Sem-6/coding/github/fraud_detection

/content/drive/MyDrive/Sem-6/coding/github/fraud_detection
clustring_requirements-lock.txt  requirements-lock.txt
clustring_requirements.txt       requirements.txt
[0m[01;34mconfigs[0m/                         [01;34mresults[0m/
[01;34mdataset[0m/                         run_experiment.py
Extract_requirements-lock.txt    sample_extract_requirements-lock.txt
Extract_requirements.txt         [01;34msrc[0m/
[01;34mnotebooks[0m/                       [01;34mtests[0m/
README.md


## Import libraries

In [4]:

import datetime
import os
import pandas as pd
import numpy as np
from scipy.stats import mode
import yaml
import logging
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler
from sklearn.manifold import TSNE
import altair as alt
from google.colab import data_table
data_table.DataTable.MAX_COLUMNS = 100
data_table.DataTable.MAX_ROWS = 1000000
data_table.disable_dataframe_formatter()
data_table.enable_dataframe_formatter()
# Expand Colab’s table display limits
pd.set_option("display.max_columns", None)
pd.set_option("display.width", None)
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import timedelta
# Echo the Colab data-table limits configured above (each value under its own label).
print("MAX_COLUMNS =", data_table.DataTable.MAX_COLUMNS)
print("MAX_ROWS =", data_table.DataTable.MAX_ROWS)

from collections import deque

#%pip freeze > Extract_requirements-lock.txt


MAX_COLUMNS = 100
MAX_COLUMNS = 1000000


#Utility Functions

## Logging

In [5]:

# Make sure results directory exists
os.makedirs("results", exist_ok=True)

# basicConfig() does nothing when the root logger already has handlers, which
# can be the case in hosted notebook kernels and on every re-run of this cell.
# force=True removes the existing root handlers first so this configuration
# always takes effect.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[
        logging.StreamHandler(),
        # Log messages in this notebook contain emoji, so pin the file encoding
        # instead of relying on the platform default.
        logging.FileHandler("results/data_extract.log", encoding="utf-8"),
    ],
    force=True,
)
logger = logging.getLogger(__name__)



##Config

In [6]:
def load_config(config_path="configs/baseline.yaml"):
    """Load a YAML config file.

    Args:
        config_path: Path of the YAML file to read.

    Returns:
        The parsed document as returned by ``yaml.safe_load``; an empty dict
        when the file contains no document (``safe_load`` returns ``None``).

    Raises:
        OSError: If the file cannot be opened.
        yaml.YAMLError: If the content is not valid YAML.
    """
    # Read as UTF-8 explicitly so parsing does not depend on the platform locale.
    with open(config_path, "r", encoding="utf-8") as f:
        config = yaml.safe_load(f)
    if config is None:
        config = {}
    logger.info(f"✅ Loaded config from {config_path}")
    return config


## CDR dataset

In [7]:
def load_cdr(file_path, nrows=None):
    """Read one CSV into a DataFrame and coerce time-like columns to datetimes.

    A column is treated as time-like when its lower-cased header contains
    "time" (which also covers "datetime"). Values that cannot be parsed become
    NaT. Header names are stripped of surrounding whitespace before returning.

    Args:
        file_path: CSV file to read.
        nrows: Optional row limit forwarded to ``pd.read_csv``.

    Returns:
        The loaded DataFrame.
    """
    logger.info(f"📂 Loading file: {file_path} (nrows={nrows})")
    frame = pd.read_csv(file_path, nrows=nrows)

    # Headers are matched before stripping, exactly as they appear in the file.
    time_like = [name for name in frame.columns if "time" in name.lower()]
    for name in time_like:
        frame[name] = pd.to_datetime(frame[name], errors="coerce")

    # clean header spaces
    frame.columns = frame.columns.str.strip()

    n_rows, n_cols = frame.shape
    logger.info(f"✅ Loaded {n_rows} rows, {n_cols} columns")
    return frame


def load_all_data(config):
    """Load every CSV listed under ``config['Agg']``.

    ``config['Agg']['files']`` maps a dataset name to a file name, and each
    file name is joined onto ``config['Agg']['base_path']``.

    Returns:
        dict mapping each dataset name to the DataFrame returned by ``load_cdr``.
    """
    section = config["Agg"]
    root_dir = section["base_path"]
    file_map = section["files"]

    loaded = {}
    for key, file_name in file_map.items():
        csv_path = os.path.join(root_dir, file_name)
        frame = load_cdr(csv_path)
        loaded[key] = frame
        logger.info(f"📊 Loaded {key:<5} -> {frame.shape} from {csv_path}")
    return loaded

##Features

###Voice Features

In [8]:
def get_voc_feats(df, cutoff_time=None, n_events=None):
    """Extract per-user voice call features within a given window.

    Args:
        df: Voice records with columns ``phone_no_m``, ``opposite_no_m``,
            ``start_datetime`` and ``call_dur``. The frame is not modified.
        cutoff_time: If given, only calls with ``start_datetime >= cutoff_time``
            are kept.
        n_events: If truthy, only the last ``n_events`` rows (by
            ``start_datetime``, across the whole frame) are kept.

    Returns:
        One row per ``phone_no_m`` with the ``voc_*`` features; missing values
        (e.g. the std of a single call) are filled with 0. An empty input
        yields an empty frame with only a ``phone_no_m`` column.
    """
    if df.empty:
        return pd.DataFrame(columns=["phone_no_m"])

    # Parse timestamps once, up front: the cutoff comparison then also works
    # when the column holds strings. assign() returns a new frame, so the
    # caller's data is untouched and no filtered slice is written to later.
    calls = df.assign(start_datetime=pd.to_datetime(df["start_datetime"]))

    if cutoff_time is not None:
        calls = calls[calls["start_datetime"] >= cutoff_time]
    if n_events:
        calls = calls.sort_values("start_datetime").tail(n_events)

    started = calls["start_datetime"]
    calls = calls.assign(
        # Unparseable durations count as 0 seconds.
        call_dur=pd.to_numeric(calls["call_dur"], errors="coerce").fillna(0),
        weekday=started.dt.weekday,
        hour=started.dt.hour,
    )

    feats = (
        calls.groupby("phone_no_m", as_index=False)
        .agg(
            voc_total_calls=("start_datetime", "count"),
            voc_unique_contacts=("opposite_no_m", "nunique"),
            voc_total_duration=("call_dur", "sum"),
            voc_avg_duration=("call_dur", "mean"),
            voc_max_duration=("call_dur", "max"),
            voc_std_duration=("call_dur", "std"),
            # NOTE(review): this counts distinct weekdays (at most 7), not
            # distinct calendar dates; confirm that is the intended meaning.
            voc_active_days=("weekday", "nunique"),
            voc_active_hours=("hour", "nunique"),
        )
    )
    return feats.fillna(0)


###SMS Features

In [9]:
def get_sms_feats(df, cutoff_time=None, n_events=None):
    """Extract per-user SMS features within a given window.

    Args:
        df: SMS records with columns ``phone_no_m``, ``opposite_no_m``,
            ``request_datetime`` and ``calltype_id``. The frame is not modified.
        cutoff_time: If given, only messages with
            ``request_datetime >= cutoff_time`` are kept.
        n_events: If truthy, only the last ``n_events`` rows (by
            ``request_datetime``, across the whole frame) are kept.

    Returns:
        One row per ``phone_no_m`` with the ``sms_*`` features, missing values
        filled with 0. An empty input yields an empty frame with only a
        ``phone_no_m`` column.
    """
    if df.empty:
        return pd.DataFrame(columns=["phone_no_m"])

    # Parse timestamps once so the cutoff comparison also works on string
    # columns. assign() builds a new frame, so neither the caller's data nor a
    # filtered slice is written to in place.
    msgs = df.assign(request_datetime=pd.to_datetime(df["request_datetime"]))

    if cutoff_time is not None:
        msgs = msgs[msgs["request_datetime"] >= cutoff_time]
    if n_events:
        msgs = msgs.sort_values("request_datetime").tail(n_events)

    msgs = msgs.assign(
        # Non-numeric call types become NaN and therefore never equal 1 below.
        calltype_id=pd.to_numeric(msgs["calltype_id"], errors="coerce"),
        hour=msgs["request_datetime"].dt.hour,
    )

    feats = (
        msgs.groupby("phone_no_m", as_index=False)
        .agg(
            sms_total_msgs=("request_datetime", "count"),
            sms_unique_contacts=("opposite_no_m", "nunique"),
            sms_active_hours=("hour", "nunique"),
            # Share of messages whose calltype_id equals 1.
            # 1 → outgoing, 2 → incoming (adjust if opposite)
            sms_calltype_ratio=("calltype_id", lambda x: (x == 1).mean()),
        )
    )

    return feats.fillna(0)


###App Features

In [10]:
import pandas as pd
import datetime

def get_app_feats(df):
    """Extract per-user application usage features (aggregated monthly).

    ``month_id`` may be given as YYYYMM (int, str, or a float such as
    201908.0), or as a date-like string such as '2019-08', '2019/08' or
    '2019-12-01'. Rows whose month cannot be parsed are dropped.

    Args:
        df: App usage records with columns ``phone_no_m``, ``month_id``,
            ``flow`` and ``busi_name``. The frame is not modified.

    Returns:
        One row per ``phone_no_m`` with the ``app_*`` features, missing values
        filled with 0. An empty input yields an empty frame that already
        carries all output columns.

    Raises:
        ValueError: If a non-empty ``df`` has no ``month_id`` column.
    """
    if df.empty:
        return pd.DataFrame(columns=[
            "phone_no_m", "app_months_active", "app_total_flow",
            "app_avg_flow", "app_std_flow",
            "app_unique_apps_mean", "app_unique_apps_max"
        ])

    # Ensure month_id exists
    if "month_id" not in df.columns:
        raise ValueError("❌ APP dataset must contain 'month_id' column.")

    def parse_month_end(x):
        """Map one month_id value to its month-end timestamp, or NaT."""
        if pd.isna(x):
            return pd.NaT
        s = str(x).strip()
        # A column containing missing values is read as float, so 201908
        # arrives as "201908.0"; it still encodes YYYYMM.
        if s.endswith(".0") and s[:-2].isdigit():
            s = s[:-2]
        # Handle YYYYMM
        if s.isdigit() and len(s) == 6:
            try:
                first_day = pd.Timestamp(year=int(s[:4]), month=int(s[4:]), day=1)
            except ValueError:
                # e.g. month 13: unparseable, so the row is dropped below
                # instead of aborting the whole extraction.
                return pd.NaT
            return first_day + pd.offsets.MonthEnd(0)
        # Handle full or partial date strings
        for candidate in (s, s + "-01"):
            try:
                dt = pd.to_datetime(candidate, errors="coerce")
            except Exception:
                continue
            if pd.notna(dt):
                return dt + pd.offsets.MonthEnd(0)
        return pd.NaT

    # assign() returns a new frame, leaving the caller's data untouched.
    usage = df.assign(
        # Unparseable flow values count as 0.
        flow=pd.to_numeric(df["flow"], errors="coerce").fillna(0),
        month_end=df["month_id"].apply(parse_month_end),
    ).dropna(subset=["month_end"])

    # Aggregate monthly totals
    monthly = (
        usage.groupby(["phone_no_m", "month_end"])
        .agg(
            total_flow=("flow", "sum"),
            unique_apps=("busi_name", "nunique"),
        )
        .reset_index()
    )

    # Aggregate per-user statistics
    features = (
        monthly.groupby("phone_no_m")
        .agg(
            app_months_active=("month_end", "nunique"),
            app_total_flow=("total_flow", "sum"),
            app_avg_flow=("total_flow", "mean"),
            app_std_flow=("total_flow", "std"),
            app_unique_apps_mean=("unique_apps", "mean"),
            app_unique_apps_max=("unique_apps", "max"),
        )
        .reset_index()
        .fillna(0)
    )

    return features


###User Features

In [11]:
def get_user_feats(df):
    """
    Extract per-user ARPU-based features.

    Handles both YYYYMM and YYYY-MM-DD month_id formats.
    Only rows with a numeric ``arpu_value`` greater than 0 count as active.

    Args:
        df: ARPU records with columns ``phone_no_m``, ``month_id`` and
            ``arpu_value``; ``idcard_cnt`` and ``label`` are aggregated (max)
            when present. The frame is not modified.

    Returns:
        One row per ``phone_no_m`` that has at least one active month.
        ``arpu_std`` is 0 when a user has a single active month. If no row is
        active at all, a single all-zero row for the first ``phone_no_m`` is
        returned, carrying that row's ``idcard_cnt``/``label`` (0 when the
        column is absent). An empty input yields an empty frame with only a
        ``phone_no_m`` column.

    Raises:
        ValueError: If ``arpu_value`` is missing.
    """
    df = df.copy()

    if "arpu_value" not in df.columns:
        raise ValueError("Expected column 'arpu_value' not found.")

    if df.empty:
        # Nothing to describe; avoids indexing row 0 of an empty frame below.
        return pd.DataFrame(columns=["phone_no_m"])

    # --- Convert month_id to datetime safely ---
    def to_month_end(val):
        """Map one month_id value to its month-end timestamp, or NaT."""
        if pd.isna(val):
            return pd.NaT
        s = str(val).strip()
        # Float-typed ids such as 201908.0 still encode YYYYMM.
        if s.endswith(".0") and s[:-2].isdigit():
            s = s[:-2]
        # Check bare YYYYMM first: a generic date parser may read six digits
        # in some other layout (e.g. YYMMDD).
        if s.isdigit() and len(s) == 6:
            try:
                return pd.to_datetime(s + "01", format="%Y%m%d") + pd.offsets.MonthEnd(0)
            except Exception:
                return pd.NaT
        # Full or partial date strings such as '2019-08-01'
        try:
            dt = pd.to_datetime(s, errors="coerce")
            if pd.notna(dt):
                return dt + pd.offsets.MonthEnd(0)
        except Exception:
            pass
        # Fallback: YYYYMM written with separators the parser did not accept
        compact = s.replace("-", "").replace("/", "")
        if len(compact) == 6:
            try:
                return pd.to_datetime(compact + "01", format="%Y%m%d") + pd.offsets.MonthEnd(0)
            except Exception:
                return pd.NaT
        return pd.NaT

    df["month_end"] = df["month_id"].apply(to_month_end)

    # --- Convert ARPU values to numeric ---
    df["arpu_value"] = pd.to_numeric(df["arpu_value"], errors="coerce")

    # --- Filter valid ARPU entries (NaN > 0 is False, so NaN rows drop out) ---
    df_valid = df[df["arpu_value"] > 0]

    # --- Aggregate per user ---
    agg_spec = {
        "user_months_active": ("month_end", "nunique"),
        "arpu_mean": ("arpu_value", "mean"),
        "arpu_std": ("arpu_value", "std"),
        "arpu_max": ("arpu_value", "max"),
    }
    # idcard_cnt / label are optional, matching the fallback row below.
    optional_cols = [c for c in ("idcard_cnt", "label") if c in df.columns]
    for col in optional_cols:
        agg_spec[col] = (col, "max")

    user_feats = df_valid.groupby("phone_no_m", as_index=False).agg(**agg_spec)
    # The std of a single month is NaN; report 0, as the other extractors do.
    user_feats["arpu_std"] = user_feats["arpu_std"].fillna(0)

    # --- If no active month found, return zero row ---
    if user_feats.empty:
        def first_or_zero(col):
            # Positional access: the incoming index need not contain label 0.
            return df[col].iloc[0] if col in df.columns else 0

        user_feats = pd.DataFrame([{
            "phone_no_m": df["phone_no_m"].iloc[0],
            "user_months_active": 0,
            "arpu_mean": 0,
            "arpu_std": 0,
            "arpu_max": 0,
            "idcard_cnt": first_or_zero("idcard_cnt"),
            "label": first_or_zero("label"),
        }])

    return user_feats


###Get feature names

In [12]:
def get_feature_names():
    """Return the ordered list of feature column names used in a snapshot.

    The order is voice, SMS, app, then user/ARPU columns (with ``idcard_cnt``
    and ``label`` last). A new list is built on every call.
    """
    voice = (
        "voc_total_calls", "voc_unique_contacts", "voc_total_duration",
        "voc_avg_duration", "voc_max_duration", "voc_std_duration",
        "voc_active_days", "voc_active_hours",
    )
    sms = (
        "sms_total_msgs", "sms_unique_contacts",
        "sms_active_hours", "sms_calltype_ratio",
    )
    app = (
        "app_months_active", "app_total_flow", "app_avg_flow",
        "app_std_flow", "app_unique_apps_mean", "app_unique_apps_max",
    )
    user = (
        "user_months_active", "arpu_mean", "arpu_std", "arpu_max",
        "idcard_cnt", "label",
    )
    return [*voice, *sms, *app, *user]


##Snapshot

###ensure dataframe

In [13]:

def ensure_dataframe(df, label, user):
    """Guarantee a DataFrame with a ``phone_no_m`` column.

    Args:
        df: ``None``, a Series (treated as one row), a DataFrame, or anything
            ``pd.DataFrame`` can wrap.
        label: Not used by this function; kept for interface compatibility.
        user: Value written to ``phone_no_m`` when the frame is empty or lacks
            that column.

    Returns:
        A frame with a fresh 0..n-1 index. A frame without rows becomes a
        single row for ``user``; a frame with rows but no ``phone_no_m`` gets
        the column filled with ``user`` on every row. The input is not mutated.
    """
    if df is None:
        df = pd.DataFrame()
    elif isinstance(df, pd.Series):
        df = df.to_frame().T
    elif not isinstance(df, pd.DataFrame):
        df = pd.DataFrame(df)

    if df.empty or "phone_no_m" not in df.columns:
        if len(df.index) == 0:
            # No rows at all: create one placeholder row (other columns NaN).
            df = df.reindex(index=[0])
        else:
            # Work on a copy so the caller's frame is left untouched.
            df = df.copy()
        # A scalar broadcasts to every row; a one-element list would fail on
        # frames with more than one row.
        df["phone_no_m"] = user
    return df.reset_index(drop=True)



###Feature extraction wrapper

In [14]:
def extract_features_for_sources(subsets, user):
    """Run every per-source feature extractor on the user's event subsets.

    ``subsets`` must provide the keys "ARPU", "VOC", "SMS" and "APP". An empty
    subset is not passed to its extractor and yields an empty frame instead.
    If anything raises, a message is printed and ``{}`` is returned. Otherwise
    each result is normalised with ``ensure_dataframe`` and returned under the
    keys "USER", "VOC", "SMS" and "APP".
    """
    raw = {}
    try:
        # (output key, subset key, extractor), in the order they are run.
        plan = (
            ("USER", "ARPU", get_user_feats),
            ("VOC", "VOC", get_voc_feats),
            ("SMS", "SMS", get_sms_feats),
            ("APP", "APP", get_app_feats),
        )
        for out_key, subset_key, extractor in plan:
            chunk = subsets[subset_key]
            raw[out_key] = pd.DataFrame() if chunk.empty else extractor(chunk)
    except Exception as e:
        print(f"❌ Feature extraction failed for user {user}: {e}")
        return {}

    # Ensure dataframes are valid and have phone_no_m
    return {
        key: ensure_dataframe(frame, key.lower(), user)
        for key, frame in raw.items()
    }


###Combine to single snapshot row

In [15]:
def combine_features_to_snapshot(
    user, step, cutoff_time, event_type,
    window_mode, window_size, feats,
    all_feature_columns, start_window
):
    """Merge all per-source features into one unified snapshot row.

    Args:
        user: Value for ``phone_no_m``; also the merge key.
        step: Stored as ``snapshot_index``.
        cutoff_time: Stored as both ``snapshot_time`` and ``window_end``.
        event_type, window_mode, window_size: Stored as metadata columns.
        feats: Mapping with optional keys "VOC", "SMS", "APP", "USER", each a
            DataFrame keyed by ``phone_no_m``. Entries that are missing, or
            that have no ``phone_no_m`` column, are skipped.
        all_feature_columns: Feature columns the result must contain.
        start_window: Stored as ``window_start``.

    Returns:
        A DataFrame holding the metadata columns followed by
        ``all_feature_columns``. Columns that no source supplied are created
        with 0; values that are NaN after the merge are left as they are.
    """
    base = pd.DataFrame({
        "phone_no_m": [user],
        "snapshot_index": [step],
        "snapshot_time": [cutoff_time],
        "event_type": [event_type],
        "window_mode": [window_mode],
        "window_size": [window_size],
        "window_start": [start_window],
        "window_end": [cutoff_time],
    })

    snapshot = base
    for source in ("VOC", "SMS", "APP", "USER"):
        frame = feats.get(source)
        # Merging on "phone_no_m" raises KeyError when the right-hand frame
        # lacks the key, e.g. when feature extraction failed and feats is {}.
        if frame is None or "phone_no_m" not in getattr(frame, "columns", ()):
            continue
        snapshot = snapshot.merge(frame, on="phone_no_m", how="left")

    snapshot = snapshot.reindex(
        columns=[
            "phone_no_m", "snapshot_index", "snapshot_time",
            "window_start", "window_end", "event_type",
            "window_mode", "window_size"
        ] + all_feature_columns,
        # fill_value only applies to columns created by this reindex.
        fill_value=0
    )
    return snapshot


###Time-Aware User Snapshots

In [16]:

def build_user_snapshots_global(
    df_voc, df_sms, df_app_tx, df_arpu_tx,
    window_size=1, window_unit="days",
    window_mode="time", max_users=None, max_snapshots=None,
    debug=True
):
    """Build per-user feature snapshots by replaying events round by round.

    The four event frames are concatenated and sorted by ``phone_no_m`` and
    ``event_time``. In every round each still-active user contributes its next
    event: the event is appended to that user's queue, the queue is trimmed to
    the window, and one snapshot row is built from the queue contents via
    ``extract_features_for_sources`` and ``combine_features_to_snapshot``.

    Args:
        df_voc, df_sms, df_app_tx, df_arpu_tx: Event frames. This function
            reads their ``phone_no_m``, ``event_time`` and ``source`` columns
            and splits the queue on the ``source`` values "VOC", "SMS", "APP"
            and "ARPU".
        window_size: In "events" mode, the maximum queue length. In "time"
            mode, the amount passed to ``pd.Timedelta`` under the keyword
            ``window_unit``.
        window_unit: Keyword name for ``pd.Timedelta`` (e.g. "days").
        window_mode: "time" or "events"; any other value leaves the queue
            untrimmed.
        max_users: If truthy, only the first ``max_users`` entries of the
            unique ``phone_no_m`` values (taken from the sorted events) are
            processed.
        max_snapshots: If truthy, stop once this many rounds have run.
        debug: If True, print the added event and queue size for every step.

    Returns:
        All round snapshots concatenated into one DataFrame (each row also
        carries ``snapshot_round``), or an empty DataFrame if no snapshot was
        produced.
    """

    # 🧩 Merge & sort
    all_events = pd.concat([df_voc, df_sms, df_app_tx, df_arpu_tx], ignore_index=True)
    all_events = all_events.sort_values(["phone_no_m", "event_time"]).reset_index(drop=True)
    users = all_events["phone_no_m"].unique()
    if max_users:
        users = users[:max_users]

    print(f"Total users: {len(users)}")

    # 🧩 Initialize per-user state
    # NOTE(review): this filters the full event frame once per user; a single
    # groupby over phone_no_m would avoid the repeated scans.
    user_data = {
        u: {
            "events": all_events[all_events["phone_no_m"] == u].reset_index(drop=True),
            "queue": deque(),
            "done": False
        }
        for u in users
    }
    ##display(user_data)
    all_snapshots = []
    round_index = 0

    # 🌀 Begin round-robin gradual processing
    while True:
        round_snapshots = []
        selected_events = []
        # Becomes True as soon as one user still had an event to consume.
        active = False

        #print(f"\n==============================")
       # print(f"🚀 ROUND {round_index}")
        #print(f"==============================")

        for u in users:
            ud = user_data[u]
            df = ud["events"]
            q = ud["queue"]

            if ud["done"] or df.empty:
                continue
            active = True

            # ✅ Take one event for this user
            # NOTE(review): the remaining events are re-sliced and re-indexed
            # on every step, so the cost per user grows with the square of the
            # event count. Because of the reset, every taken event is a Series
            # whose name is 0.
            event = df.iloc[0]
            ud["events"] = df.iloc[1:].reset_index(drop=True)
            q.append(event)
            current_time = event["event_time"]

            # Trim queue according to window
            if window_mode == "events":
                # Keep at most window_size of the most recent events.
                while len(q) > window_size:
                    q.popleft()
            elif window_mode == "time":
                # Drop events older than the window relative to the event just added.
                while (
                    q and (current_time - q[0]["event_time"]) > pd.Timedelta(**{window_unit: window_size})
                ):
                    q.popleft()

            # Only reachable when trimming removed every event, including the
            # one just appended (e.g. window_size of 0 in "events" mode).
            if not q:
                ud["done"] = True
                continue

            # 🧾 Selected event
            selected_events.append({
                "phone_no_m": u,
                "event_time": event["event_time"],
                "source": event["source"],
                "queue_size": len(q)
            })

            # 🪣 Print selected event and queue
            if debug:
                print(f"\n📥 User: {u}")
                print(f"   ➕ Added event: {event['event_time']} ({event['source']})")
                print(f"   🧮 Current queue ({len(q)} events):")
                # NOTE(review): qdf_display is built but never shown; the
                # display call below is commented out.
                qdf_display = pd.DataFrame(list(q))[["source", "event_time"]]
                #display(qdf_display)

            # Build snapshot
            # The queue is materialised as a frame; its index comes from the
            # names of the queued Series.
            qdf = pd.DataFrame(list(q))
            window_start = qdf["event_time"].min()
            window_end = qdf["event_time"].max()
            event_type = event["source"]

            subsets = {
                "VOC": qdf[qdf["source"] == "VOC"],
                "SMS": qdf[qdf["source"] == "SMS"],
                "APP": qdf[qdf["source"] == "APP"],
                "ARPU": qdf[qdf["source"] == "ARPU"],  # match extractor naming
            }

            feats = extract_features_for_sources(subsets, u)
            snapshot = combine_features_to_snapshot(
                user=u,
                step=round_index,
                cutoff_time=window_end,
                event_type=event_type,
                window_mode=window_mode,
                window_size=window_size,
                feats=feats,
                all_feature_columns=get_feature_names(),
                start_window=window_start
            )

            # window_start / window_end repeat the values already passed to
            # combine_features_to_snapshot; snapshot_round is added here.
            snapshot["window_start"] = window_start
            snapshot["window_end"] = window_end
            snapshot["snapshot_round"] = round_index
            round_snapshots.append(snapshot)

            if ud["events"].empty:
                ud["done"] = True

        # 🛑 Stop if no users active
        if not active:
            print("\n✅ All users processed — exiting.")
            break

        # 🧾 Show selected events summary
        if selected_events:
            #print(f"\n🔹 Selected Events Summary for Round {round_index}")
            # NOTE(review): sel_df is only used by the commented-out display below.
            sel_df = pd.DataFrame(selected_events)
            #display(sel_df[["phone_no_m", "event_time", "source", "queue_size"]])
        else:
            print("(No events selected this round)")

        # 📸 Show snapshot for this round
        if round_snapshots:
            round_df = pd.concat(round_snapshots, ignore_index=True)
            all_snapshots.append(round_df)

            #print(f"\n📸 === Snapshot Round {round_index} ===")
            #print(f"Users in this round: {round_df['phone_no_m'].nunique()}")
            #print(f"Rows in this snapshot: {len(round_df)}\n")

            #display(round_df[["phone_no_m", "window_start", "window_end", "event_type"]])
            # Re-applies the Colab data-table limits and toggles the formatter
            # every round; relies on the module-level `data_table` import.
            data_table.DataTable.MAX_COLUMNS = 100
            data_table.DataTable.MAX_ROWS = 1000000
            data_table.disable_dataframe_formatter()
            data_table.enable_dataframe_formatter()
           # display("Snapshot")
            #display(round_df)

        round_index += 1
        if max_snapshots and round_index >= max_snapshots:
            print("⛔ Reached max snapshot limit.")
            break

    print("\n✅ Completed all snapshot rounds.")
    return pd.concat(all_snapshots, ignore_index=True) if all_snapshots else pd.DataFrame()


# Execute pipeline

##Config and summary

In [17]:
# 1️⃣ Load config and all datasets
config = load_config("/content/drive/MyDrive/Sem-6/coding/github/fraud_detection/configs/baseline.yaml")
data = load_all_data(config)


# 2️⃣ Extract individual datasets from the returned dictionary
df_voc = data["voc"]
df_sms = data["sms"]
df_app = data["app"]
df_user = data["user"]


# 3️⃣ Tag every frame with its source and give it a common `event_time` column
# so the four frames can be merged into one timeline.

# Voice: event time is the call start (falls back to an existing event_time)
df_voc["source"] = "VOC"
df_voc["event_time"] = pd.to_datetime(df_voc.get("start_datetime", df_voc.get("event_time")), errors="coerce")

# SMS: event time is the request time (falls back to an existing event_time)
df_sms["source"] = "SMS"
df_sms["event_time"] = pd.to_datetime(df_sms.get("request_datetime", df_sms.get("event_time")), errors="coerce")

# App: the file is expected to already carry event_time
df_app["source"] = "APP"
df_app["event_time"] = pd.to_datetime(df_app["event_time"], errors="coerce")

# ARPU (User): the file is expected to already carry event_time
df_user["source"] = "ARPU"
df_user["event_time"] = pd.to_datetime(df_user["event_time"], errors="coerce")

print("✅ All datasets standardized and ready for timeline merge:")
print(f"  VOC  → {len(df_voc):,} records")
print(f"  SMS  → {len(df_sms):,} records")
print(f"  APP  → {len(df_app):,} records")
print(f"  ARPU → {len(df_user):,} records")

# Fallback: when a frame ended up with no usable event_time, derive it from the
# first alternative timestamp column it has. The VOC entry must be df_voc;
# iterating df_user twice left the voice frame unchecked.
for df, label in [(df_voc, "VOC"), (df_sms, "SMS"), (df_app, "APP"), (df_user, "ARPU")]:
    # Normalize event_time field
    if "event_time" not in df.columns or df["event_time"].isna().all():
        for alt_col in ["start_datetime", "request_datetime", "date", "busi_date"]:
            if alt_col in df.columns:
                df["event_time"] = pd.to_datetime(df[alt_col], errors="coerce")
                break


✅ All datasets standardized and ready for timeline merge:
  VOC  → 48,190 records
  SMS  → 64,225 records
  APP  → 26,142 records
  ARPU → 365 records


## Generate snapshots

In [18]:

# Replay the merged timeline for the first 2 users with a 10-day time window,
# capped at 500 rounds, with per-step debug printing switched off.
snapshots_df = build_user_snapshots_global(
    df_voc=df_voc, df_sms=df_sms,
    df_app_tx=df_app, df_arpu_tx=df_user,
    window_size=10, window_unit="days", window_mode="time",
    max_users=2, max_snapshots=500,
    debug=False,
)
display(snapshots_df)

# Save output snapshot: one timestamped CSV under the configured directory.
save_path = config["Agg"]["save_path"]
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
os.makedirs(save_path, exist_ok=True)

output_file = os.path.join(save_path, "user_snapshots_" + timestamp + ".csv")
snapshots_df.to_csv(output_file, index=False)

logger.info("✅ Feature extraction completed and saved to: " + str(output_file))
print("✅ Snapshot file saved to: " + str(output_file))

Total users: 2

✅ All users processed — exiting.

✅ Completed all snapshot rounds.


Unnamed: 0,phone_no_m,snapshot_index,snapshot_time,window_start,window_end,event_type,window_mode,window_size,voc_total_calls,voc_unique_contacts,voc_total_duration,voc_avg_duration,voc_max_duration,voc_std_duration,voc_active_days,voc_active_hours,sms_total_msgs,sms_unique_contacts,sms_active_hours,sms_calltype_ratio,app_months_active,app_total_flow,app_avg_flow,app_std_flow,app_unique_apps_mean,app_unique_apps_max,user_months_active,arpu_mean,arpu_std,arpu_max,idcard_cnt,label,snapshot_round
0,0460a1f64869b7263e73c6ded8390d3afdd9b5eda757bc...,0,2019-12-17 11:28:17,2019-12-17 11:28:17,2019-12-17 11:28:17,SMS,time,10,0,0,0.0,0.000000,0.0,0.000000,0,0,1,1,1,0.000000,0,0.000000,0.000000,0.0,0.0,0,0,0.00,0.0,0.00,0.0,0.0,0
1,082bae239f712d32c5bc3016692d23f383cd457fe9d085...,0,2019-12-04 13:04:00,2019-12-04 13:04:00,2019-12-04 13:04:00,SMS,time,10,0,0,0.0,0.000000,0.0,0.000000,0,0,1,1,1,0.000000,0,0.000000,0.000000,0.0,0.0,0,0,0.00,0.0,0.00,0.0,0.0,0
2,0460a1f64869b7263e73c6ded8390d3afdd9b5eda757bc...,1,2019-12-17 11:28:17,2019-12-17 11:28:17,2019-12-17 11:28:17,SMS,time,10,0,0,0.0,0.000000,0.0,0.000000,0,0,2,1,1,0.000000,0,0.000000,0.000000,0.0,0.0,0,0,0.00,0.0,0.00,0.0,0.0,1
3,082bae239f712d32c5bc3016692d23f383cd457fe9d085...,1,2019-12-04 13:04:00,2019-12-04 13:04:00,2019-12-04 13:04:00,SMS,time,10,0,0,0.0,0.000000,0.0,0.000000,0,0,2,1,1,0.000000,0,0.000000,0.000000,0.0,0.0,0,0,0.00,0.0,0.00,0.0,0.0,1
4,0460a1f64869b7263e73c6ded8390d3afdd9b5eda757bc...,2,2019-12-17 11:30:11,2019-12-17 11:28:17,2019-12-17 11:30:11,SMS,time,10,0,0,0.0,0.000000,0.0,0.000000,0,0,3,1,1,0.000000,0,0.000000,0.000000,0.0,0.0,0,0,0.00,0.0,0.00,0.0,0.0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
365,0460a1f64869b7263e73c6ded8390d3afdd9b5eda757bc...,206,2019-12-31 00:00:00,2019-12-21 10:54:21,2019-12-31 00:00:00,APP,time,10,126,121,9013.0,71.531746,1800.0,177.452132,2,11,37,2,6,0.189189,1,89.091316,89.091316,0.0,11.0,11,0,0.00,0.0,0.00,0.0,0.0,206
366,0460a1f64869b7263e73c6ded8390d3afdd9b5eda757bc...,207,2019-12-31 00:00:00,2019-12-21 10:54:21,2019-12-31 00:00:00,APP,time,10,126,121,9013.0,71.531746,1800.0,177.452132,2,11,37,2,6,0.189189,1,89.113710,89.113710,0.0,12.0,12,0,0.00,0.0,0.00,0.0,0.0,207
367,0460a1f64869b7263e73c6ded8390d3afdd9b5eda757bc...,208,2019-12-31 00:00:00,2019-12-21 10:54:21,2019-12-31 00:00:00,APP,time,10,126,121,9013.0,71.531746,1800.0,177.452132,2,11,37,2,6,0.189189,1,89.114201,89.114201,0.0,13.0,13,0,0.00,0.0,0.00,0.0,0.0,208
368,0460a1f64869b7263e73c6ded8390d3afdd9b5eda757bc...,209,2019-12-31 00:00:00,2019-12-21 10:54:21,2019-12-31 00:00:00,APP,time,10,126,121,9013.0,71.531746,1800.0,177.452132,2,11,37,2,6,0.189189,1,122.411707,122.411707,0.0,14.0,14,0,0.00,0.0,0.00,0.0,0.0,209


✅ Snapshot file saved to: /content/drive/MyDrive/Sem-6/coding/github/fraud_detection/dataset/CallChinses/features/user_snapshots_20251012_075822.csv
