<a href="https://colab.research.google.com/github/Articbug/Telecom-CDR-Analytics-Platform/blob/main/Notebooks_7_CDR_Performance_Tuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ============================================================
#   CDR PERFORMANCE TUNING
#   Wipro CDR Analytics Project
#   Author: Chandan Sahoo, Bismaya Ranjan Sahoo, Debasish Sahoo
# ============================================================

import subprocess, sys

# Install the notebook's third-party dependencies with the same interpreter
# that runs this kernel (sys.executable), so the imports below resolve on a
# fresh runtime.
packages = ['snowflake-connector-python', 'pandas']
for package in packages:
    result = subprocess.run(
        [sys.executable, '-m', 'pip', 'install', package, '--quiet', '--disable-pip-version-check'],
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        # pip's output is captured, so a failure would otherwise be invisible
        # and the success banner below would be printed regardless.
        raise RuntimeError(f'pip install failed for {package}:\n{result.stderr}')

print('All libraries installed successfully!')

import snowflake.connector
import pandas as pd
import time

def get_connection():
    """Open and return a new Snowflake connection.

    No secret is stored in the notebook. Settings are resolved as follows:

    * ``SNOWFLAKE_PASSWORD`` -- read from the environment; if it is unset the
      password is prompted for once with ``getpass`` and remembered in memory
      for later calls in the same kernel session.
    * ``SNOWFLAKE_ACCOUNT``, ``SNOWFLAKE_USER``, ``SNOWFLAKE_DATABASE``,
      ``SNOWFLAKE_SCHEMA``, ``SNOWFLAKE_WAREHOUSE`` -- optional overrides; each
      falls back to the value this notebook has always used.

    Returns:
        The object returned by ``snowflake.connector.connect``. The caller is
        responsible for closing it.
    """
    # Local imports keep this cell self-contained without touching the
    # notebook's import block.
    import getpass
    import os

    password = os.environ.get('SNOWFLAKE_PASSWORD') or getattr(
        get_connection, '_cached_password', None
    )
    if not password:
        password = getpass.getpass('Snowflake password: ')
        # Every cell opens its own connection; cache the typed value so the
        # user is prompted only once per kernel session.
        get_connection._cached_password = password

    return snowflake.connector.connect(
        account  = os.environ.get('SNOWFLAKE_ACCOUNT', 'bopsoxz-lr52214'),
        user     = os.environ.get('SNOWFLAKE_USER', 'CHANDANSAHOO'),
        password = password,
        database = os.environ.get('SNOWFLAKE_DATABASE', 'TELECOM_DWH'),
        schema   = os.environ.get('SNOWFLAKE_SCHEMA', 'DWH'),
        warehouse= os.environ.get('SNOWFLAKE_WAREHOUSE', 'REPORTING_WH')
    )

# Smoke test: open a connection, ask the server which user / database /
# warehouse the session is bound to, and release the cursor and connection
# even if the probe query fails.
conn   = get_connection()
try:
    cursor = conn.cursor()
    try:
        cursor.execute('SELECT CURRENT_USER(), CURRENT_DATABASE(), CURRENT_WAREHOUSE()')
        row = cursor.fetchone()
    finally:
        cursor.close()
finally:
    conn.close()

print('Connected successfully!')
print(f'   User:      {row[0]}')
print(f'   Database:  {row[1]}')
print(f'   Warehouse: {row[2]}')

All libraries installed successfully!
Connected successfully!
   User:      CHANDANSAHOO
   Database:  TELECOM_DWH
   Warehouse: REPORTING_WH


In [None]:
# ============================================================
#   CELL 2: QUERY PERFORMANCE TUNING
# ============================================================
print('=' * 55)
print('   QUERY PERFORMANCE TUNING')
print('=' * 55)

# The two statements being timed, kept as named constants so the measured
# section below stays short and the SQL text is not re-indented.

# Baseline: June 2024 revenue per call type, read directly from staging.
STAGING_REVENUE_SQL = '''
    SELECT CALL_TYPE, COUNT(*) AS total_calls,
           ROUND(SUM(CHARGE_AMOUNT), 2) AS total_revenue
    FROM TELECOM_DWH.STAGING.STG_CDR
    WHERE CALL_START_TIME >= '2024-06-01'
    AND   CALL_START_TIME <  '2024-07-01'
    GROUP BY CALL_TYPE
    ORDER BY total_revenue DESC
'''

# Same aggregate from the fact table joined to its date and call-type
# dimensions. NOTE(review): the printed label attributes any speed-up to a
# clustering key on DATE_KEY; the table's clustering is not visible here.
FACT_REVENUE_SQL = '''
    SELECT ct.CALL_TYPE_CODE, COUNT(*) AS total_calls,
           ROUND(SUM(f.CHARGE_AMOUNT), 2) AS total_revenue
    FROM TELECOM_DWH.DWH.FACT_CDR f
    JOIN TELECOM_DWH.DWH.DIM_CALL_TYPE ct ON f.CALL_TYPE_KEY = ct.CALL_TYPE_KEY
    JOIN TELECOM_DWH.DWH.DIM_DATE d       ON f.DATE_KEY = d.DATE_KEY
    WHERE d.MONTH_NUM = 6
    AND   d.YEAR_NUM  = 2024
    GROUP BY ct.CALL_TYPE_CODE
    ORDER BY total_revenue DESC
'''

conn   = get_connection()
cursor = conn.cursor()
try:
    # A repeated identical query can be answered from Snowflake's result
    # cache without touching the tables, which would make the timings below
    # meaningless on any re-run of this cell. Turn it off for this session.
    cursor.execute('ALTER SESSION SET USE_CACHED_RESULT = FALSE')

    # ── 1. QUERY WITHOUT OPTIMIZATION
    print('\n1. Without Optimization (Full Table Scan):')
    # perf_counter is monotonic; time.time() can jump with wall-clock changes.
    start = time.perf_counter()
    cursor.execute(STAGING_REVENUE_SQL)
    rows1 = cursor.fetchall()
    time1 = round(time.perf_counter() - start, 3)
    cols  = [d[0] for d in cursor.description]
    df1   = pd.DataFrame(rows1, columns=cols)
    print(f'   Execution time: {time1}s')
    print(df1.to_string(index=False))
    if df1.empty:
        # An empty baseline means the two queries did different work, so the
        # comparison printed at the end is not like-for-like.
        print('   NOTE: staging query returned no rows; its timing is not comparable.')

    # ── 2. QUERY WITH CLUSTERING BENEFIT
    print('\n2. With Clustering Key (DATE_KEY filter):')
    start = time.perf_counter()
    cursor.execute(FACT_REVENUE_SQL)
    rows2 = cursor.fetchall()
    time2 = round(time.perf_counter() - start, 3)
    cols  = [d[0] for d in cursor.description]
    df2   = pd.DataFrame(rows2, columns=cols)
    print(f'   Execution time: {time2}s')
    print(df2.to_string(index=False))

    print('\n   Performance Comparison:')
    print(f'   Without optimization: {time1}s')
    print(f'   With clustering:      {time2}s')
finally:
    # Release the cursor and the session even when a query above raised.
    try:
        cursor.close()
    finally:
        conn.close()

print('\nQuery Performance Tuning Complete!')

   QUERY PERFORMANCE TUNING

1. Without Optimization (Full Table Scan):
   Execution time: 0.693s
Empty DataFrame
Columns: [CALL_TYPE, TOTAL_CALLS, TOTAL_REVENUE]
Index: []

2. With Clustering Key (DATE_KEY filter):
   Execution time: 0.769s
CALL_TYPE_CODE  TOTAL_CALLS TOTAL_REVENUE
         VOICE         2246       3392.44
         VIDEO          218       1073.37
          DATA          629        322.88
           SMS         1022        104.00

   Performance Comparison:
   Without optimization: 0.693s
   With clustering:      0.769s

Query Performance Tuning Complete!


In [None]:
# ============================================================
#   CELL 3: VIEWS + PERFORMANCE TUNING
# ============================================================
print('=' * 55)
print('   VIEWS + PERFORMANCE TUNING')
print('=' * 55)

# DDL for the three secure reporting views, keyed by the view name that is
# echoed after creation. Insertion order is the creation order.
SECURE_VIEW_DDL = {
    # Calls, revenue, average duration and distinct callers per month and call type.
    'VW_MONTHLY_REVENUE': '''
    CREATE OR REPLACE SECURE VIEW TELECOM_DWH.ANALYTICS.VW_MONTHLY_REVENUE
    AS
    SELECT
        d.YEAR_NUM,
        d.MONTH_NUM,
        d.MONTH_NAME,
        ct.CALL_TYPE_CODE,
        COUNT(*)                        AS total_calls,
        ROUND(SUM(f.CHARGE_AMOUNT), 2)  AS total_revenue,
        ROUND(AVG(f.DURATION_SECS), 1)  AS avg_duration,
        COUNT(DISTINCT f.CALLER_KEY)    AS unique_callers
    FROM TELECOM_DWH.DWH.FACT_CDR f
    JOIN TELECOM_DWH.DWH.DIM_DATE      d  ON f.DATE_KEY      = d.DATE_KEY
    JOIN TELECOM_DWH.DWH.DIM_CALL_TYPE ct ON f.CALL_TYPE_KEY = ct.CALL_TYPE_KEY
    GROUP BY d.YEAR_NUM, d.MONTH_NUM, d.MONTH_NAME, ct.CALL_TYPE_CODE
''',
    # Per-caller totals plus counts of fraud-flagged and roaming records.
    'VW_CUSTOMER_SUMMARY': '''
    CREATE OR REPLACE SECURE VIEW TELECOM_DWH.ANALYTICS.VW_CUSTOMER_SUMMARY
    AS
    SELECT
        f.CALLER_KEY,
        COUNT(*)                        AS total_calls,
        ROUND(SUM(f.CHARGE_AMOUNT), 2)  AS total_revenue,
        ROUND(AVG(f.DURATION_SECS), 1)  AS avg_duration,
        COUNT(DISTINCT f.CALL_TYPE_KEY) AS services_used,
        SUM(CASE WHEN f.IS_FRAUD   = TRUE THEN 1 ELSE 0 END) AS fraud_count,
        SUM(CASE WHEN f.IS_ROAMING = TRUE THEN 1 ELSE 0 END) AS roaming_count
    FROM TELECOM_DWH.DWH.FACT_CDR f
    GROUP BY f.CALLER_KEY
''',
    # Volume, revenue and dropped-call rate per network type and call type.
    'VW_NETWORK_PERFORMANCE': '''
    CREATE OR REPLACE SECURE VIEW TELECOM_DWH.ANALYTICS.VW_NETWORK_PERFORMANCE
    AS
    SELECT
        f.NETWORK_TYPE,
        ct.CALL_TYPE_CODE,
        COUNT(*)                        AS total_calls,
        ROUND(AVG(f.DURATION_SECS), 1)  AS avg_duration,
        ROUND(SUM(f.CHARGE_AMOUNT), 2)  AS total_revenue,
        SUM(CASE WHEN f.TERMINATION_CD = 'DROPPED' THEN 1 ELSE 0 END) AS dropped_calls,
        ROUND(SUM(CASE WHEN f.TERMINATION_CD = 'DROPPED' THEN 1 ELSE 0 END)
              * 100.0 / COUNT(*), 2) AS drop_rate_pct
    FROM TELECOM_DWH.DWH.FACT_CDR f
    JOIN TELECOM_DWH.DWH.DIM_CALL_TYPE ct ON f.CALL_TYPE_KEY = ct.CALL_TYPE_KEY
    GROUP BY f.NETWORK_TYPE, ct.CALL_TYPE_CODE
''',
}

# First 12 rows of the monthly view in calendar order, highest revenue first
# within each month.
MONTHLY_REVENUE_QUERY = '''
    SELECT MONTH_NAME, CALL_TYPE_CODE, TOTAL_CALLS, TOTAL_REVENUE
    FROM TELECOM_DWH.ANALYTICS.VW_MONTHLY_REVENUE
    ORDER BY YEAR_NUM, MONTH_NUM, TOTAL_REVENUE DESC
    LIMIT 12
'''

# Ten callers with the highest total revenue.
CUSTOMER_TOP10_QUERY = '''
    SELECT * FROM TELECOM_DWH.ANALYTICS.VW_CUSTOMER_SUMMARY
    ORDER BY TOTAL_REVENUE DESC LIMIT 10
'''

conn   = get_connection()
cursor = conn.cursor()
try:
    # ── 1. CREATE SECURE VIEW
    print('\n1. Creating Secure Views:')
    for view_name, ddl in SECURE_VIEW_DDL.items():
        cursor.execute(ddl)
        print(f'   {view_name} created')

    # ── 2. QUERY VIEWS
    print('\n2. Querying Views:')

    # Each timing below covers execute + fetch only; the clock is stopped
    # before the DataFrame is built so pandas overhead is not counted as
    # query time. perf_counter is used because it is monotonic.
    print('\n   Monthly Revenue View:')
    start = time.perf_counter()
    cursor.execute(MONTHLY_REVENUE_QUERY)
    rows  = cursor.fetchall()
    t1    = round(time.perf_counter() - start, 3)
    cols  = [d[0] for d in cursor.description]
    df_mv = pd.DataFrame(rows, columns=cols)
    print(f'   Execution time: {t1}s')
    print(df_mv.to_string(index=False))

    print('\n   Customer Summary View (Top 10):')
    start = time.perf_counter()
    cursor.execute(CUSTOMER_TOP10_QUERY)
    rows    = cursor.fetchall()
    t2      = round(time.perf_counter() - start, 3)
    cols    = [d[0] for d in cursor.description]
    df_cust = pd.DataFrame(rows, columns=cols)
    print(f'   Execution time: {t2}s')
    print(df_cust.to_string(index=False))

    print('\n   Network Performance View:')
    start = time.perf_counter()
    cursor.execute('SELECT * FROM TELECOM_DWH.ANALYTICS.VW_NETWORK_PERFORMANCE ORDER BY TOTAL_REVENUE DESC')
    rows   = cursor.fetchall()
    t3     = round(time.perf_counter() - start, 3)
    cols   = [d[0] for d in cursor.description]
    df_net = pd.DataFrame(rows, columns=cols)
    print(f'   Execution time: {t3}s')
    print(df_net.to_string(index=False))

    # ── 3. SHOW ALL VIEWS
    print('\n3. All Analytics Views:')
    cursor.execute('SHOW VIEWS IN SCHEMA TELECOM_DWH.ANALYTICS')
    rows    = cursor.fetchall()
    cols    = [d[0] for d in cursor.description]
    df_views = pd.DataFrame(rows, columns=cols)
    print(df_views[['created_on', 'name', 'schema_name', 'is_secure']].to_string(index=False))
finally:
    # Release the cursor and the session even when a statement above raised.
    try:
        cursor.close()
    finally:
        conn.close()

print('\nViews + Performance Tuning Complete!')
print('=' * 55)

   VIEWS + PERFORMANCE TUNING

1. Creating Secure Views:
   VW_MONTHLY_REVENUE created
   VW_CUSTOMER_SUMMARY created
   VW_NETWORK_PERFORMANCE created

2. Querying Views:

   Monthly Revenue View:
   Execution time: 0.869s
MONTH_NAME CALL_TYPE_CODE  TOTAL_CALLS TOTAL_REVENUE
       Jan          VOICE         2309       3474.04
       Jan          VIDEO          232       1123.22
       Jan           DATA          665        348.01
       Jan            SMS         1059        107.40
       Feb          VOICE         2219       3309.25
       Feb          VIDEO          218       1105.15
       Feb           DATA          623        301.48
       Feb            SMS         1004        101.70
       Mar          VOICE         2358       3643.46
       Mar          VIDEO          197        919.22
       Mar           DATA          658        337.12
       Mar            SMS          999        101.50

   Customer Summary View (Top 10):
   Execution time: 0.478s
 CALLER_KEY  TOTAL_CALLS 