# TelecomAI Customer Intelligence — Demo

Quick demo: load data, train model, predict, and visualize feature importance.

In [None]:
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
import joblib

# Reached only if every import above succeeded; a failed import raises first.
print('Imports ready')

In [None]:
# Load data
# Use the real dataset when it exists on disk; otherwise fall back to a small
# synthetic sample so the remaining cells can still run end to end.
csv_path = Path('../data/raw/users_behavior.csv')
if not csv_path.exists():
    # Create small synthetic sample if not present.
    # A seeded generator keeps the fallback data (and every metric and plot
    # derived from it) identical across runs, in line with the fixed
    # random_state=42 used for the split and the model.
    # NOTE: 'is_ultra' is drawn independently of the feature columns, so a
    # model fitted on this sample has no real signal to learn.
    n = 500
    rng = np.random.default_rng(42)
    df = pd.DataFrame({
        'calls': rng.uniform(0, 100, n),
        'minutes': rng.uniform(0, 1000, n),
        'messages': rng.uniform(0, 200, n),
        'mb_used': rng.uniform(0, 50000, n),
        'is_ultra': rng.choice([0, 1], n, p=[0.7, 0.3]),
    })
else:
    df = pd.read_csv(csv_path)

print(f'Dataset shape: {df.shape}')
df.head()

In [None]:
# Prepare features and target
# feature_cols is kept as a named list because the feature-importance cell
# reuses it for axis labels.
feature_cols = ['calls', 'minutes', 'messages', 'mb_used']
target_col = 'is_ultra'

X, y = df[feature_cols], df[target_col]

# Split data
# Hold out 20% of the rows for evaluation. Stratifying on the target keeps the
# class ratio the same in both partitions; the fixed seed makes it repeatable.
split_options = {'test_size': 0.2, 'random_state': 42, 'stratify': y}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

train_class_share = y_train.value_counts(normalize=True).to_dict()
print(f'Train size: {len(X_train)}, Test size: {len(X_test)}')
print(f'Target distribution (train): {train_class_share}')

In [None]:
# Load pre-trained model if available, otherwise train a simple one
model_path = Path('../artifacts/model.joblib')
alt_model_path = Path('../models/model_v1.0.0.pkl')

# Candidates are tried in priority order; the first file that exists wins.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only point these paths at artifacts you trust.
for candidate in (model_path, alt_model_path):
    if candidate.exists():
        model = joblib.load(candidate)
        print(f'Loaded model from {candidate}')
        break
else:
    # No saved artifact was found: fit a fresh baseline on the training split.
    from sklearn.ensemble import GradientBoostingClassifier
    model = GradientBoostingClassifier(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)
    print('Trained new GradientBoostingClassifier')

# Predict
y_pred = model.predict(X_test)
# Second probability column: used below as the score for the class that the
# report labels 'Ultra'.
y_proba = model.predict_proba(X_test)[:, 1]

# Metrics
print('\n=== Classification Report ===')
report = classification_report(y_test, y_pred, target_names=['Smart', 'Ultra'])
print(report)

roc_auc = roc_auc_score(y_test, y_proba)
print(f'ROC-AUC: {roc_auc:.4f}')

In [None]:
# Confusion Matrix
# Rows are plotted on the 'Actual' axis and columns on the 'Predicted' axis.
cm = confusion_matrix(y_test, y_pred)

fig, ax = plt.subplots(figsize=(6, 5))
im = ax.imshow(cm, cmap='Blues')
ax.set_xticks([0, 1])
ax.set_yticks([0, 1])
ax.set_xticklabels(['Smart', 'Ultra'])
ax.set_yticklabels(['Smart', 'Ultra'])
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Confusion Matrix')

# Add text annotations
# Walk the matrix's real shape rather than a hard-coded 2x2 so a degenerate
# result (e.g. only one class present) cannot raise IndexError, and use white
# text on the darker half of the colour range so every count stays readable.
contrast_threshold = (cm.max() + cm.min()) / 2
for i in range(cm.shape[0]):
    for j in range(cm.shape[1]):
        text_color = 'white' if cm[i, j] > contrast_threshold else 'black'
        ax.text(j, i, str(cm[i, j]), ha='center', va='center',
                fontsize=14, color=text_color)

plt.colorbar(im)
plt.tight_layout()
plt.show()

In [None]:
# Feature Importance
# Find the object that holds the fitted parameters: the model itself, or for a
# pipeline-like object its 'model' step (falling back to the last step when no
# step carries that name).
estimator = model
if not hasattr(model, 'feature_importances_') and hasattr(model, 'named_steps'):
    named = model.named_steps
    estimator = named['model'] if 'model' in named else list(named.values())[-1]

if hasattr(estimator, 'feature_importances_'):
    importances = np.asarray(estimator.feature_importances_, dtype=float)
elif hasattr(estimator, 'coef_'):
    # Coefficient-based importance for linear models: mean absolute
    # coefficient across the rows of coef_. Magnitudes depend on feature
    # scale, so treat the ranking as indicative unless inputs were scaled.
    importances = np.abs(np.atleast_2d(estimator.coef_)).mean(axis=0)
    print('Using absolute coefficients as feature importance')
else:
    # Last resort when the model exposes neither attribute: rank features by
    # their variance in the training split (a rough proxy only).
    importances = X_train.var().values
    print('Using variance as proxy for feature importance')

feature_names = feature_cols
if len(importances) != len(feature_names):
    # The importance vector does not line up with feature_cols (for example a
    # pipeline step changed the number of features), so label bars generically
    # instead of failing or mislabelling them.
    feature_names = [f'feature_{i}' for i in range(len(importances))]
    print('Importance count differs from feature_cols; using generic feature names')

# Sort and plot (most important feature first)
sorted_idx = np.argsort(importances)[::-1]
positions = range(len(feature_names))

plt.figure(figsize=(8, 5))
plt.bar(positions, importances[sorted_idx])
plt.xticks(positions, [feature_names[i] for i in sorted_idx], rotation=45)
plt.xlabel('Feature')
plt.ylabel('Importance')
plt.title('Feature Importance')
plt.tight_layout()
plt.show()

In [None]:
# Summary
# Headline numbers for this run, printed as indented JSON.
accuracy = (y_pred == y_test).mean()  # share of test rows predicted correctly
summary = dict(
    model_type=type(model).__name__,
    test_samples=len(X_test),
    roc_auc=round(roc_auc, 4),
    accuracy=round(accuracy, 4),
)
print(json.dumps(summary, indent=2))