<img src="https://www.teradata.com/Teradata/Images/Rebrand/Teradata_logo-two_color.png" alt="Teradata" width="400" align="right"/>

<br/>

# TELCO CHURN DEMO
# **PART 2: MODELLING (NAIVE BAYES)** 

In [1]:
%lsconnect

Disconnected: NAME=ck186030, USER=ck186030, HOST=tddb-env-d-268.vantage.demo.intellicloud.teradata.com


In [3]:
%connect ck186030

Success: 'ck186030' connection established


#### 1a. Perform Naive Bayes on associated text tokens for known PATHS (TRAINING TABLE ONLY)

In [4]:
-- Remove the model table left by a previous run so the CREATE TABLE in the
-- following cell can rebuild it under the same name.
-- NOTE(review): presumably errors if the table does not exist yet (e.g. on a first run) — confirm.
DROP TABLE csi_telco_churn_model;

Success: 17 rows affected

In [5]:
-- Train the Naive Bayes text classifier and persist its output as
-- csi_telco_churn_model.
--   inner call: NaiveBayesTextClassifierInternal reads model_dataset_train,
--               partitioned by category, using the token / customerid /
--               category columns and the Bernoulli model type.
--   outer call: NaiveBayesTextClassifierTrainer consumes the inner result in
--               a single partition (PARTITION BY 1).
CREATE MULTISET TABLE csi_telco_churn_model
AS
(
    SELECT *
    FROM NaiveBayesTextClassifierTrainer (
        ON (
            SELECT *
            FROM NaiveBayesTextClassifierInternal (
                ON (
                    SELECT *
                    FROM model_dataset_train
                ) AS "input"
                PARTITION BY category
                USING
                    TokenColumn ('token')
                    ModelType ('Bernoulli')
                    DocIDColumns ('customerid')
                    DocCategoryColumn ('category')
            ) AS nb_internal
        )
        PARTITION BY 1
    ) AS nb_trainer
)
WITH DATA;

Success: 0 rows affected

#### 1b.  Perform the Naive Bayes PREDICT function using the MLE on the TEST data

In [6]:
-- Remove the prediction table left by a previous run so the CREATE TABLE in
-- the following cell can rebuild it under the same name.
-- NOTE(review): presumably errors if the table does not exist yet (e.g. on a first run) — confirm.
DROP TABLE csi_telco_churn_predict_test;

Success: 17 rows affected

In [7]:
-- Score the TEST tokens with the trained model (csi_telco_churn_model, passed
-- as the DIMENSION input) and persist the predictions.
-- TopK (2) keeps two candidate categories per customerid, so the result is in
-- "long" form: one row per (customerid, prediction) with its log-likelihood.
--
-- MULTISET is stated explicitly to match csi_telco_churn_model; without it the
-- table kind depends on the session mode (NOTE(review): SET is the default in
-- Teradata session mode — confirm for this environment).
-- The output columns are listed explicitly so the persisted schema does not
-- silently change if the function's output changes.
CREATE MULTISET TABLE csi_telco_churn_predict_test
AS
(
    SELECT
        dt.customerid
        , dt.prediction
        , dt.loglik
    FROM NaiveBayesTextClassifierPredict@coprocessor (
        ON (
            SELECT *
            FROM model_dataset_test
        ) AS predicts
        PARTITION BY customerid
        ON csi_telco_churn_model AS model DIMENSION
        USING
            InputTokenColumn ('token')
            ModelType ('Bernoulli')
            DocIDColumns ('customerid')
            TopK (2)
    ) AS dt
)
WITH DATA;

Success: 0 rows affected

In [8]:
-- Preview ten scored rows. There is no ORDER BY, so which ten rows come back
-- is not guaranteed to be the same from run to run.
SELECT TOP 10
    customerid
    , prediction
    , loglik
FROM csi_telco_churn_predict_test;

customerid,prediction,loglik
530333,CHURN,-13.717824010983582
657232,NON CHURN,-4.976559003595135
657232,CHURN,-17.09758106140787
671853,CHURN,-21.16748261677729
602031,CHURN,-13.717824010983582
602031,NON CHURN,-4.416404694865529
671853,NON CHURN,-5.509423562257397
530333,NON CHURN,-4.416404694865529
600155,CHURN,-17.137676575692762
600155,NON CHURN,-4.090119121923088


#### 2. Pivot from long to wide and create a binary output for confusion matrix

In [9]:
-- Remove the pivoted prediction table left by a previous run so the
-- CREATE TABLE in the following cell can rebuild it under the same name.
-- NOTE(review): presumably errors if the table does not exist yet (e.g. on a first run) — confirm.
DROP TABLE csi_telco_churn_predict_test_pivot;

Success: 17 rows affected

In [10]:
-- Pivot the long prediction table (two rows per customerid) into one wide row
-- per customerid with columns loglik_0 and loglik_1.
--
-- Column meaning depends on the ORDER BY: rows are sorted by prediction, so
-- 'CHURN' sorts before 'NON CHURN' and loglik_0 is the CHURN log-likelihood
-- while loglik_1 is the NON CHURN one. The binary-label step that reads this
-- table relies on that ordering — do not change the sort without updating it.
--
-- MULTISET is stated explicitly to match csi_telco_churn_model; without it the
-- table kind depends on the session mode (NOTE(review): SET is the default in
-- Teradata session mode — confirm for this environment).
CREATE MULTISET TABLE csi_telco_churn_predict_test_pivot
AS
(
    SELECT
        dt.customerid
        , dt.loglik_0
        , dt.loglik_1
    FROM Pivoting (
        ON csi_telco_churn_predict_test
        PARTITION BY customerid
        ORDER BY customerid, prediction
        USING
            PartitionColumns ('customerid')
            NumberOfRows (2)
            TargetColumns ('loglik')
    ) AS dt
)
WITH DATA;

Success: 0 rows affected

In [11]:
-- Spot-check the pivoted row for a single customer.
SELECT
    customerid
    , loglik_0
    , loglik_1
FROM csi_telco_churn_predict_test_pivot
WHERE customerid = 417117;

customerid,loglik_0,loglik_1
417117,-16.384631253551746,-5.391957459552696


In [12]:
-- Remove the binary actual/prediction table left by a previous run so the
-- CREATE TABLE in the following cell can rebuild it under the same name.
-- NOTE(review): presumably errors if the table does not exist yet (e.g. on a first run) — confirm.
DROP TABLE csi_telco_churn_predict_test_binary;

Success: 17 rows affected

In [13]:
-- Build the input for the confusion matrix: one row per test customer with
-- the observed label (actual) and the model's label (prediction), both encoded
-- as 0 = NON CHURN, 1 = CHURN.
--
-- MULTISET is stated explicitly to match csi_telco_churn_model; without it the
-- table kind depends on the session mode (NOTE(review): SET is the default in
-- Teradata session mode — confirm for this environment).
CREATE MULTISET TABLE csi_telco_churn_predict_test_binary
AS
(
    SELECT
        actuals.customerid
        , actuals.actual
        , predictions.prediction
    FROM (
        -- model_dataset_test can hold many rows per customer, so collapse it
        -- to distinct (customerid, actual) pairs. De-duplicating AFTER the
        -- label mapping guarantees two raw category spellings that map to the
        -- same label cannot produce duplicate rows.
        -- Any category other than 'NON CHURN' (including NULL) is labelled 1.
        SELECT DISTINCT
            customerid
            , CASE
                WHEN TRIM(category) = 'NON CHURN' THEN 0
                ELSE 1
              END AS actual
        FROM model_dataset_test
    ) AS actuals
    INNER JOIN (
        -- loglik_0 / loglik_1 come from the pivot ordered by prediction, i.e.
        -- CHURN first and NON CHURN second; predict churn (1) when the CHURN
        -- log-likelihood is the larger one. A NULL on either side yields 0.
        SELECT
            customerid
            , CASE WHEN loglik_0 > loglik_1 THEN 1 ELSE 0 END AS prediction
        FROM csi_telco_churn_predict_test_pivot
    ) AS predictions
        ON actuals.customerid = predictions.customerid
)
WITH DATA;

Success: 0 rows affected

In [15]:
-- List every customer that either actually churned or was predicted to churn.
SELECT
    customerid
    , actual
    , prediction
FROM csi_telco_churn_predict_test_binary
WHERE actual = 1
   OR prediction = 1;

customerid,actual,prediction
350778,1,0
351359,1,1
350547,1,0
350886,1,0
351440,1,1
350412,1,0
350414,1,1
350137,1,1
351200,1,0
350690,1,0


#### 3. Create a confusion matrix to evaluate Naive Bayes model performance

In [16]:
-- Remove the count table from a previous run; the ConfusionMatrix call below
-- names count_output as its OUT TABLE CountTable.
-- NOTE(review): presumably errors if the table does not exist yet (e.g. on a first run) — confirm.
DROP TABLE count_output;

Success: 16 rows affected

In [17]:
-- Remove the statistics table from a previous run; the ConfusionMatrix call
-- below names stat_output as its OUT TABLE StatTable.
-- NOTE(review): presumably errors if the table does not exist yet (e.g. on a first run) — confirm.
DROP TABLE stat_output;

Success: 15 rows affected

In [18]:
-- Remove the accuracy table from a previous run; the ConfusionMatrix call
-- below names acc_output as its OUT TABLE AccuracyTable.
-- NOTE(review): presumably errors if the table does not exist yet (e.g. on a first run) — confirm.
DROP TABLE acc_output;

Success: 16 rows affected

In [19]:
-- Evaluate the model: compare the observed label (actual) with the predicted
-- label (prediction) over the whole binary table in a single partition.
-- The function writes its results to three output tables (count_output,
-- stat_output, acc_output); the SELECT itself only returns a status message.
SELECT *
FROM ConfusionMatrix (
    ON csi_telco_churn_predict_test_binary
    PARTITION BY 1
    OUT TABLE CountTable (count_output)
    OUT TABLE StatTable (stat_output)
    OUT TABLE AccuracyTable (acc_output)
    USING
        ObsColumn ('actual')
        PredictColumn ('prediction')
) AS confusion_result;

message
Success !
The result has been outputted to output tables


In [20]:
-- Show the table written by ConfusionMatrix as OUT TABLE CountTable.
SELECT * FROM count_output;

observation,0,1
1,18,19
0,167210,0


In [21]:
-- Show the table written by ConfusionMatrix as OUT TABLE StatTable
-- (key/value pairs of summary statistics).
SELECT * FROM stat_output;

key,value
95% CI,"(0.9998, 0.9999)"
P-Value [Acc > NIR],0.0004
Mcnemar Test P-Value,0.0001
Accuracy,0.9999
Null Error Rate,0.0002
Kappa,1.0241


In [22]:
-- Show the table written by ConfusionMatrix as OUT TABLE AccuracyTable
-- (one row per measure, one column per class label).
SELECT * FROM acc_output;

measure,0,1
Specificity,0.5135,1.0
Neg Pred Value,1.0,0.9999
Detection Rate,0.9998,0.0001
Balanced Accuracy,0.7568,0.7568
Sensitivity,1.0,0.5135
Pos Pred Value,0.9999,1.0
Prevalence,0.9998,0.0002
Detection Prevalence,0.9999,0.0001
