<img src="https://www.teradata.com/Teradata/Images/Rebrand/Teradata_logo-two_color.png" alt="Teradata" width="400" align="right"/>

<br/>

# TELCO CHURN DEMO
# **PART 3: MODELLING (SUPPORT VECTOR MACHINE - SVM)**

In [21]:
%lsconnect

*Connected:   NAME=demo, USER=vantage, HOST=tddb-env-d-268.vantage.demo.intellicloud.teradata.com


In [22]:
%connect demo

Success: 'demo' connection established


#### 1a. Create an SVM model on associated text tokens for known PATHS (TRAINING TABLE ONLY) **ADS from PART 2**

In [26]:
-- Remove the previous SVM model table; the SVMSparse call in the next cell
-- writes its ModelTable output to this same name.
-- NOTE(review): presumably this errors on a first run when the table does not
-- exist yet -- confirm and skip the cell in that case.
DROP TABLE TelcoChurn.svm_telco_churn_model;

Success: 15 rows affected

In [27]:
-- Train an SVM classifier on TelcoChurn.model_dataset_train and write the
-- fitted model to TelcoChurn.svm_telco_churn_model (the ModelTable output).
-- Column roles: 'customerid' identifies the sample, 'token' names the
-- attribute, and 'category' holds the class label to learn.
-- NOTE(review): MaxStep (150) presumably caps the number of training steps and
-- Seed (0) presumably fixes the random seed -- confirm against the SVMSparse docs.
SELECT * FROM SVMSparse (
  ON TelcoChurn.model_dataset_train AS InputTable
  OUT TABLE ModelTable (TelcoChurn.svm_telco_churn_model)
  USING
  IDColumn ('customerid')
  AttributeNameColumn ('token')
  ResponseColumn ('category')
  MaxStep (150)
  Seed (0)
) AS dt;

message
Model table is created successfully
The model is trained with 204455 samples and 12 unique attributes
There are 2 different classes in the training set
"The model is converged after 11 steps with epsilon 0.01, the value of the loss function for the training set is 65.476902280285"
The corresponding training parameters are cost:1.0 bias:0.0


#### 1b.  Perform the SVM PREDICT function using the MLE on the TEST data

In [28]:
-- Remove the previous prediction table; the CREATE TABLE ... AS in the next
-- cell rebuilds it under the same name.
DROP TABLE TelcoChurn.csi_telco_churn_predict_svm_test;

Success: 17 rows affected

In [29]:
-- Score the test set with the trained SVM model and persist the predictions.
-- Rows of TelcoChurn.model_dataset_test are partitioned by customerid, and the
-- model table TelcoChurn.svm_telco_churn_model is supplied as a DIMENSION input.
-- 'customerid' is the sample id and 'token' the attribute column, the same
-- columns that were used when training the model.
-- NOTE(review): DIMENSION presumably makes the full model available to every
-- partition of the input -- confirm against the function documentation.
CREATE TABLE TelcoChurn.csi_telco_churn_predict_svm_test
AS
(SELECT *
    FROM SparseSVMPredictor@coprocessor (
    ON TelcoChurn.model_dataset_test AS input
    PARTITION BY customerid
    ON TelcoChurn.svm_telco_churn_model AS model DIMENSION
    USING
    SampleIDColumn ('customerid')
    AttributeColumn ('token')
  ) AS dt
)
WITH DATA;

Success: 0 rows affected

In [30]:
-- Preview ten scored customers from the prediction table.
-- No ORDER BY is given, so which ten rows come back is not guaranteed.
SELECT TOP 10
    customerid,
    predict_value,
    predict_confidence
FROM TelcoChurn.csi_telco_churn_predict_svm_test;

customerid,predict_value,predict_confidence
351924,NON CHURN,0.7313363707012815
599686,NON CHURN,0.7371807837315669
367014,NON CHURN,0.7406517903199477
524766,NON CHURN,0.7450795955129667
389751,NON CHURN,0.9567345847110764
443341,NON CHURN,0.7450795955129667
370032,NON CHURN,0.8914599296326652
422215,NON CHURN,0.8898314690454716
551663,NON CHURN,0.7330674477467816
512225,NON CHURN,0.7450795955129667


#### 1c. Create a binary output for confusion matrix

In [32]:
-- Remove the previous binary actual/prediction table; the CREATE TABLE ... AS
-- in the next cell rebuilds it under the same name.
DROP TABLE TelcoChurn.csi_telco_churn_predict_test_binary_svm;

Success: 17 rows affected

In [33]:
-- Build the confusion-matrix input: for each test customer, a 0/1 observed
-- label next to the 0/1 label predicted by the SVM.
CREATE TABLE TelcoChurn.csi_telco_churn_predict_test_binary_svm AS (
    SELECT
        observed.customerid,
        observed.actual,
        predicted.prediction
    FROM (
        -- Observed label: 'NON CHURN' maps to 0, any other category to 1.
        -- LIKE is used without wildcards, so it acts as an exact match on the
        -- trimmed value.
        SELECT
            test_labels.customerid,
            CASE
                WHEN TRIM(test_labels.category) LIKE 'NON CHURN' THEN 0
                ELSE 1
            END AS actual
        FROM (
            -- Collapse the test set to one row per distinct
            -- (customerid, category) pair.
            SELECT DISTINCT
                customerid,
                category
            FROM TelcoChurn.model_dataset_test
        ) AS test_labels
    ) AS observed
    INNER JOIN (
        -- Predicted label: 'CHURN' maps to 1, everything else to 0.
        SELECT
            customerid,
            CASE
                WHEN TRIM(predict_value) LIKE 'CHURN' THEN 1
                ELSE 0
            END AS prediction
        FROM TelcoChurn.csi_telco_churn_predict_svm_test
    ) AS predicted
        ON observed.customerid = predicted.customerid
) WITH DATA;

Success: 0 rows affected

In [34]:
-- Preview ten rows of the binary actual/prediction table.
-- No ORDER BY is given, so which ten rows come back is not guaranteed.
SELECT TOP 10
    customerid,
    actual,
    prediction
FROM TelcoChurn.csi_telco_churn_predict_test_binary_svm;

customerid,actual,prediction
351924,0,0
599686,0,0
367014,0,0
524766,0,0
389751,0,0
443341,0,0
370032,0,0
422215,0,0
551663,0,0
512225,0,0


#### 2. Create a confusion matrix to evaluate SVM model performance

In [35]:
-- Remove the previous count table; ConfusionMatrix below writes its CountTable
-- output to this same name.
DROP TABLE TelcoChurn.count_output_svm;

Success: 16 rows affected

In [36]:
-- Remove the previous statistics table; ConfusionMatrix below writes its
-- StatTable output to this same name.
DROP TABLE TelcoChurn.stat_output_svm;

Success: 15 rows affected

In [37]:
-- Remove the previous accuracy table; ConfusionMatrix below writes its
-- AccuracyTable output to this same name.
DROP TABLE TelcoChurn.acc_output_svm;

Success: 16 rows affected

In [38]:
-- Evaluate the SVM predictions with a confusion matrix.
-- Input is the binary table, comparing the observed column 'actual' with the
-- predicted column 'prediction'. PARTITION BY 1 uses a constant expression, so
-- all rows fall into a single partition.
-- Results are written to three output tables: count_output_svm (CountTable),
-- stat_output_svm (StatTable) and acc_output_svm (AccuracyTable).
SELECT * FROM ConfusionMatrix (
  ON TelcoChurn.csi_telco_churn_predict_test_binary_svm
    PARTITION BY 1
    OUT TABLE CountTable (TelcoChurn.count_output_svm)
    OUT TABLE StatTable (TelcoChurn.stat_output_svm)
    OUT TABLE AccuracyTable(TelcoChurn.acc_output_svm)
    USING
    ObsColumn ('actual')
    PredictColumn ('prediction')
) AS dt;

message
Success !
The result has been outputted to output tables


In [39]:
-- Show the confusion-matrix counts written to the CountTable output.
SELECT *
FROM TelcoChurn.count_output_svm;

observation,0,1
1,22,16
0,153387,0


In [40]:
-- Show the overall statistics written to the StatTable output.
SELECT *
FROM TelcoChurn.stat_output_svm;

key,value
95% CI,"(0.9998, 0.9999)"
P-Value [Acc > NIR],0.0035
Mcnemar Test P-Value,0
Accuracy,0.9999
Null Error Rate,0.0002
Kappa,0.9643


In [41]:
-- Show the per-class measures written to the AccuracyTable output.
SELECT *
FROM TelcoChurn.acc_output_svm;

measure,0,1
Specificity,0.4211,1.0
Neg Pred Value,1.0,0.9999
Detection Rate,0.9998,0.0001
Balanced Accuracy,0.7105,0.7105
Sensitivity,1.0,0.4211
Pos Pred Value,0.9999,1.0
Prevalence,0.9998,0.0002
Detection Prevalence,0.9999,0.0001
