In [64]:
#Load the libraries we want

import pandas as pd                                                                           # Pandas

from sklearn.preprocessing import LabelEncoder                                                # For target values
from sklearn.feature_extraction.text import TfidfVectorizer                                   # For text to numbers
from sklearn.model_selection import train_test_split                                          # For splitting the data

from sklearn.svm import LinearSVC                                                             # Our Model

from sklearn.metrics import accuracy_score                                                    # Metrics to evaluate Model

In [69]:
# Read the labelled user stories from the CSV file into a DataFrame.
# NOTE(review): the path is an absolute, machine-specific location — confirm it
# exists before running on another machine.
df_Data = pd.read_csv(
    filepath_or_buffer=r'C:\NLP\Labels\User_Stories_Banking.csv',
)

In [70]:
# Show long story text in full instead of truncating it in DataFrame output.
# The fully-qualified option name is used: abbreviated names are resolved by
# pattern matching and can become ambiguous as pandas adds new options.
pd.set_option('display.max_colwidth', 1000)

In [71]:
# Preview the first five loaded rows to confirm the file was read as expected.
df_Data.head(n=5)

Unnamed: 0,Stories,Tag
0,"As a customer, I want to open a RD account, So that I can start saving",Deposit
1,"As a RD holder, I want to select a nominee, So that I can let bank know my nominee details",Deposit
2,"As a RD holder, I want to view current balance, So that I can know my savings",Deposit
3,"As a banker, I want amount deducted monthly from customer account, So that the same is invested towards customer's RD account",Deposit
4,"As a banker, I want to view summaries of RD, So that I can let customer know when asked",Deposit


In [72]:
# Report how many stories were loaded (row count of the DataFrame).
print('Total rows in the input file: ', df_Data.shape[0], '\n')

Total rows in the input file:  48 



In [73]:
# Separate the loaded table into model inputs and labels. Despite their names,
# these are NOT a train/test split: "df_train" is the 'Stories' column (the
# text to classify) and "df_test" is the 'Tag' column (the label to predict).
# Each is a pandas Series.
df_train, df_test = df_Data['Stories'], df_Data['Tag']


In [77]:
# Split the 'Stories' and their corresponding 'Tags' into training and test
# sets (75% / 25%). The model is trained on the training portion and evaluated
# on the held-out test portion.
#
# `shuffle` must be a boolean: the string 'y' only worked by being truthy and
# is rejected by scikit-learn versions that validate parameter types.
# `random_state` keeps the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    df_train, df_test, test_size=0.25, shuffle=True, random_state=42
)


In [78]:
# Models work on numbers rather than raw text, so every story is converted
# into a numeric TF-IDF vector.

# Learn the vocabulary and term weights from the training stories only.
TfIdf_Vectorizer = TfidfVectorizer().fit(X_train)

# Encode both splits with that same fitted vectorizer, so the test data is
# transformed using what was learned from the training data.
X_train_transformed = TfIdf_Vectorizer.transform(X_train)
X_test_transformed = TfIdf_Vectorizer.transform(X_test)


In [100]:
# Show the learned mapping from each term to its column index.
print('Vocabulary: ', TfIdf_Vectorizer.vocabulary_, sep=' ')

Vocabulary:  {'as': 6, 'rd': 75, 'holder': 44, 'want': 102, 'to': 95, 'choose': 19, 'option': 66, 'of': 59, 'account': 0, 'so': 87, 'that': 93, 'bank': 9, 'can': 13, 'deposit': 28, 'my': 57, 'amount': 1, 'on': 61, 'maturity': 54, 'such': 91, 'change': 17, 'monthly': 56, 'emi': 36, 'debit': 25, 'prepay': 70, 'and': 2, 'close': 20, 'education': 34, 'loan': 53, 'early': 33, 'check': 18, 'eligibility': 35, 'apply': 4, 'fd': 37, 'banker': 10, 'view': 101, 'summaries': 92, 'savings': 84, 'print': 72, 'it': 50, 'share': 86, 'with': 105, 'customer': 24, 'know': 51, 'for': 38, 'credit': 22, 'card': 14, 'one': 62, 'select': 85, 'nominee': 58, 'let': 52, 'details': 29, 'current': 23, 'balance': 8, 'review': 80, 'uploaded': 99, 'documents': 31, 'decide': 26, 'if': 46, 'sanction': 82, 'the': 94, 'quantum': 74, 'vehicle': 100, 'requested': 79, 'report': 77, 'approval': 5, 'open': 64, 'start': 88, 'saving': 83, 'receive': 76, 'statements': 90, 'frequency': 39, 'have': 42, 'history': 43, 'payments': 6

In [102]:
# Display the first story of the training split (keeps its original row index).
X_train.head(1)

6    As a RD holder, I want to choose option of account, So that bank can deposit my amount on maturity  to such account
Name: Stories, dtype: object

In [104]:
# Build a readable table of the TF-IDF matrix: one row per training story,
# one column per vocabulary term.

# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;
# prefer get_feature_names_out() and fall back only on older releases.
if hasattr(TfIdf_Vectorizer, 'get_feature_names_out'):
    feature_names = TfIdf_Vectorizer.get_feature_names_out()
else:
    feature_names = TfIdf_Vectorizer.get_feature_names()

cols = ['feature: ' + name for name in feature_names]

# toarray() yields a plain ndarray (todense() returns the legacy np.matrix type).
df_TfIdf = pd.DataFrame(X_train_transformed.toarray(), columns=cols)
df_TfIdf

Unnamed: 0,feature: account,feature: amount,feature: and,feature: annually,feature: apply,feature: approval,feature: as,feature: asked,feature: balance,feature: bank,...,feature: towards,feature: update,feature: upload,feature: uploaded,feature: vehicle,feature: view,feature: want,feature: when,feature: which,feature: with
0,0.238783,0.212457,0.0,0.0,0.0,0.0,0.096003,0.0,0.0,0.24303,...,0.0,0.0,0.0,0.0,0.0,0.0,0.096003,0.0,0.0,0.0
1,0.11118,0.197844,0.288281,0.0,0.0,0.0,0.0894,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0894,0.0,0.0,0.0
2,0.160843,0.0,0.0,0.0,0.417055,0.0,0.129334,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.129334,0.0,0.0,0.0
3,0.238783,0.212457,0.0,0.0,0.0,0.0,0.096003,0.0,0.0,0.24303,...,0.0,0.0,0.0,0.0,0.0,0.0,0.096003,0.0,0.0,0.0
4,0.112872,0.0,0.29267,0.0,0.0,0.0,0.090761,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.229759,0.090761,0.0,0.0,0.355581
5,0.126901,0.0,0.0,0.0,0.329045,0.0,0.102041,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.102041,0.0,0.0,0.0
6,0.223697,0.0,0.0,0.0,0.0,0.0,0.089938,0.0,0.0,0.227675,...,0.0,0.0,0.0,0.0,0.0,0.0,0.089938,0.0,0.0,0.0
7,0.0,0.0,0.0,0.0,0.0,0.0,0.133526,0.0,0.43057,0.0,...,0.0,0.0,0.0,0.0,0.0,0.338017,0.133526,0.0,0.0,0.0
8,0.14981,0.0,0.0,0.0,0.0,0.0,0.120463,0.0,0.388446,0.0,...,0.0,0.0,0.0,0.0,0.0,0.304948,0.120463,0.0,0.0,0.0
9,0.0,0.0,0.0,0.0,0.0,0.0,0.09703,0.0,0.0,0.0,...,0.0,0.0,0.0,0.312884,0.273542,0.0,0.09703,0.0,0.0,0.0


In [80]:
# Encode the target (the 'Tag' column) as integers.

# Learn the set of distinct tags from the training labels.
targets = LabelEncoder()
targets.fit(y_train)

# Apply the same fitted encoding to the training and test labels.
y_train_transformed, y_test_transformed = (
    targets.transform(labels) for labels in (y_train, y_test)
)

In [88]:
# List the tag names known to the encoder; a tag's position is its integer code.
print('Tags:', targets.classes_, sep=' ')

Tags: ['Cards' 'Deposit' 'Loans']


In [82]:
# Create the classifier: a linear support vector machine.
# 'C' is a hyperparameter of the model; here it is set to 3.
# NOTE(review): no random_state is passed, so results may vary slightly
# between runs — consider fixing a seed if exact reproducibility matters.
LSVC = LinearSVC(C=3)

In [83]:
# Train the classifier on the vectorized training stories and their encoded tags.
LSVC.fit(X=X_train_transformed, y=y_train_transformed)

LinearSVC(C=3, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
     verbose=0)

In [84]:
# Predict encoded tags for the held-out test stories.
predictions = LSVC.predict(X=X_test_transformed)

In [86]:
# Score the model: fraction of test stories whose predicted tag matches the true tag.
print('Accuracy: ', accuracy_score(y_true=y_test_transformed, y_pred=predictions))

Accuracy:  1.0


We now have a model that can predict the tag for a new User Story. Let us try it on a few examples below.

In [99]:
# Try the trained model on a few new user stories.
UserStory = ['As a banker, I want to view RD account without nominees, So that I can ask customer to update the nominee',
             'As a banker, I want to update monthly EMI against Loan, So that the new EMI is withdrawn from customer account',
             'As a customer, I want to know to upgrade debit card, So that I can go for higher limit'
            ]


for US in UserStory:

    # Vectorize the story with the vectorizer fitted on the training data,
    # then predict its encoded tag.
    UserStory_Vectorized = TfIdf_Vectorizer.transform([US])
    Predicted_Value = LSVC.predict(UserStory_Vectorized)

    # Decode the integer prediction back to its tag name with the fitted
    # LabelEncoder instead of a hand-written if/elif table, so the mapping
    # cannot drift out of sync with the classes learned from the data.
    label = targets.inverse_transform(Predicted_Value)[0]

    print('User Story: ', US)
    print('The tag for the User Story is: ', label,'\n' )

User Story:  As a banker, I want to view RD account without nominees, So that I can ask customer to update the nominee
The tag for the User Story is:  Deposit 

User Story:  As a banker, I want to update monthly EMI against Loan, So that the new EMI is withdrawn from customer account
The tag for the User Story is:  Loans 

User Story:  As a customer, I want to know to upgrade debit card, So that I can go for higher limit
The tag for the User Story is:  Cards 

