In [54]:
import pandas as pd
import numpy as np
import re

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
pd.set_option('display.max_colwidth', None)

filepath = u"./bio_format_resume.tsv"

data = pd.read_csv(filepath,sep="\t").dropna()
data.describe()

Unnamed: 0,Word,Tag
count,93954,93954
unique,17397,12
top,and,O
freq,3697,82543


In [55]:
#remove special characters
data["Word"] = data["Word"].apply(lambda x: re.sub(r'[^\x00-\x7f]',r'', x) )

words = list(set(data["Word"].values))
n_words = len(words)  

# group word by sentence
sentence_columns = []
for i in range(3758):
    for j in range(25):
        sentence_columns.append(i)
for i in range(4):
    sentence_columns.append(3758)
    

data['Sentence #'] = sentence_columns
data =  data[["Sentence #","Word","Tag"]]
data

17390


Unnamed: 0,Sentence #,Word,Tag
0,0,Govardhana,Name
1,0,K,Name
2,0,Senior,Profil
3,0,Software,Profil
4,0,Engineer,Profil
...,...,...,...
94174,3757,tuning,O
94175,3758,and,O
94176,3758,escalating,O
94177,3758,Security,O


In [56]:
## remove random O-Tag
#import random
import math

index = data.index[data['Tag'] == "O"]
O_tagSize = math.ceil((len(index)*0.9))

drop_indices = np.random.choice(index, O_tagSize, replace=False)
data = data.drop(drop_indices)
print(len(data))
data["Tag"].value_counts()

#data.to_json("./resume_sentence_dataset.json")

19665


O                      8254
Skills                 6076
Profil                 1175
College Name            958
Degree                  930
Companies worked at     928
Name                    395
Location                381
Email Address           287
Graduation Year         202
Years of Experience      73
UNKNOWN                   6
Name: Tag, dtype: int64

In [57]:
len(data["Word"])

19665

In [60]:
class SentenceGetter(object):
    
    def __init__(self, data):
        self.n_sent = 1
        self.data = data
        self.empty = False
        agg_func = lambda s: [(w,t) for w, t in zip(s["Word"].values.tolist(),s["Tag"].values.tolist())]
        self.grouped = self.data.groupby("Sentence #").apply(agg_func)
        self.sentences = [s for s in self.grouped]
    
    def get_next(self):
        try:
            s = self.grouped[self.n_sent]
            self.n_sent += 1
            return s
        except:
            return None
        
getter = SentenceGetter(data)

In [61]:
sent = getter.get_next()
print(sent)
sentences = getter.sentences


[('Solutions', 'Companies worked at'), ('4', 'O'), ('Oracle', 'Companies worked at'), ('Oracle', 'Companies worked at')]


In [62]:
def word2features(sent, i):
    word = sent[i][0]
    postag = sent[i][1]

    features = {
        'bias': 1.0,
        'word.lower()': word.lower(),
        'word[-3:]': word[-3:],
        'word[-2:]': word[-2:],
        'word.isupper()': word.isupper(),
        'word.istitle()': word.istitle(),
        'word.isdigit()': word.isdigit(),
    }
    if i > 0:
        word1 = sent[i-1][0]
        postag1 = sent[i-1][1]
        features.update({
            '-1:word.lower()': word1.lower(),
            '-1:word.istitle()': word1.istitle(),
            '-1:word.isupper()': word1.isupper(),
        })
    else:
        features['BOS'] = True

    if i < len(sent)-1:
        word1 = sent[i+1][0]
        postag1 = sent[i+1][1]
        features.update({
            '+1:word.lower()': word1.lower(),
            '+1:word.istitle()': word1.istitle(),
            '+1:word.isupper()': word1.isupper(),
        })
    else:
        features['EOS'] = True

    return features


def sent2features(sent):
    return [word2features(sent, i) for i in range(len(sent))]

def sent2labels(sent):
    return [label for token, label in sent]

def sent2tokens(sent):
    return [token for token, label in sent]

In [63]:
X = [sent2features(s) for s in sentences]

y = [sent2labels(s) for s in sentences]

In [64]:
from sklearn.model_selection import train_test_split

train_docs, test_docs, train_labels, test_labels = train_test_split(X, y)

len(train_docs), len(test_docs)

(2678, 893)

In [88]:
from sklearn_crfsuite import CRF

crf = CRF(algorithm='lbfgs',
          c1=0.1,
          c2=0.1,
          max_iterations=100,
          all_possible_transitions=False)

crf.fit(train_docs, train_labels)

CRF(algorithm='lbfgs', all_possible_states=None, all_possible_transitions=False,
    averaging=None, c=None, c1=0.1, c2=0.1, calibration_candidates=None,
    calibration_eta=None, calibration_max_trials=None, calibration_rate=None,
    calibration_samples=None, delta=None, epsilon=None, error_sensitive=None,
    gamma=None, keep_tempfiles=None, linesearch=None, max_iterations=100,
    max_linesearch=None, min_freq=None, model_filename=None, num_memories=None,
    pa_type=None, period=None, trainer_cls=None, variance=None, verbose=False)

In [85]:
from sklearn_crfsuite import scorers
from sklearn_crfsuite import metrics

labels = list(crf.classes_)

labels

y_pred = crf.predict(test_docs)
metrics.flat_f1_score(test_labels, y_pred,
                      average='weighted', labels=labels)

0.8581132151125516

In [86]:
sorted_labels = sorted(
    labels,
    key=lambda name: (name[1:], name[0])
)
print(metrics.flat_classification_report(
    test_labels, y_pred, digits=3
))

                     precision    recall  f1-score   support

       College Name      0.802     0.672     0.731       259
Companies worked at      0.782     0.701     0.739       194
             Degree      0.870     0.751     0.806       205
      Email Address      0.969     0.373     0.539        83
    Graduation Year      0.829     0.708     0.764        48
           Location      0.950     0.710     0.813       107
               Name      0.800     0.621     0.699       103
                  O      0.845     0.934     0.887      2006
             Profil      0.821     0.686     0.747       280
             Skills      0.906     0.929     0.918      1600
            UNKNOWN      0.000     0.000     0.000         2
Years of Experience      1.000     0.636     0.778        22

           accuracy                          0.863      4909
          macro avg      0.798     0.644     0.702      4909
       weighted avg      0.863     0.863     0.858      4909



  _warn_prf(average, modifier, msg_start, len(result))


In [95]:
import eli5
eli5.show_weights(crf, top=30)



From \ To,College Name,Companies worked at,Degree,Email Address,Graduation Year,Location,Name,O,Profil,Skills,UNKNOWN,Years of Experience
College Name,4.085,0.0,0.0,0.0,0.803,0.0,0.0,0.0,0.0,0.046,0.0,0.0
Companies worked at,0.0,3.143,0.0,0.0,0.0,1.029,0.0,0.964,0.0,0.0,0.0,0.0
Degree,3.573,0.0,4.492,0.0,0.0,0.0,0.0,-0.042,0.0,0.0,0.0,0.0
Email Address,0.0,0.0,0.0,2.131,0.0,0.0,0.0,0.226,0.377,0.0,0.0,0.0
Graduation Year,0.0,0.0,0.461,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Location,0.0,0.0,0.0,1.652,0.0,0.48,0.0,0.776,0.0,0.0,0.0,0.0
Name,0.0,0.0,0.0,0.0,0.0,0.75,3.308,-0.339,2.075,0.0,0.0,0.0
O,-0.531,0.497,0.404,0.116,0.0,0.0,0.0,2.261,0.39,0.0,0.0,0.0
Profil,0.0,2.436,0.0,0.0,0.0,0.0,0.0,0.0,3.593,0.0,0.0,0.0
Skills,0.0,0.0,0.0,0.0,0.0,0.0,1.007,0.0,-0.362,5.134,0.0,0.0

Weight?,Feature,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,Unnamed: 9_level_0,Unnamed: 10_level_0,Unnamed: 11_level_0
Weight?,Feature,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Weight?,Feature,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
Weight?,Feature,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3
Weight?,Feature,Unnamed: 2_level_4,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Unnamed: 8_level_4,Unnamed: 9_level_4,Unnamed: 10_level_4,Unnamed: 11_level_4
Weight?,Feature,Unnamed: 2_level_5,Unnamed: 3_level_5,Unnamed: 4_level_5,Unnamed: 5_level_5,Unnamed: 6_level_5,Unnamed: 7_level_5,Unnamed: 8_level_5,Unnamed: 9_level_5,Unnamed: 10_level_5,Unnamed: 11_level_5
Weight?,Feature,Unnamed: 2_level_6,Unnamed: 3_level_6,Unnamed: 4_level_6,Unnamed: 5_level_6,Unnamed: 6_level_6,Unnamed: 7_level_6,Unnamed: 8_level_6,Unnamed: 9_level_6,Unnamed: 10_level_6,Unnamed: 11_level_6
Weight?,Feature,Unnamed: 2_level_7,Unnamed: 3_level_7,Unnamed: 4_level_7,Unnamed: 5_level_7,Unnamed: 6_level_7,Unnamed: 7_level_7,Unnamed: 8_level_7,Unnamed: 9_level_7,Unnamed: 10_level_7,Unnamed: 11_level_7
Weight?,Feature,Unnamed: 2_level_8,Unnamed: 3_level_8,Unnamed: 4_level_8,Unnamed: 5_level_8,Unnamed: 6_level_8,Unnamed: 7_level_8,Unnamed: 8_level_8,Unnamed: 9_level_8,Unnamed: 10_level_8,Unnamed: 11_level_8
Weight?,Feature,Unnamed: 2_level_9,Unnamed: 3_level_9,Unnamed: 4_level_9,Unnamed: 5_level_9,Unnamed: 6_level_9,Unnamed: 7_level_9,Unnamed: 8_level_9,Unnamed: 9_level_9,Unnamed: 10_level_9,Unnamed: 11_level_9
Weight?,Feature,Unnamed: 2_level_10,Unnamed: 3_level_10,Unnamed: 4_level_10,Unnamed: 5_level_10,Unnamed: 6_level_10,Unnamed: 7_level_10,Unnamed: 8_level_10,Unnamed: 9_level_10,Unnamed: 10_level_10,Unnamed: 11_level_10
Weight?,Feature,Unnamed: 2_level_11,Unnamed: 3_level_11,Unnamed: 4_level_11,Unnamed: 5_level_11,Unnamed: 6_level_11,Unnamed: 7_level_11,Unnamed: 8_level_11,Unnamed: 9_level_11,Unnamed: 10_level_11,Unnamed: 11_level_11
+1.937,word.lower():university,,,,,,,,,,
+1.678,+1:word.lower():college,,,,,,,,,,
+1.478,+1:word.lower():institute,,,,,,,,,,
+1.477,+1:word.lower():university,,,,,,,,,,
+0.970,-1:word.lower():of,,,,,,,,,,
+0.960,word[-2:]:ol,,,,,,,,,,
+0.771,word.lower():college,,,,,,,,,,
+0.671,word[-2:]:ge,,,,,,,,,,
+0.544,+1:word.lower():school,,,,,,,,,,
+0.537,word.lower():institute,,,,,,,,,,

Weight?,Feature
1.937,word.lower():university
1.678,+1:word.lower():college
1.478,+1:word.lower():institute
1.477,+1:word.lower():university
0.97,-1:word.lower():of
0.96,word[-2:]:ol
0.771,word.lower():college
0.671,word[-2:]:ge
0.544,+1:word.lower():school
0.537,word.lower():institute

Weight?,Feature
+2.187,word.lower():cisco
+2.086,word.lower():infosys
+1.860,word.lower():oracle
+1.666,word[-2:]:AP
+1.456,word.lower():microsoft
+1.114,word.lower():amazon
+1.017,-1:word.lower():infosys
+0.874,word.lower():ltd
+0.812,word[-3:]:zon
+0.759,word[-2:]:ft

Weight?,Feature
1.664,+1:word.lower():in
1.372,-1:word.lower():in
1.039,word.lower():bachelor
0.615,word[-2:]:ce
0.542,word[-3:]:ter
0.474,word.isupper()
0.473,-1:word.lower():bachelor
0.461,word[-3:]:ech
0.402,word.lower():in
0.401,word[-3:]:in

Weight?,Feature
2.408,-1:word.lower():indeed:
1.865,bias
0.267,-1:word.istitle()
-1.309,word.isupper()
-2.756,word.istitle()

Weight?,Feature
5.202,word.isdigit()
0.658,+1:word.isupper()
0.174,+1:word.istitle()
0.001,-1:word.istitle()
-0.503,word.istitle()

Weight?,Feature
2.535,word[-3:]:bad
2.149,word[-2:]:ai
1.832,word.lower():pune
1.601,word.istitle()
1.427,word[-3:]:uru
1.427,word.lower():bengaluru
1.418,word[-3:]:ore
1.219,word[-2:]:ru
1.154,word[-2:]:ad
0.858,word[-3:]:une

Weight?,Feature
1.263,word.istitle()
1.247,word[-2:]:an
0.368,+1:word.istitle()
0.1,word[-2:]:ni
0.017,word[-2:]:ar
-0.526,bias

Weight?,Feature
+2.309,bias
+1.991,BOS
+1.799,EOS
+0.759,word[-2:]:ed
+0.507,word[-3:]:ent
+0.356,word[-3:]:ing
+0.352,word[-2:]:-
+0.352,word.lower():-
+0.352,word[-3:]:-
+0.318,word[-2:]:il

Weight?,Feature
+1.716,+1:word.lower():engineer
+1.529,word.lower():engineer
+1.511,word.lower():consultant
+1.439,word[-2:]:er
+1.247,word.lower():associate
+1.206,word.lower():lead
+1.169,word[-2:]:or
+1.086,word.lower():senior
+1.049,word[-2:]:st
+0.886,word[-3:]:ant

Weight?,Feature
+1.181,"word[-2:]:),"
+0.597,BOS
+0.569,word[-2:]:s:
+0.409,word.isupper()
+0.363,"word[-2:]:s,"
+0.343,-1:word.lower():than
+0.309,word[-2:]:ng
+0.301,EOS
+0.212,+1:word.lower():than
+0.203,word[-3:]:ing

Weight?,Feature
-0.78,bias

Weight?,Feature
2.065,word[-2:]:rs
1.625,word.isdigit()
1.287,word[-3:]:ars
0.782,+1:word.lower():years
0.043,word.lower():years
-0.707,word.istitle()


In [90]:
crf = CRF(algorithm='lbfgs',
          c1=10,
          c2=0.1,
          max_iterations=100,
          all_possible_transitions=False)


crf.fit(train_docs, train_labels)

CRF(algorithm='lbfgs', all_possible_states=None, all_possible_transitions=False,
    averaging=None, c=None, c1=10, c2=0.1, calibration_candidates=None,
    calibration_eta=None, calibration_max_trials=None, calibration_rate=None,
    calibration_samples=None, delta=None, epsilon=None, error_sensitive=None,
    gamma=None, keep_tempfiles=None, linesearch=None, max_iterations=100,
    max_linesearch=None, min_freq=None, model_filename=None, num_memories=None,
    pa_type=None, period=None, trainer_cls=None, variance=None, verbose=False)

In [91]:
y_pred = crf.predict(test_docs)
metrics.flat_f1_score(test_labels, y_pred,
                      average='weighted', labels=labels)

0.7295256324466686

In [92]:
sorted_labels = sorted(
    labels,
    key=lambda name: (name[1:], name[0])
)
print(metrics.flat_classification_report(
    test_labels, y_pred, labels=sorted_labels, digits=3
))

                     precision    recall  f1-score   support

                  O      0.845     0.916     0.879      2006
            UNKNOWN      0.000     0.000     0.000         2
               Name      0.667     0.019     0.038       103
Years of Experience      0.000     0.000     0.000        22
             Degree      0.694     0.366     0.479       205
             Skills      0.718     0.969     0.825      1600
      Email Address      0.000     0.000     0.000        83
           Location      1.000     0.271     0.426       107
       College Name      0.692     0.347     0.463       259
Companies worked at      0.696     0.402     0.510       194
    Graduation Year      0.632     0.250     0.358        48
             Profil      0.529     0.329     0.405       280

           accuracy                          0.767      4909
          macro avg      0.539     0.323     0.365      4909
       weighted avg      0.745     0.767     0.730      4909



In [94]:
import eli5
eli5.show_weights(crf, top=30)



From \ To,College Name,Companies worked at,Degree,Email Address,Graduation Year,Location,Name,O,Profil,Skills,UNKNOWN,Years of Experience
College Name,4.085,0.0,0.0,0.0,0.803,0.0,0.0,0.0,0.0,0.046,0.0,0.0
Companies worked at,0.0,3.143,0.0,0.0,0.0,1.029,0.0,0.964,0.0,0.0,0.0,0.0
Degree,3.573,0.0,4.492,0.0,0.0,0.0,0.0,-0.042,0.0,0.0,0.0,0.0
Email Address,0.0,0.0,0.0,2.131,0.0,0.0,0.0,0.226,0.377,0.0,0.0,0.0
Graduation Year,0.0,0.0,0.461,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Location,0.0,0.0,0.0,1.652,0.0,0.48,0.0,0.776,0.0,0.0,0.0,0.0
Name,0.0,0.0,0.0,0.0,0.0,0.75,3.308,-0.339,2.075,0.0,0.0,0.0
O,-0.531,0.497,0.404,0.116,0.0,0.0,0.0,2.261,0.39,0.0,0.0,0.0
Profil,0.0,2.436,0.0,0.0,0.0,0.0,0.0,0.0,3.593,0.0,0.0,0.0
Skills,0.0,0.0,0.0,0.0,0.0,0.0,1.007,0.0,-0.362,5.134,0.0,0.0

Weight?,Feature,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,Unnamed: 9_level_0,Unnamed: 10_level_0,Unnamed: 11_level_0
Weight?,Feature,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Weight?,Feature,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
Weight?,Feature,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3
Weight?,Feature,Unnamed: 2_level_4,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Unnamed: 8_level_4,Unnamed: 9_level_4,Unnamed: 10_level_4,Unnamed: 11_level_4
Weight?,Feature,Unnamed: 2_level_5,Unnamed: 3_level_5,Unnamed: 4_level_5,Unnamed: 5_level_5,Unnamed: 6_level_5,Unnamed: 7_level_5,Unnamed: 8_level_5,Unnamed: 9_level_5,Unnamed: 10_level_5,Unnamed: 11_level_5
Weight?,Feature,Unnamed: 2_level_6,Unnamed: 3_level_6,Unnamed: 4_level_6,Unnamed: 5_level_6,Unnamed: 6_level_6,Unnamed: 7_level_6,Unnamed: 8_level_6,Unnamed: 9_level_6,Unnamed: 10_level_6,Unnamed: 11_level_6
Weight?,Feature,Unnamed: 2_level_7,Unnamed: 3_level_7,Unnamed: 4_level_7,Unnamed: 5_level_7,Unnamed: 6_level_7,Unnamed: 7_level_7,Unnamed: 8_level_7,Unnamed: 9_level_7,Unnamed: 10_level_7,Unnamed: 11_level_7
Weight?,Feature,Unnamed: 2_level_8,Unnamed: 3_level_8,Unnamed: 4_level_8,Unnamed: 5_level_8,Unnamed: 6_level_8,Unnamed: 7_level_8,Unnamed: 8_level_8,Unnamed: 9_level_8,Unnamed: 10_level_8,Unnamed: 11_level_8
Weight?,Feature,Unnamed: 2_level_9,Unnamed: 3_level_9,Unnamed: 4_level_9,Unnamed: 5_level_9,Unnamed: 6_level_9,Unnamed: 7_level_9,Unnamed: 8_level_9,Unnamed: 9_level_9,Unnamed: 10_level_9,Unnamed: 11_level_9
Weight?,Feature,Unnamed: 2_level_10,Unnamed: 3_level_10,Unnamed: 4_level_10,Unnamed: 5_level_10,Unnamed: 6_level_10,Unnamed: 7_level_10,Unnamed: 8_level_10,Unnamed: 9_level_10,Unnamed: 10_level_10,Unnamed: 11_level_10
Weight?,Feature,Unnamed: 2_level_11,Unnamed: 3_level_11,Unnamed: 4_level_11,Unnamed: 5_level_11,Unnamed: 6_level_11,Unnamed: 7_level_11,Unnamed: 8_level_11,Unnamed: 9_level_11,Unnamed: 10_level_11,Unnamed: 11_level_11
+1.937,word.lower():university,,,,,,,,,,
+1.678,+1:word.lower():college,,,,,,,,,,
+1.478,+1:word.lower():institute,,,,,,,,,,
+1.477,+1:word.lower():university,,,,,,,,,,
+0.970,-1:word.lower():of,,,,,,,,,,
+0.960,word[-2:]:ol,,,,,,,,,,
+0.771,word.lower():college,,,,,,,,,,
+0.671,word[-2:]:ge,,,,,,,,,,
+0.544,+1:word.lower():school,,,,,,,,,,
+0.537,word.lower():institute,,,,,,,,,,

Weight?,Feature
1.937,word.lower():university
1.678,+1:word.lower():college
1.478,+1:word.lower():institute
1.477,+1:word.lower():university
0.97,-1:word.lower():of
0.96,word[-2:]:ol
0.771,word.lower():college
0.671,word[-2:]:ge
0.544,+1:word.lower():school
0.537,word.lower():institute

Weight?,Feature
+2.187,word.lower():cisco
+2.086,word.lower():infosys
+1.860,word.lower():oracle
+1.666,word[-2:]:AP
+1.456,word.lower():microsoft
+1.114,word.lower():amazon
+1.017,-1:word.lower():infosys
+0.874,word.lower():ltd
+0.812,word[-3:]:zon
+0.759,word[-2:]:ft

Weight?,Feature
1.664,+1:word.lower():in
1.372,-1:word.lower():in
1.039,word.lower():bachelor
0.615,word[-2:]:ce
0.542,word[-3:]:ter
0.474,word.isupper()
0.473,-1:word.lower():bachelor
0.461,word[-3:]:ech
0.402,word.lower():in
0.401,word[-3:]:in

Weight?,Feature
2.408,-1:word.lower():indeed:
1.865,bias
0.267,-1:word.istitle()
-1.309,word.isupper()
-2.756,word.istitle()

Weight?,Feature
5.202,word.isdigit()
0.658,+1:word.isupper()
0.174,+1:word.istitle()
0.001,-1:word.istitle()
-0.503,word.istitle()

Weight?,Feature
2.535,word[-3:]:bad
2.149,word[-2:]:ai
1.832,word.lower():pune
1.601,word.istitle()
1.427,word[-3:]:uru
1.427,word.lower():bengaluru
1.418,word[-3:]:ore
1.219,word[-2:]:ru
1.154,word[-2:]:ad
0.858,word[-3:]:une

Weight?,Feature
1.263,word.istitle()
1.247,word[-2:]:an
0.368,+1:word.istitle()
0.1,word[-2:]:ni
0.017,word[-2:]:ar
-0.526,bias

Weight?,Feature
+2.309,bias
+1.991,BOS
+1.799,EOS
+0.759,word[-2:]:ed
+0.507,word[-3:]:ent
+0.356,word[-3:]:ing
+0.352,word[-2:]:-
+0.352,word.lower():-
+0.352,word[-3:]:-
+0.318,word[-2:]:il

Weight?,Feature
+1.716,+1:word.lower():engineer
+1.529,word.lower():engineer
+1.511,word.lower():consultant
+1.439,word[-2:]:er
+1.247,word.lower():associate
+1.206,word.lower():lead
+1.169,word[-2:]:or
+1.086,word.lower():senior
+1.049,word[-2:]:st
+0.886,word[-3:]:ant

Weight?,Feature
+1.181,"word[-2:]:),"
+0.597,BOS
+0.569,word[-2:]:s:
+0.409,word.isupper()
+0.363,"word[-2:]:s,"
+0.343,-1:word.lower():than
+0.309,word[-2:]:ng
+0.301,EOS
+0.212,+1:word.lower():than
+0.203,word[-3:]:ing

Weight?,Feature
-0.78,bias

Weight?,Feature
2.065,word[-2:]:rs
1.625,word.isdigit()
1.287,word[-3:]:ars
0.782,+1:word.lower():years
0.043,word.lower():years
-0.707,word.istitle()
