<a href="https://colab.research.google.com/github/KanthiKiranS/KK-Repo/blob/master/stance_26_7_2020.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import fastai

In [2]:
from fastai.text import * 

In [3]:
def read(path_to_file, shuffle=False, random_state=None):
    """Load a stance CSV and return it as a ``label``/``text`` DataFrame.

    The file must contain the columns ``'Unnamed: 0'`` and ``'0'`` (both are
    discarded) plus exactly two further columns, which are renamed to
    ``text`` and ``label`` in that order.

    Args:
        path_to_file: Anything ``pd.read_csv`` accepts as its first argument.
        shuffle: If true, rows are randomly permuted and re-indexed from 0.
        random_state: Optional seed forwarded to ``DataFrame.sample`` so a
            shuffle can be reproduced; ``None`` (default) leaves it unseeded.

    Returns:
        DataFrame with columns ``['label', 'text']`` where the label is one
        of ``'Query'``, ``'Deny'``, ``'Support'`` or ``'Comment'``. Rows with
        an unrecognised label code or any missing value are dropped.
    """
    label_names = {3.0: 'Query', 2.0: 'Deny', 1.0: 'Support', 0.0: 'Comment'}

    df = pd.read_csv(path_to_file)
    # Use `columns=` rather than a positional axis: pandas 2.0 made every
    # argument of DataFrame.drop after `labels` keyword-only.
    df = df.drop(columns=['Unnamed: 0', '0'])
    df.columns = ['text', 'label']
    # Unknown codes become None here and are removed by dropna() below.
    df['label'] = df['label'].map(label_names.get)
    if shuffle:
        df = df.sample(frac=1, random_state=random_state).reset_index(drop=True)

    # Reverse the column order so the label comes first.
    return df.dropna().iloc[:, ::-1]

In [4]:
def read_veracity(path_to_file, shuffle=False, random_state=None):
    """Load a veracity CSV and return it as a ``label``/``text`` DataFrame.

    The file must contain the columns ``'Unnamed: 0'`` and ``'0'`` (both are
    discarded) plus exactly two further columns, which are renamed to
    ``text`` and ``label`` in that order.

    Args:
        path_to_file: Anything ``pd.read_csv`` accepts as its first argument.
        shuffle: If true, rows are randomly permuted and re-indexed from 0.
        random_state: Optional seed forwarded to ``DataFrame.sample`` so a
            shuffle can be reproduced; ``None`` (default) leaves it unseeded.

    Returns:
        DataFrame with columns ``['label', 'text']`` where the label is one
        of the strings ``'Unverified'``, ``'True'`` or ``'False'``. Rows with
        an unrecognised label code or any missing value are dropped.
    """
    label_names = {2.0: 'Unverified', 1.0: 'True', 0.0: 'False'}

    df = pd.read_csv(path_to_file)
    # Use `columns=` rather than a positional axis: pandas 2.0 made every
    # argument of DataFrame.drop after `labels` keyword-only.
    df = df.drop(columns=['Unnamed: 0', '0'])
    df.columns = ['text', 'label']
    # Unknown codes become None here and are removed by dropna() below.
    df['label'] = df['label'].map(label_names.get)
    if shuffle:
        df = df.sample(frac=1, random_state=random_state).reset_index(drop=True)

    # Reverse the column order so the label comes first.
    return df.dropna().iloc[:, ::-1]

In [5]:
# Stance data: only the training split is shuffled; the test split keeps
# the row order of its file.
train = read(path_to_file='train_stance.csv', shuffle=True)
test = read(path_to_file='test_stance.csv')

In [6]:
# Write each cleaned split to its own CSV file in the working directory.
train.to_csv(path_or_buf='train.csv')
test.to_csv(path_or_buf='test.csv')

In [7]:
# Stack the train and test texts into one Series renumbered from 0 and save
# it as text.csv.
pd.concat(
    [train['text'], test['text']],
    ignore_index=True,
).to_csv('text.csv')

In [8]:
# Build the language-model data from text.csv and the classifier data from
# train.csv. The classifier is handed the language model's training vocab and
# reads its inputs/targets from the 'text' and 'label' columns, batch size 32.
data_lm = TextLMDataBunch.from_csv('', 'text.csv')
data_clas = TextClasDataBunch.from_csv('', 'train.csv', vocab=data_lm.train_ds.vocab, text_cols='text', label_cols='label', bs=32)

In [9]:
# Save both data objects; the following cell reloads them from these files.
data_lm.save('data_lm_export.pkl')
data_clas.save('data_clas_export.pkl')

In [10]:
# Reload the saved data objects.
# NOTE(review): the classifier data is reloaded with bs=16 although it was
# built with bs=32 — confirm which batch size is intended.
data_lm = load_data('', 'data_lm_export.pkl')
data_clas = load_data('', 'data_clas_export.pkl', bs=16)

In [11]:
# Create an AWD_LSTM language-model learner on data_lm (drop_mult=0.5) and
# run one one-cycle epoch at 1e-2.
# NOTE(review): unfreeze() is only called in the next cell, so presumably
# just part of the model is trainable here — verify against the fastai docs.
learn = language_model_learner(data_lm, AWD_LSTM, drop_mult=0.5)
learn.fit_one_cycle(1, 1e-2, moms=(0.8, 0.7))

Downloading https://s3.amazonaws.com/fast-ai-modelzoo/wt103-fwd.tgz


epoch,train_loss,valid_loss,accuracy,time
0,5.682614,4.775503,0.222277,00:06


In [12]:
# Unfreeze the language model and run one more one-cycle epoch at the lower
# rate of 1e-3.
learn.unfreeze()
learn.fit_one_cycle(1, 1e-3, moms=(0.8, 0.7))

epoch,train_loss,valid_loss,accuracy,time
0,4.449545,4.029139,0.295446,00:08


In [13]:
# Sanity check: let the fine-tuned language model continue a prompt by 10 words.
# NOTE(review): the prompt reads like a movie-review example while the
# training texts are tweets — consider a more representative prompt.
learn.predict("This is a review about", n_words=10)

'This is a review about guess that making the opera will justify its everyday reports'

In [14]:
# Save the language model's encoder as 'ft_enc'; the classifier learner
# loads it by that name in the next cell.
learn.save_encoder('ft_enc')

In [15]:
# Rebind `learn` to an AWD_LSTM text classifier on data_clas (drop_mult=0.5)
# and load the encoder saved above as 'ft_enc'.
learn = text_classifier_learner(data_clas, AWD_LSTM, drop_mult=0.5)
learn.load_encoder('ft_enc')

RNNLearner(data=TextClasDataBunch;

Train: LabelList (3389 items)
x: TextList
xxbos @joyce_karam xxmaj is he consider as terrorist or just a criminal person as his faith not xxmaj sunni xxmaj islam ?,xxbos @angryblacklady xxunk xxunk xxunk : xxmaj new xxmaj hashtag # xxup xxunk # xxmaj ferguson,xxbos xxmaj now what ! “ @bbcbreaking : xxmaj watch video showing gunfire inside xxmaj canada 's parliament in xxmaj ottawa http : / / t.co / xxunk http : / / t.co / xxunk ”,xxbos @rt_com this is xxunk up,xxbos @rt_com xxunk xxunk xxunk xxunk xxunk xxmaj sad day as soldier shot at xxmaj war xxmaj memorial reportedly xxunk to injuries .
y: CategoryList
Query,Comment,Deny,Support,Support
Path: .;

Valid: LabelList (848 items)
x: TextList
xxbos @cnnbrk xxup rip xxmaj bro,xxbos @jelani9 xxmaj the first xxunk is to get a xxunk so he can never be a cop again .,xxbos @france24_en : xxmaj both xxmaj charlie xxmaj hebdo suspects killed as police storm building ( police sources ) http : / / t.co / xxunk h

In [16]:
# Display a sample batch of tokenised texts with their targets.
data_clas.show_batch()

text,target
xxbos .@marrty96 xxup and xxup yet xxup its xxup not xxup on / xxup in xxup the xxup tweet / xxup headline . xxup as xxup usual . & & xxup dont xxup fucking xxup tell xxup me xxup to xxup relax . \n xxup this xxup is xxup the xxup same xxup network xxup that xxup reported xxup xxunk,Support
xxbos @michaelskolnik @ac_bowen # xxmaj cleveland a xxmaj cop xxmaj shot a xxunk xxmaj old xxmaj in xxmaj the \n xxmaj head ! # xxmaj black # xxmaj unarmed # xxunk xxmaj cop xxmaj said xxmaj he # xxup thought xxmaj he \n xxmaj saw a xxmaj gun ! ! !,Support
"xxbos .@marrty96 xxup xxunk xxup save xxup me xxup the xxup fucking xxup xxunk , xxup and xxup when xxup my xxup xxunk xxup xxunk xxup members xxup are xxup being xxup held xxup hostage , xxup dont xxup xxunk xxup tell xxup me xxup to xxup calm xxup down .",Support
xxbos “ @independent : xxmaj up to 20 held hostage in xxmaj sydney xxmaj lindt xxmaj cafe siege http : / / t.co / xxunk http : / / t.co / xxunk ” xxmaj oh . xxmaj my . xxmaj god . i am xxup sick !,Comment
"xxbos xxup rt xxup @usatoday : xxmaj name of # xxmaj ferguson cop is expected to be released by 9 xxup et http : / / t.co / xxunk ( xxup j.b. xxmaj forbes , xxup ap ) http : / / t.co / xxunk ”",Comment


In [17]:
# First classifier training pass: one one-cycle epoch at 1e-2.
learn.fit_one_cycle(1, 1e-2, moms=(0.8, 0.7))

epoch,train_loss,valid_loss,accuracy,time
0,0.969745,0.857039,0.691038,00:10


In [18]:
# Second pass: freeze_to(-2), then one epoch with a learning-rate slice from
# 2.5e-3 to 5e-3.
# NOTE(review): presumably this leaves the last two layer groups trainable —
# verify against the fastai docs.
learn.freeze_to(-2)
learn.fit_one_cycle(1, slice(5e-3/2., 5e-3), moms=(0.8, 0.7))

epoch,train_loss,valid_loss,accuracy,time
0,0.903215,0.84624,0.700472,00:11


In [19]:
# Final pass: unfreeze the whole classifier and train 10 epochs with a
# learning-rate slice from 2e-5 to 2e-3.
# NOTE(review): in the recorded run valid_loss is lowest at epoch 1 and rises
# afterwards while train_loss keeps falling — looks like overfitting; consider
# fewer epochs or keeping the best checkpoint.
learn.unfreeze()
learn.fit_one_cycle(10, slice(2e-3/100, 2e-3), moms=(0.8, 0.7))

epoch,train_loss,valid_loss,accuracy,time
0,0.83777,0.834226,0.706368,00:24
1,0.830893,0.81098,0.711085,00:24
2,0.834593,0.823695,0.71934,00:21
3,0.789623,0.832025,0.715802,00:21
4,0.673535,0.880814,0.707547,00:24
5,0.599561,0.913248,0.705189,00:22
6,0.513114,0.982981,0.681604,00:22
7,0.444447,0.974016,0.704009,00:22
8,0.398689,1.000664,0.698113,00:23
9,0.375496,0.97197,0.700472,00:24


In [20]:
# Predict a label for every test text, one at a time; element [0] of each
# prediction is stringified so it can be compared with test['label'].
l = [str(learn.predict(text)[0]) for text in test['text']]

In [21]:
from sklearn.metrics import accuracy_score, f1_score

In [22]:
# Accuracy of the predicted labels against the test split's labels.
accuracy_score(y_true=test['label'], y_pred=l)

0.7117437722419929

In [23]:
# Macro-averaged F1 of the predicted labels against the test split's labels.
f1_score(y_true=test['label'], y_pred=l, average='macro')

0.5416518122400475

In [24]:
# Export the trained classifier learner to a file.
# NOTE(review): this learner was fitted on the stance CSVs, yet the export is
# named 'veracity.pkl' — confirm the intended file name.
learn.export('veracity.pkl')