# Se importan las bibliotecas necesarias

In [1]:
import pandas as pd
import re
import numpy as np
import xgboost as xgb
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.ensemble import RandomForestRegressor, VotingRegressor, GradientBoostingClassifier
from sklearn.metrics import accuracy_score,recall_score,precision_score,f1_score
from sklearn.metrics import mean_squared_error
import lightgbm as lgb
from catboost import CatBoostRegressor
%matplotlib inline

# Se lee el .csv

In [2]:
train = pd.read_csv('csv/train.csv')
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7613 entries, 0 to 7612
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   id        7613 non-null   int64 
 1   keyword   7552 non-null   object
 2   location  5080 non-null   object
 3   text      7613 non-null   object
 4   target    7613 non-null   int64 
dtypes: int64(2), object(3)
memory usage: 297.5+ KB


In [3]:
ids_with_target_error = [328,443,513,2619,3640,3900,4342,5781,6552,6554,6570,6701,6702,6729,6861,7226]
train.loc[train['id'].isin(ids_with_target_error),'target'] = 0

In [4]:
col_rename = {'id': 'tp_datos_id', 'keyword': 'tp_datos_keyword', 'location': 'tp_datos_location', 'text': 'tp_datos_text'}
train = train.rename(columns= col_rename)
train.head()

Unnamed: 0,tp_datos_id,tp_datos_keyword,tp_datos_location,tp_datos_text,target
0,1,,,Our Deeds are the Reason of this #earthquake M...,1
1,4,,,Forest fire near La Ronge Sask. Canada,1
2,5,,,All residents asked to 'shelter in place' are ...,1
3,6,,,"13,000 people receive #wildfires evacuation or...",1
4,7,,,Just got sent this photo from Ruby #Alaska as ...,1


In [5]:
train.head()

Unnamed: 0,tp_datos_id,tp_datos_keyword,tp_datos_location,tp_datos_text,target
0,1,,,Our Deeds are the Reason of this #earthquake M...,1
1,4,,,Forest fire near La Ronge Sask. Canada,1
2,5,,,All residents asked to 'shelter in place' are ...,1
3,6,,,"13,000 people receive #wildfires evacuation or...",1
4,7,,,Just got sent this photo from Ruby #Alaska as ...,1


In [6]:
def clean_text(df):
    words = df['tp_datos_text'].str.split()
    clean_words = []
    clean_texts = []

    for sentence in words:
        clean_sentence = []
        for word in sentence:
            clean_word = re.sub('[^A-Za-z0-9]+','', word)
            clean_word = re.sub('[0-9]+', '', clean_word)
            if(clean_word != ''):
                clean_sentence.append(clean_word.lower())
        clean_words.append(clean_sentence)
    for words in clean_words:
        clean_text = ''
        for word in words:
            #word_without_num = ''.join([i for i in word if not i.isdigit()])
            #if(word_without_num != ''):
                clean_text += ' ' + word
             #   clean_text += ' ' + word_without_num
        clean_texts.append(clean_text)
    return clean_texts
train['clean_text']= clean_text(train)
train.head()

Unnamed: 0,tp_datos_id,tp_datos_keyword,tp_datos_location,tp_datos_text,target,clean_text
0,1,,,Our Deeds are the Reason of this #earthquake M...,1,our deeds are the reason of this earthquake m...
1,4,,,Forest fire near La Ronge Sask. Canada,1,forest fire near la ronge sask canada
2,5,,,All residents asked to 'shelter in place' are ...,1,all residents asked to shelter in place are b...
3,6,,,"13,000 people receive #wildfires evacuation or...",1,people receive wildfires evacuation orders in...
4,7,,,Just got sent this photo from Ruby #Alaska as ...,1,just got sent this photo from ruby alaska as ...


In [7]:
vocab = pd.read_csv('negative-words.txt', header=None)
vocab = vocab[0]
vocab

0         abnormal
1          abolish
2       abominable
3       abominably
4        abominate
           ...    
4776          zaps
4777        zealot
4778       zealous
4779     zealously
4780        zombie
Name: 0, Length: 4781, dtype: object

In [8]:
from sklearn.feature_extraction.text import TfidfVectorizer
v = TfidfVectorizer(min_df=2, stop_words= {'english'}, max_df= 0.60, ngram_range= (1,2))
x = v.fit_transform(train['clean_text'])
tf = pd.DataFrame.sparse.from_spmatrix(x)
tf.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,16703,16704,16705,16706,16707,16708,16709,16710,16711,16712
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [9]:
rango = tf.shape[1] 

In [10]:
cols = v.get_feature_names()

In [11]:
palabras = []
for i in range (0, rango):
    if tf[i].mean() == 0:
        del tf[i]
    else: 
        palabras.append(i)
len(palabras)

16713

In [12]:
palabras[6400]

6400

In [13]:
col_rename_tf = {}
type(col_rename)
for palabra in palabras: 
    col_rename[palabra] = str(cols[palabra])

In [14]:
train.head()

Unnamed: 0,tp_datos_id,tp_datos_keyword,tp_datos_location,tp_datos_text,target,clean_text
0,1,,,Our Deeds are the Reason of this #earthquake M...,1,our deeds are the reason of this earthquake m...
1,4,,,Forest fire near La Ronge Sask. Canada,1,forest fire near la ronge sask canada
2,5,,,All residents asked to 'shelter in place' are ...,1,all residents asked to shelter in place are b...
3,6,,,"13,000 people receive #wildfires evacuation or...",1,people receive wildfires evacuation orders in...
4,7,,,Just got sent this photo from Ruby #Alaska as ...,1,just got sent this photo from ruby alaska as ...


In [15]:
tf.rename(columns=col_rename, inplace=True)
columnas_tf = tf.columns
tf.head()

Unnamed: 0,aa,aba,aba as,abandon,abandoned,abandoned aircraft,abbott,abbswinston,abbswinston zionist,abc,...,zippednews,zombie,zombie apocalypse,zone,zone coming,zone of,zouma,zouma has,zss,zss vs
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [16]:
train = pd.concat([train, tf], axis=1, sort=False)

In [17]:
test = pd.read_csv('csv/test.csv')

In [18]:
col_rename = {'id': 'tp_datos_id', 'keyword': 'tp_datos_keyword', 'location': 'tp_datos_location', 'text': 'tp_datos_text'}
test = test.rename(columns= col_rename)
test.head()

Unnamed: 0,tp_datos_id,tp_datos_keyword,tp_datos_location,tp_datos_text
0,0,,,Just happened a terrible car crash
1,2,,,"Heard about #earthquake is different cities, s..."
2,3,,,"there is a forest fire at spot pond, geese are..."
3,9,,,Apocalypse lighting. #Spokane #wildfires
4,11,,,Typhoon Soudelor kills 28 in China and Taiwan


In [19]:
test['clean_text']= clean_text(test)
x = v.transform(test['clean_text'])
tf = pd.DataFrame.sparse.from_spmatrix(x)
tf.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,16703,16704,16705,16706,16707,16708,16709,16710,16711,16712
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [20]:
tf.columns = columnas_tf

In [21]:
tf.head()

Unnamed: 0,aa,aba,aba as,abandon,abandoned,abandoned aircraft,abbott,abbswinston,abbswinston zionist,abc,...,zippednews,zombie,zombie apocalypse,zone,zone coming,zone of,zouma,zouma has,zss,zss vs
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [22]:
test = pd.concat([test, tf], axis=1, sort=False)
del test['tp_datos_keyword']
del test['tp_datos_location']
del test['tp_datos_text']
del test['clean_text']
test.set_index('tp_datos_id', inplace=True)
test.head()

Unnamed: 0_level_0,aa,aba,aba as,abandon,abandoned,abandoned aircraft,abbott,abbswinston,abbswinston zionist,abc,...,zippednews,zombie,zombie apocalypse,zone,zone coming,zone of,zouma,zouma has,zss,zss vs
tp_datos_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [23]:
train.head()

Unnamed: 0,tp_datos_id,tp_datos_keyword,tp_datos_location,tp_datos_text,target,clean_text,aa,aba,aba as,abandon,...,zippednews,zombie,zombie apocalypse,zone,zone coming,zone of,zouma,zouma has,zss,zss vs
0,1,,,Our Deeds are the Reason of this #earthquake M...,1,our deeds are the reason of this earthquake m...,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,4,,,Forest fire near La Ronge Sask. Canada,1,forest fire near la ronge sask canada,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,5,,,All residents asked to 'shelter in place' are ...,1,all residents asked to shelter in place are b...,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,6,,,"13,000 people receive #wildfires evacuation or...",1,people receive wildfires evacuation orders in...,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,7,,,Just got sent this photo from Ruby #Alaska as ...,1,just got sent this photo from ruby alaska as ...,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [24]:
del train['tp_datos_keyword']
del train['tp_datos_location']
del train['tp_datos_text']
del train['clean_text']
train.set_index('tp_datos_id', inplace=True)

### Entrenamiento del set de datos

In [25]:
X, y = train.iloc[:,train.columns != 'target'], train.iloc[:,0]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)
X_train

Unnamed: 0_level_0,aa,aba,aba as,abandon,abandoned,abandoned aircraft,abbott,abbswinston,abbswinston zionist,abc,...,zippednews,zombie,zombie apocalypse,zone,zone coming,zone of,zouma,zouma has,zss,zss vs
tp_datos_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3334,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9064,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1587,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
797,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9325,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7470,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7691,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1242,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10862,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [26]:
from sklearn.feature_selection import SelectPercentile, chi2
selec = SelectPercentile(chi2, percentile=10).fit(X_train, y_train)
X_new = selec.transform(X_train)
seleccionadas = pd.DataFrame(data=X_new)
seleccionadas

Unnamed: 0,0
0,"(0, 221)\t0.12478513331615565\n (0, 656)\t0..."
1,"(0, 1360)\t0.16607441862005867"
2,"(0, 138)\t0.3251832268054915\n (0, 831)\t0...."
3,"(0, 129)\t0.2681729469829434\n (0, 989)\t0...."
4,"(0, 241)\t0.1724289604374399\n (0, 343)\t0...."
...,...
6466,"(0, 82)\t0.2458879969458754\n (0, 217)\t0.1..."
6467,"(0, 42)\t0.10738774543067045\n (0, 59)\t0.0..."
6468,"(0, 59)\t0.12607098955469676"
6469,"(0, 17)\t0.12489088722751528\n (0, 39)\t0.2..."


In [27]:
cols = selec.get_support(indices=True)
cols
features = X.columns[cols]
features

Index(['abandoned', 'abbswinston', 'abbswinston zionist', 'abc', 'abc news',
       'absolutely', 'abstorm', 'accident', 'accident experts', 'accident man',
       ...
       'yourself', 'youth', 'youth saved', 'youtube', 'youtube video', 'yr',
       'yr old', 'yyc', 'zionist', 'zionist terrorist'],
      dtype='object', length=1672)

In [28]:
X_train = X_train.filter(items = features)
X_test = X_test.filter(items = features)

#### Red

In [29]:
import pandas as pd
import re
import numpy as np
import tensorflow_hub as hub
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Embedding, LSTM,Dense, SpatialDropout1D, Dropout
from keras import layers
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split

In [30]:
X_train

Unnamed: 0_level_0,abandoned,abbswinston,abbswinston zionist,abc,abc news,absolutely,abstorm,accident,accident experts,accident man,...,yourself,youth,youth saved,youtube,youtube video,yr,yr old,yyc,zionist,zionist terrorist
tp_datos_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3334,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9064,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1587,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
797,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9325,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7470,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7691,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1242,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10862,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [31]:
rango = X_train.shape[1] 
model = Sequential()
model.add(layers.Dense(1024, input_dim=rango, activation='sigmoid'))
model.add(Dropout(0.25))
model.add(layers.Dense(512, input_dim=rango, activation='sigmoid'))
model.add(Dropout(0.25))
model.add(layers.Dense(1, activation='sigmoid'))

In [32]:
optimizer=Adam(learning_rate=5e-5)
model.compile(loss='binary_crossentropy', 
              optimizer=optimizer, 
              metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 1024)              1713152   
_________________________________________________________________
dropout (Dropout)            (None, 1024)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 512)               524800    
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 513       
Total params: 2,238,465
Trainable params: 2,238,465
Non-trainable params: 0
_________________________________________________________________


In [33]:
X_train = np.asarray(X_train).astype(np.float32)
X_test = np.asarray(X_test).astype(np.float32)

In [34]:
history = model.fit(X_train, y_train,
                    epochs=50,
                    verbose=True,
                    validation_data=(X_test, y_test),
                    batch_size=10)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [35]:
X = X.filter(items = features)
train_pred = model.predict(X)
train_pred_int = train_pred

In [36]:
train['tfidf_net_bi'] = train_pred_int

In [37]:
train = train['tfidf_net_bi'].to_frame()

In [38]:
train.head()

Unnamed: 0_level_0,tfidf_net_bi
tp_datos_id,Unnamed: 1_level_1
1,0.564173
4,0.982303
5,0.812961
6,0.983006
7,0.498404


In [39]:
train['tfidf_net_bi'].value_counts()

0.400659    158
0.999686     24
0.999910     20
0.999766     19
0.201966     17
           ... 
0.143238      1
0.498114      1
0.979209      1
0.998290      1
0.119141      1
Name: tfidf_net_bi, Length: 6627, dtype: int64

In [40]:
train.to_csv('csv/train_TFIDF_bigramas.csv')

In [41]:
test = test.filter(items = features)
test.head()

Unnamed: 0_level_0,abandoned,abbswinston,abbswinston zionist,abc,abc news,absolutely,abstorm,accident,accident experts,accident man,...,yourself,youth,youth saved,youtube,youtube video,yr,yr old,yyc,zionist,zionist terrorist
tp_datos_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [42]:
test_pred = model.predict(test)
test_pred_int = test_pred

In [43]:
test['tfidf_net_bi'] = test_pred_int
test = test['tfidf_net_bi'].to_frame()
test.head()

Unnamed: 0_level_0,tfidf_net_bi
tp_datos_id,Unnamed: 1_level_1
0,0.731684
2,0.675684
3,0.806482
9,0.757722
11,0.997448


In [44]:
test['tfidf_net_bi'].value_counts()

0.400659    81
0.592622    11
0.385306    10
0.999686    10
0.201966     8
            ..
0.694795     1
0.752584     1
0.287865     1
0.100908     1
0.471152     1
Name: tfidf_net_bi, Length: 2959, dtype: int64

In [45]:
test.to_csv('csv/test_TFIDF_bigramas.csv')