# Se importan las bibliotecas necesarias

In [1]:
import pandas as pd
import re
import numpy as np
import xgboost as xgb
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.ensemble import RandomForestClassifier, VotingClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score,recall_score,precision_score,f1_score
from sklearn.metrics import mean_squared_error
import lightgbm as lgb
from catboost import CatBoostClassifier
%matplotlib inline

# Se lee el .csv

In [2]:
# Load the labelled training set and print its schema
# (column names, non-null counts and dtypes).
train_csv = 'csv/train.csv'
train = pd.read_csv(train_csv)
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7613 entries, 0 to 7612
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   id        7613 non-null   int64 
 1   keyword   7552 non-null   object
 2   location  5080 non-null   object
 3   text      7613 non-null   object
 4   target    7613 non-null   int64 
dtypes: int64(2), object(3)
memory usage: 297.5+ KB


In [3]:
# Preview the first five raw training rows.
train.head(n=5)

Unnamed: 0,id,keyword,location,text,target
0,1,,,Our Deeds are the Reason of this #earthquake M...,1
1,4,,,Forest fire near La Ronge Sask. Canada,1
2,5,,,All residents asked to 'shelter in place' are ...,1
3,6,,,"13,000 people receive #wildfires evacuation or...",1
4,7,,,Just got sent this photo from Ruby #Alaska as ...,1


In [4]:
# Load the unlabelled test set that the final predictions are produced for.
test_csv = 'csv/test.csv'
test = pd.read_csv(test_csv)

In [5]:
# Read only the `word2vec_score` column from the train and test word2vec CSVs.
word2vec_score, word2vec_score_test = (
    pd.read_csv(csv_path, usecols=['word2vec_score'])
    for csv_path in (
        'csv/solo_embedding_word2vec_train.csv',
        'csv/solo_embedding_word2vec_test.csv',
    )
)

In [6]:
# Read only the `elmo_score` column from the train and test ELMo CSVs.
elmo_score, elmo_score_test = (
    pd.read_csv(csv_path, usecols=['elmo_score'])
    for csv_path in (
        'csv/solo_embedding_elmo_train.csv',
        'csv/solo_embedding_elmo_test.csv',
    )
)

In [7]:
# Read only the `glove_score` column from the train and test GloVe CSVs.
glove_score, glove_score_test = (
    pd.read_csv(csv_path, usecols=['glove_score'])
    for csv_path in (
        'csv/solo_embedding_glove_train.csv',
        'csv/solo_embedding_glove_test.csv',
    )
)

In [8]:
# Read only the `bert_score` column from the train and test BERT CSVs.
bert_score, bert_score_test = (
    pd.read_csv(csv_path, usecols=['bert_score'])
    for csv_path in (
        'csv/solo_embedding_bert_train.csv',
        'csv/solo_embedding_bert_test.csv',
    )
)

In [9]:
# Read only the `w2v_net` column from the train and test "red" word2vec CSVs.
# NOTE(review): "red"/"net" presumably refers to a neural-network model - confirm.
w2v_net_column = ['w2v_net']
word2vec_net = pd.read_csv('csv/train_red_w2v.csv', usecols=w2v_net_column)
word2vec_net_test = pd.read_csv('csv/test_red_w2v.csv', usecols=w2v_net_column)

In [10]:
# Read only the `glove_net` column from the train and test "red" GloVe CSVs.
glove_net_column = ['glove_net']
glove_net = pd.read_csv('csv/train_red_glove.csv', usecols=glove_net_column)
glove_net_test = pd.read_csv('csv/test_red_glove.csv', usecols=glove_net_column)

In [11]:
# Read only the `bert_net` column from the train and test "red" BERT CSVs.
bert_net_column = ['bert_net']
bert_net = pd.read_csv('csv/train_red_bert.csv', usecols=bert_net_column)
bert_net_test = pd.read_csv('csv/test_red_bert.csv', usecols=bert_net_column)

In [12]:
# Read only the `elmo_net` column from the train and test "red" ELMo CSVs.
elmo_net_column = ['elmo_net']
elmo_net = pd.read_csv('csv/train_red_elmo.csv', usecols=elmo_net_column)
elmo_net_test = pd.read_csv('csv/test_red_elmo.csv', usecols=elmo_net_column)

In [13]:
# Attach the eight base-model outputs to `train` as voter columns.
# The four *_score frames are rounded to the nearest integer and cast to int;
# the four *_net frames are attached exactly as loaded.
train_rounded_sources = {
    'word2vec_tree': word2vec_score,
    'elmo_tree': elmo_score,
    'glove_tree': glove_score,
    'bert_tree': bert_score,
}
train_net_sources = {
    'bert_net': bert_net,
    'elmo_net': elmo_net,
    'w2v_net': word2vec_net,
    'glove_net': glove_net,
}
for column_name, score_frame in train_rounded_sources.items():
    train[column_name] = score_frame.round().astype(int)
for column_name, net_frame in train_net_sources.items():
    train[column_name] = net_frame

In [14]:
# Remove the raw tweet fields; only `id`, `target` and the voter columns are
# used from here on.
# Reassigning (instead of inplace=True) and ignoring already-missing columns
# keeps this cell safe to re-run: the in-place version raised KeyError on a
# second execution because the columns were already gone.
train = train.drop(columns=['keyword', 'location', 'text'], errors='ignore')

In [15]:
# Preview the training frame after adding the voter columns.
train.head(n=5)

Unnamed: 0,id,target,word2vec_tree,elmo_tree,glove_tree,bert_tree,bert_net,elmo_net,w2v_net,glove_net
0,1,1,1,1,1,1,0,1,0,0
1,4,1,1,1,1,1,0,0,1,1
2,5,1,1,1,1,1,1,1,1,1
3,6,1,1,1,1,1,1,1,1,1
4,7,1,1,1,1,1,1,1,1,1


In [16]:
# Attach the eight base-model outputs to `test`, mirroring the columns built
# for `train`: *_score frames are rounded and cast to int, *_net frames are
# attached as loaded.
test_rounded_sources = (
    ('word2vec_tree', word2vec_score_test),
    ('elmo_tree', elmo_score_test),
    ('glove_tree', glove_score_test),
    ('bert_tree', bert_score_test),
)
test_net_sources = (
    ('bert_net', bert_net_test),
    ('elmo_net', elmo_net_test),
    ('w2v_net', word2vec_net_test),
    ('glove_net', glove_net_test),
)
for column_name, score_frame in test_rounded_sources:
    test[column_name] = score_frame.round().astype(int)
for column_name, net_frame in test_net_sources:
    test[column_name] = net_frame

In [17]:
# Remove the raw tweet fields; only `id` and the voter columns are used from
# here on.
# Reassigning (instead of inplace=True) and ignoring already-missing columns
# keeps this cell safe to re-run: the in-place version raised KeyError on a
# second execution because the columns were already gone.
test = test.drop(columns=['keyword', 'location', 'text'], errors='ignore')

In [18]:
# Preview the test frame after adding the voter columns.
test.head(n=5)

Unnamed: 0,id,word2vec_tree,elmo_tree,glove_tree,bert_tree,bert_net,elmo_net,w2v_net,glove_net
0,0,1,0,1,0,1,1,1,1
1,2,1,0,1,1,1,1,1,1
2,3,1,1,0,0,1,1,1,1
3,9,1,0,1,1,1,1,1,1
4,11,1,1,1,1,1,1,1,1


In [21]:
# Majority vote over the eight base-model columns of the training set.
# Voters are selected by name rather than by position (previously iloc 2:10),
# so the vote does not silently change if columns are added or reordered.
train_voter_columns = [
    'word2vec_tree', 'elmo_tree', 'glove_tree', 'bert_tree',
    'bert_net', 'elmo_net', 'w2v_net', 'glove_net',
]
# Explicit copy: adding `suma` to a slice of `train` raised SettingWithCopyWarning.
aux = train[train_voter_columns].copy()
aux['suma'] = aux.sum(axis=1)
# Mean vote rounded to 0/1. An exact tie gives 0.5, which rounds half-to-even,
# i.e. ties resolve to class 0.
train['prediction'] = (aux['suma'] / len(train_voter_columns)).round().astype(int)
train.head()

Unnamed: 0,id,target,word2vec_tree,elmo_tree,glove_tree,bert_tree,bert_net,elmo_net,w2v_net,glove_net,prediction
0,1,1,1,1,1,1,0,1,0,0,1
1,4,1,1,1,1,1,0,0,1,1,1
2,5,1,1,1,1,1,1,1,1,1,1
3,6,1,1,1,1,1,1,1,1,1,1
4,7,1,1,1,1,1,1,1,1,1,1


In [23]:
# Report how the voted prediction compares with the true label on `train`.
# NOTE(review): these are training-set scores; if the base models were fitted
# on this same data the numbers are optimistic - confirm before relying on them.
metric_table = (
    ('Accuracy Score : ', accuracy_score),
    ('Precision Score : ', precision_score),
    ('Recall Score : ', recall_score),
    ('F1 Score : ', f1_score),
)
for metric_label, metric_fn in metric_table:
    print(metric_label + str(metric_fn(train['target'], train['prediction'])))

Accuracy Score : 0.8821752265861027
Precision Score : 0.955487336914812
Recall Score : 0.7612350963008254
F1 Score : 0.8473711077080143


In [22]:
# Majority vote over the eight base-model columns of the test set.
# Voters are selected by name rather than by position (previously iloc 1:9),
# so the vote does not silently change if columns are added or reordered.
test_voter_columns = [
    'word2vec_tree', 'elmo_tree', 'glove_tree', 'bert_tree',
    'bert_net', 'elmo_net', 'w2v_net', 'glove_net',
]
# Explicit copy: adding `suma` to a slice of `test` raised SettingWithCopyWarning.
aux2 = test[test_voter_columns].copy()
aux2['suma'] = aux2.sum(axis=1)
# Mean vote rounded to 0/1. An exact tie gives 0.5, which rounds half-to-even,
# i.e. ties resolve to class 0.
test['prediction'] = (aux2['suma'] / len(test_voter_columns)).round().astype(int)
test.head()

Unnamed: 0,id,word2vec_tree,elmo_tree,glove_tree,bert_tree,bert_net,elmo_net,w2v_net,glove_net,prediction
0,0,1,0,1,0,1,1,1,1,1
1,2,1,0,1,1,1,1,1,1,1
2,3,1,1,0,0,1,1,1,1,1
3,9,1,0,1,1,1,1,1,1,1
4,11,1,1,1,1,1,1,1,1,1


In [24]:
# Save the current `prediction` column to disk.
# The file contains the positional row index and `prediction` only; the tweet
# `id` is not included.
prediction_only = test[['prediction']]
prediction_only.to_csv('boliviano.csv')

In [27]:
# Majority vote over the four *_net columns only.
# This overwrites the `prediction` column in `test` written by the previous
# voting cell.
# Voters are selected by name rather than by position (previously iloc 5:9),
# which only pointed at the net columns for one specific column layout.
net_voter_columns = ['bert_net', 'elmo_net', 'w2v_net', 'glove_net']
# Explicit copy: adding `suma` to a slice of `test` raised SettingWithCopyWarning.
aux3 = test[net_voter_columns].copy()
aux3['suma'] = aux3.sum(axis=1)
# Mean vote rounded to 0/1. A 2-2 tie gives 0.5, which rounds half-to-even,
# i.e. ties resolve to class 0.
test['prediction'] = (aux3['suma'] / len(net_voter_columns)).round().astype(int)
test.head()

Unnamed: 0,id,word2vec_tree,elmo_tree,glove_tree,bert_tree,bert_net,elmo_net,w2v_net,glove_net,prediction
0,0,1,0,1,0,1,1,1,1,1
1,2,1,0,1,1,1,1,1,1,1
2,3,1,1,0,0,1,1,1,1,1
3,9,1,0,1,1,1,1,1,1,1
4,11,1,1,1,1,1,1,1,1,1


In [28]:
# Save the current `prediction` column to disk.
# The file contains the positional row index and `prediction` only; the tweet
# `id` is not included.
net_prediction_only = test[['prediction']]
net_prediction_only.to_csv('boliviano2.csv')

In [29]:
# Start the output frame from the current `prediction` column of `test`.
# An independent copy is taken so later edits to `final` leave `test` untouched.
final = test[['prediction']].copy()

In [34]:
# Index `final` by tweet id.
# The ids are assigned positionally (`final` was built from `test`, so rows
# correspond one-to-one). This makes the cell idempotent: the previous
# add-column + set_index(inplace=True) version realigned `test['id']` against
# the already id-valued index when re-run and silently corrupted the ids.
final.index = pd.Index(test['id'].to_numpy(), name='id')

In [30]:
# Rename the prediction column to `target` for the output file.
column_renames = {'prediction': 'target'}
final = final.rename(columns=column_renames)

In [35]:
# Preview the first five rows of the output frame before writing it.
final.head(n=5)

Unnamed: 0_level_0,target
id,Unnamed: 1_level_1
0,1
2,1
3,1
9,1
11,1


In [36]:
# Write the output frame to disk; the index is written as the first CSV column.
final_csv = 'mayor_voting_4_embedds_red.csv'
final.to_csv(final_csv)