# Network Setting 1

In [19]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.multiclass import OneVsRestClassifier
import tensorflow as tf

train = pd.read_csv('train_new_2.csv')
train = train.fillna(0)

test = pd.read_csv('test_new_2.csv')
test = test.fillna(0)

test_id = test['Id']
test = test.drop(columns = ['Id', 'text', 'reply_to_screen_name', 'hashtags', 'clean_tweet'], axis = 1)


## Defining input and target
X = train.drop(columns = ['text', 'reply_to_screen_name', 'hashtags', 'clean_tweet', 'country'], axis = 1)
Y = train['country']
Y = np.where(Y == 'us', 0, 
             np.where(Y == 'uk', 1, 
                      np.where(Y == 'canada', 2, 
                               np.where(Y == 'australia', 3,
                                        np.where(Y == 'ireland', 4, 5)))))

## Splitting the data 
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2)

## Scaling the data 
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)
test = scaler.fit_transform(test)

## Defining model 
model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(10, input_dim = 113, activation = 'relu'),
        tf.keras.layers.Dense(10, activation = 'relu'),
        tf.keras.layers.Dense(6, activation = 'softmax')
])

model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])

## Fitting model 
model.fit(X_train, tf.keras.utils.to_categorical(Y_train, num_classes = 6), epochs = 20, batch_size = 32, validation_data = (X_test, tf.keras.utils.to_categorical(Y_test, num_classes = 6)))

## Predicting on test
nn_pred = model.predict(test)
nn_pred = np.argmax(nn_pred, axis = 1)

## Defining data to be exported
data_out = pd.DataFrame({'Id': test_id, 'Category': nn_pred})
data_out['Category'] = np.where(data_out['Category'] == 0, 'us',
                                np.where(data_out['Category'] == 1, 'uk',
                                         np.where(data_out['Category'] == 2, 'canada',
                                                  np.where(data_out['Category'] == 3, 'australia',
                                                           np.where(data_out['Category'] == 4, 'ireland', 'new_zealand')))))
data_out.to_csv('nn_submission_md8.csv', index = False)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [20]:
nn_pred = model.predict(X_test)
nn_pred = np.argmax(nn_pred, axis = 1)

## Defining data to be exported
data_out = pd.DataFrame({'Actual': Y_test, 'Pred': nn_pred})
data_out['Actual'] = np.where(data_out['Actual'] == 0, 'us',
                              np.where(data_out['Actual'] == 1, 'uk',
                                       np.where(data_out['Actual'] == 2, 'canada',
                                                np.where(data_out['Actual'] == 3, 'australia',
                                                         np.where(data_out['Actual'] == 4, 'ireland', 'new_zealand')))))

data_out['Pred'] = np.where(data_out['Pred'] == 0, 'us',
                            np.where(data_out['Pred'] == 1, 'uk',
                                     np.where(data_out['Pred'] == 2, 'canada',
                                              np.where(data_out['Pred'] == 3, 'australia',
                                                       np.where(data_out['Pred'] == 4, 'ireland', 'new_zealand')))))
data_out.head()

Unnamed: 0,Actual,Pred
0,australia,canada
1,canada,canada
2,uk,canada
3,new_zealand,new_zealand
4,canada,canada


In [5]:
pd.crosstab(data_out['Actual'], data_out['Pred'])

Pred,canada,ireland,new_zealand,uk,us
Actual,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
australia,0,3261,17467,5342,5950
canada,1,0,0,15029,16851
ireland,0,5066,23214,2136,1598
new_zealand,0,4208,24419,1658,1730
uk,0,0,0,15536,16471
us,0,0,0,14245,17818


In [21]:
pd.crosstab(data_out['Actual'], data_out['Pred'])

Pred,australia,canada,ireland,new_zealand,uk,us
Actual,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
australia,684,828,2272,2317,888,1127
canada,2,2534,0,0,2879,2474
ireland,417,217,4121,2633,395,314
new_zealand,508,211,3072,3589,269,317
uk,2,2090,0,0,3222,2707
us,0,1921,0,0,2112,3878


In [6]:
data_out['Actual'].value_counts()

us             32063
australia      32020
new_zealand    32015
ireland        32014
uk             32007
canada         31881
Name: Actual, dtype: int64

In [4]:
from sklearn.metrics import classification_report

target_names = ['us', 'uk', 'canada', 'australia', 'ireland', 'new_zealand']
print(classification_report(data_out['Actual'], data_out['Pred'], target_names = target_names))

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

          us       0.00      0.00      0.00     32020
          uk       1.00      0.00      0.00     31881
      canada       0.40      0.16      0.23     32014
   australia       0.38      0.76      0.50     32015
     ireland       0.29      0.49      0.36     32007
 new_zealand       0.29      0.56      0.39     32063

    accuracy                           0.33    192000
   macro avg       0.39      0.33      0.25    192000
weighted avg       0.39      0.33      0.25    192000



  _warn_prf(average, modifier, msg_start, len(result))
