In [416]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn import model_selection
from sklearn import linear_model
from sklearn import metrics
from gensim.models import Word2Vec, KeyedVectors

In [417]:
X_train = np.load('./X_word2vec_train.npy')
y_train = np.load('y_train.npy')
X_test = np.load('./X_word2vec_test.npy')
y_test = np.load('y_test.npy')


In [418]:
labels = {}

for index, i in enumerate(set(y)):
    labels[i] = index

labels

{'wheat': 0,
 'interest': 1,
 'acq': 2,
 'crude': 3,
 'earn': 4,
 'sugar': 5,
 'ship': 6,
 'money-supply': 7,
 'trade': 8,
 'money-fx': 9}

In [419]:
def label_encode(label):
    return labels[label]

In [420]:
y_train = np.array([label_encode(label) for label in y_train])
y_test = np.array([label_encode(label) for label in y_test])
y_train

array([0, 4, 2, ..., 9, 9, 6])

In [421]:
# X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size= 0.3, random_state=42)

print('X_train dimension= ', X_train.shape)
print('X_test dimension= ', X_test.shape)
print('y_train dimension= ', y_train.shape)
print('y_test dimension= ', y_test.shape)

X_train dimension=  (6149, 100)
X_test dimension=  (2410, 100)
y_train dimension=  (6149,)
y_test dimension=  (2410,)


In [422]:
lm = linear_model.LogisticRegression(max_iter=1000)
lm.fit(X_train, y_train)

In [423]:
lm.score(X_test, y_test)


0.03153526970954357

In [424]:
print(metrics.classification_report(y_test, lm.predict(X_test)))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        38
           1       0.00      0.00      0.00       112
           2       1.00      0.00      0.00       699
           3       0.00      0.00      0.00       144
           4       0.58      0.01      0.01      1087
           5       0.00      0.00      0.00        31
           6       0.03      0.99      0.06        69
           7       0.00      0.00      0.00        30
           8       0.00      0.00      0.00        88
           9       0.00      0.00      0.00       112

    accuracy                           0.03      2410
   macro avg       0.16      0.10      0.01      2410
weighted avg       0.55      0.03      0.01      2410



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [425]:
# Importing the required packages
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt

In [426]:
def train_using_gini(X_train, X_test, y_train):
 
    # Creating the classifier object
    clf_gini = DecisionTreeClassifier(criterion="gini", random_state=100, max_depth=10)
 
    # Performing training
    clf_gini.fit(X_train, y_train)
    return clf_gini



def train_using_entropy(X_train, X_test, y_train):
 
    # Decision tree with entropy
    clf_entropy = DecisionTreeClassifier(
        criterion="entropy", random_state=100, max_depth=10)
 
    # Performing training
    clf_entropy.fit(X_train, y_train)
    return clf_entropy

In [427]:
# Function to make predictions
def prediction(X_test, clf_object):
    y_pred = clf_object.predict(X_test)
    print("Predicted values:")
    print(y_pred)
    return y_pred
 
# Placeholder function for cal_accuracy
def cal_accuracy(y_test, y_pred):
    print("Confusion Matrix: ",
          confusion_matrix(y_test, y_pred))
    print("Accuracy : ",
          accuracy_score(y_test, y_pred)*100)
    print("Report : ",
          classification_report(y_test, y_pred))

In [428]:
clf_gini = train_using_gini(X_train, X_test, y_train)
clf_entropy = train_using_entropy(X_train, X_test, y_train)
 

In [429]:
print("Results Using Gini Index:")
y_pred_gini = prediction(X_test, clf_gini)
cal_accuracy(y_test, y_pred_gini)

Results Using Gini Index:
Predicted values:
[3 3 3 ... 3 3 3]
Confusion Matrix:  [[  0   0   0  32   0   0   6   0   0   0]
 [  0   0   0  98   0   0  14   0   0   0]
 [  0   0   5 312   9  73 299   0   0   1]
 [  0   0   3 132   4   0   4   1   0   0]
 [  0   1 344  33 588   8 110   0   0   3]
 [  0   0   0  31   0   0   0   0   0   0]
 [  0   0   0  68   1   0   0   0   0   0]
 [  0   0   0  13   0   0  17   0   0   0]
 [  0   0   0  72   0   0  16   0   0   0]
 [  0   0   0 103   0   0   8   1   0   0]]
Accuracy :  30.08298755186722
Report :                precision    recall  f1-score   support

           0       0.00      0.00      0.00        38
           1       0.00      0.00      0.00       112
           2       0.01      0.01      0.01       699
           3       0.15      0.92      0.25       144
           4       0.98      0.54      0.70      1087
           5       0.00      0.00      0.00        31
           6       0.00      0.00      0.00        69
           7   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [430]:
print("Results Using EntIndex:")
y_pred_ent = prediction(X_test, clf_entropy)
cal_accuracy(y_test, y_pred_ent)

Results Using EntIndex:
Predicted values:
[0 0 0 ... 0 2 0]
Confusion Matrix:  [[ 19   0  12   0   0   0   7   0   0   0]
 [ 60   0  45   0   0   0   7   0   0   0]
 [258   0 403   1   3   3  28   0   3   0]
 [ 94   0  41   2   2   0   5   0   0   0]
 [374   0  67   4 522   0  51   0  67   2]
 [ 16   0   8   0   3   0   4   0   0   0]
 [ 54   0   4   0   2   0   9   0   0   0]
 [  3   0  16   0   0   0  11   0   0   0]
 [ 63   0  10   0   0   0  15   0   0   0]
 [ 97   0   6   0   0   1   8   0   0   0]]
Accuracy :  39.62655601659751
Report :                precision    recall  f1-score   support

           0       0.02      0.50      0.04        38
           1       0.00      0.00      0.00       112
           2       0.66      0.58      0.61       699
           3       0.29      0.01      0.03       144
           4       0.98      0.48      0.64      1087
           5       0.00      0.00      0.00        31
           6       0.06      0.13      0.08        69
           7     

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
