In [1]:
import pandas as pd
import numpy as np
import nltk
from sklearn.decomposition import PCA

import xml.etree.ElementTree as ET
import gensim

In [2]:
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [3]:
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


###  Basic Functions Needed for all Prepostions

In [4]:
def training_data(file,prepostion):
    tree = ET.parse(file)
    root = tree.getroot()
    n = len(root)
    df = pd.DataFrame(columns=['id','sent_before','sent_after','PP','label'],index=np.arange(n))
    for i in range(n):
        df['id'][i] = root[i][0].attrib["instance"]
        df['label'][i] = root[i][0].attrib["senseid"]
        df['sent_before'][i] = root[i][1].text
        df['sent_after'][i] = root[i][1][0].tail
        df['PP'][i] = root[i][1][0].text
    return df

In [5]:
def preprocessing(df):
    for i in range(len(df)):
        df['sent_before'][i] = nltk.word_tokenize(df['sent_before'][i])
        df['sent_after'][i] = nltk.word_tokenize(df['sent_after'][i])
        df['sent_after'][i] = [w for w in df['sent_after'][i] if w.isalpha()]
        df['sent_before'][i] = [w for w in df['sent_before'][i] if w.isalpha()]
    return df

In [6]:
import gensim.downloader

In [7]:
print(list(gensim.downloader.info()['models'].keys()))

['fasttext-wiki-news-subwords-300', 'conceptnet-numberbatch-17-06-300', 'word2vec-ruscorpora-300', 'word2vec-google-news-300', 'glove-wiki-gigaword-50', 'glove-wiki-gigaword-100', 'glove-wiki-gigaword-200', 'glove-wiki-gigaword-300', 'glove-twitter-25', 'glove-twitter-50', 'glove-twitter-100', 'glove-twitter-200', '__testing_word2vec-matrix-synopsis']


In [8]:
glove_vectors_emb = gensim.downloader.load('word2vec-google-news-300')



In [9]:
def vl_vr_words(df):
    n = len(df)
    df_vl_vr = pd.DataFrame(columns=['vl1','vl2','vl3','vl4','vr1','vr2','vr3','vr4'],index=np.arange(n))
    for i in range(n):
        try:
            df_vl_vr['vl1'][i] = df['sent_before'][i][-1]
        except IndexError:
            df_vl_vr['vl1'][i] = 'UNK'
            df_vl_vr['vl2'][i] = 'UNK'
            df_vl_vr['vl3'][i] = 'UNK'
            df_vl_vr['vl4'][i] = 'UNK'
            continue
        try: 
            df_vl_vr['vl2'][i] = df['sent_before'][i][-2]
        except IndexError:
            df_vl_vr['vl2'][i] = 'UNK'
            df_vl_vr['vl3'][i] = 'UNK'
            df_vl_vr['vl4'][i] = 'UNK'
            continue
        try: 
            df_vl_vr['vl3'][i] = df['sent_before'][i][-3]
        except IndexError:
            df_vl_vr['vl3'][i] = 'UNK'
            df_vl_vr['vl4'][i] = 'UNK'
            continue
        try:
            df_vl_vr['vl4'][i] = df['sent_before'][i][-4]
        except IndexError:
            df_vl_vr['vl4'][i] = 'UNK'
            continue
            
    for i in range(n):
        try:
            df_vl_vr['vr1'][i] = df['sent_after'][i][0]
        except IndexError:
            df_vl_vr['vr1'][i] = 'UNK'
            df_vl_vr['vr2'][i] = 'UNK'
            df_vl_vr['vr3'][i] = 'UNK'
            df_vl_vr['vr4'][i] = 'UNK'
            continue
        try: 
            df_vl_vr['vr2'][i] = df['sent_after'][i][1]
        except IndexError:
            df_vl_vr['vr2'][i] = 'UNK'
            df_vl_vr['vr3'][i] = 'UNK'
            df_vl_vr['vr4'][i] = 'UNK'
            continue
        try: 
            df_vl_vr['vr3'][i] = df['sent_after'][i][2]
        except IndexError:
            df_vl_vr['vr3'][i] = 'UNK'
            df_vl_vr['vr4'][i] = 'UNK'
            continue
        try:
            df_vl_vr['vr4'][i] = df['sent_after'][i][3]
        except IndexError:
            df_vl_vr['vr4'][i] = 'UNK'
            continue
    return df_vl_vr

In [10]:
def find_vinter(df_vl_vr,embeddings):
    tmp = np.array(df_vl_vr)
    n = len(df_vl_vr)
    d=300
    kl=4
    kr=4
    V_inter = np.zeros((d,n))
    for i in range(n):
        V = np.zeros((300,8))
        for j in range(8):
            try:
                V[:,j] = embeddings[tmp[i][j]]
            except KeyError:
                V[:,j] = np.zeros(300)
                
        V[:,0:4] = np.cumsum(V[:,0:4],axis=1)
        V[:,4:8] = np.cumsum(V[:,4:8],axis=1)
        for k in range(1,4):
            V[:,k] = V[:,k]/k+1
            V[:,k+4] = V[:,k+4]/k+1
            
        pca = PCA(n_components=1)
        pc1 = pca.fit_transform(V)
        V_inter[:,i] = pc1.reshape(300,)
    return V_inter

In [11]:
def find_vlvr_vector(df, embeddings):
    n = len(df)
    tmp = np.array(df)
    vl = np.zeros((300,n))
    vr = np.zeros((300,n))
    for i in range(n):
        for j in range(4):
            try:
                vl[:,i] = vl[:,i] + embeddings[tmp[i][j]]
            except KeyError:
                pass
            try:
                vr[:,i] = vr[:,i] + embeddings[tmp[i][j+4]]
            except KeyError:
                pass
    return vl/4, vr/4

In [12]:
def feature_vector(vl,vr,vinter,a,b,c):
    n=vl.shape[1]
    features = np.zeros((300,n))
    for i in range(n):
        features[:,i] = a*vl[:,i]+b*vr[:,i]+c*vinter[:,i]
    return features
    

In [13]:
def features_from_file(file,preposition):
  df_final = training_data(file,preposition)
  df_final = preprocessing(df_final)
  df_vl_vr = vl_vr_words(df_final)
  v_inter = find_vinter(df_vl_vr,glove_vectors_emb)
  vlkl,vrkr = find_vlvr_vector(df_vl_vr,glove_vectors_emb)
  features = feature_vector(vlkl,vrkr,v_inter,1/3,1/3,1/3)
  return features, df_final



### SVM

In [14]:
from sklearn.model_selection import train_test_split

from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.neighbors import KNeighborsClassifier

In [15]:
def SVM_Model(features,df):
    X = features.T
    y = df["label"]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=5)
    classifier = SVC(kernel = 'linear')
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)
    print(classification_report(y_test, y_pred))
    return classifier

### KNN

In [16]:
def KNN_model(features, df):
  X = features.T
  y = df["label"]
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=5)
  classifier = KNeighborsClassifier(n_neighbors=3)
  classifier.fit(X_train, y_train)
  y_pred = classifier.predict(X_test)
  print(classification_report(y_test, y_pred))
  return classifier

In [17]:
### Prediction

In [18]:
def pre_test(file,preposition,embeddings,classifier):
    with open(file) as f:
            read_data = f.readlines()
    df_test = pd.DataFrame(columns=['sent_before','sent_after'],index=np.arange(len(read_data)))
    for i in range(len(read_data)):
        tokenized_text = nltk.word_tokenize(read_data[i])
        tokenized_text = [w.lower() for w in tokenized_text if w.isalpha()]
        index_prep = tokenized_text.index(preposition)
        df_test['sent_before'][i] =  tokenized_text[0:index_prep]
        df_test['sent_after'][i] = tokenized_text[index_prep+1:]
    df_vl_vr = vl_vr_words(df_test)
    v_inter=find_vinter(df_vl_vr,embeddings)
    vlkl,vrkr = find_vlvr_vector(df_vl_vr,embeddings)
    features_test = feature_vector(vlkl,vrkr,v_inter,1/3,1/3,1/3)
    X_test = features_test.T
    y_test = classifier.predict(X_test)
    return y_test  

## Preposition Wise Training

##### 1. About

In [19]:
features,df_final = features_from_file('/content/drive/MyDrive/Assignment1/data_assn1/Train/Source/pp-about.sents.trng.xml','about')

In [20]:
svm_classifier = SVM_Model(features,df_final)
knn_classifier =  KNN_model(features,df_final)

              precision    recall  f1-score   support

        1(1)       0.91      1.00      0.95       160
       2(1a)       0.00      0.00      0.00         4
        3(2)       1.00      0.09      0.17        11
      3(2)-1       1.00      0.33      0.50         3

    accuracy                           0.91       178
   macro avg       0.73      0.36      0.40       178
weighted avg       0.90      0.91      0.87       178

              precision    recall  f1-score   support

        1(1)       0.91      1.00      0.96       160
       2(1a)       0.00      0.00      0.00         4
        3(2)       1.00      0.18      0.31        11
      3(2)-1       0.00      0.00      0.00         3

    accuracy                           0.91       178
   macro avg       0.48      0.30      0.32       178
weighted avg       0.88      0.91      0.88       178



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [21]:
#SVM is better
y_out_test = pre_test('/content/drive/MyDrive/Assignment1/test_out/about.out','about',glove_vectors_emb,svm_classifier)

#### 2. Above

In [22]:
features, df_final = features_from_file('/content/drive/MyDrive/Assignment1/data_assn1/Train/Source/pp-above.sents.trng.xml','above')

In [23]:
svm_classifier = SVM_Model(features,df_final)
knn_classifier =  KNN_model(features,df_final)

              precision    recall  f1-score   support

        1(1)       0.00      0.00      0.00         0
       2(1a)       0.00      0.00      0.00         2
        4(2)       0.60      0.38      0.46         8
       8(2d)       0.00      0.00      0.00         1
        9(3)       0.00      0.00      0.00         1

    accuracy                           0.25        12
   macro avg       0.12      0.07      0.09        12
weighted avg       0.40      0.25      0.31        12

              precision    recall  f1-score   support

        1(1)       0.00      0.00      0.00         0
       2(1a)       0.50      0.50      0.50         2
        4(2)       0.71      0.62      0.67         8
       8(2d)       0.00      0.00      0.00         1
        9(3)       0.00      0.00      0.00         1

    accuracy                           0.50        12
   macro avg       0.24      0.23      0.23        12
weighted avg       0.56      0.50      0.53        12



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [24]:
#KNN is better
y_out_test = pre_test('/content/drive/MyDrive/Assignment1/test_out/above.out','above',glove_vectors_emb,knn_classifier)

### 3. across

In [25]:
features, df_final = features_from_file('/content/drive/MyDrive/Assignment1/data_assn1/Train/Source/pp-across.sents.trng.xml','across')
svm_classifier = SVM_Model(features,df_final)
knn_classifier =  KNN_model(features,df_final)

              precision    recall  f1-score   support

        1(1)       0.96      1.00      0.98        77
        2(2)       0.00      0.00      0.00         3

    accuracy                           0.96        80
   macro avg       0.48      0.50      0.49        80
weighted avg       0.93      0.96      0.94        80

              precision    recall  f1-score   support

        1(1)       0.96      1.00      0.98        77
        2(2)       0.00      0.00      0.00         3

    accuracy                           0.96        80
   macro avg       0.48      0.50      0.49        80
weighted avg       0.93      0.96      0.94        80



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [26]:
#SVM is better
y_out_test = pre_test('/content/drive/MyDrive/Assignment1/test_out/across.out','across',glove_vectors_emb,svm_classifier)

### 4. after

In [27]:
features, df_final = features_from_file('/content/drive/MyDrive/Assignment1/data_assn1/Train/Source/pp-after.sents.trng.xml','after')
svm_classifier = SVM_Model(features,df_final)
knn_classifier =  KNN_model(features,df_final)

              precision    recall  f1-score   support

        1(1)       0.67      0.31      0.42        13
      1(1)-1       0.33      0.50      0.40         2
        5(2)       0.25      0.67      0.36         6
       6(2a)       1.00      1.00      1.00         1
        7(3)       0.00      0.00      0.00         3
        9(5)       0.00      0.00      0.00         1

    accuracy                           0.38        26
   macro avg       0.38      0.41      0.36        26
weighted avg       0.46      0.38      0.36        26

              precision    recall  f1-score   support

        1(1)       0.60      0.46      0.52        13
      1(1)-1       0.00      0.00      0.00         2
        5(2)       0.29      0.67      0.40         6
       6(2a)       0.00      0.00      0.00         1
        7(3)       0.00      0.00      0.00         3
        9(5)       0.00      0.00      0.00         1

    accuracy                           0.38        26
   macro avg       0.15

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [28]:
#SVM is better
y_out_test = pre_test('/content/drive/MyDrive/Assignment1/test_out/after.out','after',glove_vectors_emb,svm_classifier)

### 5.against

In [29]:
features, df_final = features_from_file('/content/drive/MyDrive/Assignment1/data_assn1/Train/Source/pp-against.sents.trng.xml','against')
svm_classifier = SVM_Model(features,df_final)
knn_classifier =  KNN_model(features,df_final)

              precision    recall  f1-score   support

        1(1)       0.38      0.42      0.40        12
       10(4)       0.62      0.81      0.70        26
       2(1a)       1.00      0.50      0.67         4
       3(1b)       0.00      0.00      0.00         2
        4(2)       0.00      0.00      0.00         4
       6(2b)       0.00      0.00      0.00         1

    accuracy                           0.57        49
   macro avg       0.33      0.29      0.29        49
weighted avg       0.50      0.57      0.52        49

              precision    recall  f1-score   support

        1(1)       0.47      0.67      0.55        12
       10(4)       0.73      0.85      0.79        26
       2(1a)       1.00      0.50      0.67         4
       3(1b)       0.00      0.00      0.00         2
        4(2)       0.00      0.00      0.00         4
       6(2b)       0.00      0.00      0.00         1

    accuracy                           0.65        49
   macro avg       0.37

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [30]:
#KNN is better
y_out_test = pre_test('/content/drive/MyDrive/Assignment1/test_out/against.out','against',glove_vectors_emb,knn_classifier)

### 6. along

In [31]:
features, df_final = features_from_file('/content/drive/MyDrive/Assignment1/data_assn1/Train/Source/pp-along.sents.trng.xml','along')
svm_classifier = SVM_Model(features,df_final)
knn_classifier =  KNN_model(features,df_final)

              precision    recall  f1-score   support

        1(1)       0.93      1.00      0.97        86
        3(2)       0.00      0.00      0.00         5
        4(3)       0.00      0.00      0.00         1

    accuracy                           0.93        92
   macro avg       0.31      0.33      0.32        92
weighted avg       0.87      0.93      0.90        92

              precision    recall  f1-score   support

        1(1)       0.93      1.00      0.97        86
        3(2)       0.00      0.00      0.00         5
        4(3)       0.00      0.00      0.00         1

    accuracy                           0.93        92
   macro avg       0.31      0.33      0.32        92
weighted avg       0.87      0.93      0.90        92



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [32]:
#SVM is better
y_out_test = pre_test('/content/drive/MyDrive/Assignment1/test_out/along.out','along',glove_vectors_emb,svm_classifier)




### 7. among




In [33]:
features, df_final = features_from_file('/content/drive/MyDrive/Assignment1/data_assn1/Train/Source/pp-among.sents.trng.xml','among')
svm_classifier = SVM_Model(features,df_final)
knn_classifier =  KNN_model(features,df_final)

              precision    recall  f1-score   support

        1(1)       0.50      0.73      0.59        11
        2(2)       0.67      0.40      0.50         5
        3(3)       0.50      0.33      0.40         9

    accuracy                           0.52        25
   macro avg       0.56      0.49      0.50        25
weighted avg       0.53      0.52      0.50        25

              precision    recall  f1-score   support

        1(1)       0.50      0.91      0.65        11
        2(2)       0.00      0.00      0.00         5
        3(3)       0.75      0.33      0.46         9

    accuracy                           0.52        25
   macro avg       0.42      0.41      0.37        25
weighted avg       0.49      0.52      0.45        25



In [34]:
#KNN is better
y_out_test = pre_test('/content/drive/MyDrive/Assignment1/test_out/among.out','among',glove_vectors_emb,knn_classifier)

### 8.around

In [35]:
features, df_final = features_from_file('/content/drive/MyDrive/Assignment1/data_assn1/Train/Source/pp-around.sents.trng.xml','around')
svm_classifier = SVM_Model(features,df_final)
knn_classifier =  KNN_model(features,df_final)

              precision    recall  f1-score   support

        1(1)       0.43      0.20      0.27        15
       2(1a)       0.00      0.00      0.00         1
        3(2)       0.52      0.80      0.63        41
        4(3)       0.33      0.14      0.20         7
      4(3)-1       0.00      0.00      0.00        11
        5(4)       0.50      0.44      0.47         9

    accuracy                           0.49        84
   macro avg       0.30      0.27      0.26        84
weighted avg       0.41      0.49      0.42        84

              precision    recall  f1-score   support

        1(1)       0.56      0.67      0.61        15
       2(1a)       0.00      0.00      0.00         1
        3(2)       0.70      0.63      0.67        41
        4(3)       0.21      0.57      0.31         7
      4(3)-1       0.50      0.09      0.15        11
        5(4)       0.88      0.78      0.82         9

    accuracy                           0.57        84
   macro avg       0.47

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [36]:
#KNN is better
y_out_test = pre_test('/content/drive/MyDrive/Assignment1/test_out/around.out','around',glove_vectors_emb,knn_classifier)

### 9.as

In [46]:
features, df_final = features_from_file('/content/drive/MyDrive/Assignment1/data_assn1/Train/Source/pp-as.sents.trng.xml','as')
np.unique(df_final['label'])
#svm_classifier = SVM_Model(features,df_final)
#knn_classifier =  KNN_model(features,df_final)

array(['1(1)'], dtype=object)

In [47]:
## Only label is there so no model required


### 10. at

In [48]:
features, df_final = features_from_file('/content/drive/MyDrive/Assignment1/data_assn1/Train/Source/pp-at.sents.trng.xml','at')
svm_classifier = SVM_Model(features,df_final)
knn_classifier =  KNN_model(features,df_final)

              precision    recall  f1-score   support

        1(1)       0.47      0.59      0.52        68
  1(1) 4(2b)       0.00      0.00      0.00         9
      10(5a)       0.50      0.25      0.33         8
       11(6)       1.00      1.00      1.00         2
     11(6)-1       0.39      0.35      0.37        26
        2(2)       0.67      0.50      0.57         8
       3(2a)       0.00      0.00      0.00         1
        5(3)       0.67      0.57      0.62         7
       8(4a)       0.00      0.00      0.00         1
        9(5)       0.38      0.40      0.39        48
9(5) 11(6)-1       0.00      0.00      0.00         1

    accuracy                           0.45       179
   macro avg       0.37      0.33      0.34       179
weighted avg       0.42      0.45      0.43       179

              precision    recall  f1-score   support

        1(1)       0.64      0.85      0.73        68
  1(1) 4(2b)       1.00      0.22      0.36         9
      10(5a)       1.00 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [49]:
#KNN is better
y_out_test = pre_test('/content/drive/MyDrive/Assignment1/test_out/at.out','at',glove_vectors_emb,knn_classifier)

### 11. at

In [50]:
features, df_final = features_from_file('/content/drive/MyDrive/Assignment1/data_assn1/Train/Source/pp-at.sents.trng.xml','at')
svm_classifier = SVM_Model(features,df_final)
knn_classifier =  KNN_model(features,df_final)

              precision    recall  f1-score   support

        1(1)       0.47      0.59      0.52        68
  1(1) 4(2b)       0.00      0.00      0.00         9
      10(5a)       0.50      0.25      0.33         8
       11(6)       1.00      1.00      1.00         2
     11(6)-1       0.39      0.35      0.37        26
        2(2)       0.67      0.50      0.57         8
       3(2a)       0.00      0.00      0.00         1
        5(3)       0.67      0.57      0.62         7
       8(4a)       0.00      0.00      0.00         1
        9(5)       0.38      0.40      0.39        48
9(5) 11(6)-1       0.00      0.00      0.00         1

    accuracy                           0.45       179
   macro avg       0.37      0.33      0.34       179
weighted avg       0.42      0.45      0.43       179

              precision    recall  f1-score   support

        1(1)       0.64      0.85      0.73        68
  1(1) 4(2b)       1.00      0.22      0.36         9
      10(5a)       1.00 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [51]:
#KNN is better
y_out_test = pre_test('/content/drive/MyDrive/Assignment1/test_out/at.out','at',glove_vectors_emb,knn_classifier)

### 12. before

In [52]:
features, df_final = features_from_file('/content/drive/MyDrive/Assignment1/data_assn1/Train/Source/pp-before.sents.trng.xml','before')
svm_classifier = SVM_Model(features,df_final)
knn_classifier =  KNN_model(features,df_final)

              precision    recall  f1-score   support

        1(1)       0.33      0.11      0.17         9
        2(2)       0.12      0.33      0.18         3
       3(2a)       0.00      0.00      0.00         0

    accuracy                           0.17        12
   macro avg       0.15      0.15      0.12        12
weighted avg       0.28      0.17      0.17        12

              precision    recall  f1-score   support

        1(1)       1.00      0.22      0.36         9
        2(2)       0.14      0.33      0.20         3
       3(2a)       0.00      0.00      0.00         0

    accuracy                           0.25        12
   macro avg       0.38      0.19      0.19        12
weighted avg       0.79      0.25      0.32        12



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [53]:

#KNN is better
y_out_test = pre_test('/content/drive/MyDrive/Assignment1/test_out/before.out','before',glove_vectors_emb,knn_classifier)

### 13. behind

In [54]:
features, df_final = features_from_file('/content/drive/MyDrive/Assignment1/data_assn1/Train/Source/pp-behind.sents.trng.xml','behind')
svm_classifier = SVM_Model(features,df_final)
knn_classifier =  KNN_model(features,df_final)

              precision    recall  f1-score   support

        1(1)       0.58      0.75      0.65        20
        3(2)       0.29      0.20      0.24        10
       4(2a)       1.00      0.50      0.67         4
        5(3)       0.00      0.00      0.00         1

    accuracy                           0.54        35
   macro avg       0.47      0.36      0.39        35
weighted avg       0.53      0.54      0.52        35

              precision    recall  f1-score   support

        1(1)       0.61      0.55      0.58        20
        3(2)       0.44      0.70      0.54        10
       4(2a)       0.00      0.00      0.00         4
        5(3)       0.00      0.00      0.00         1

    accuracy                           0.51        35
   macro avg       0.26      0.31      0.28        35
weighted avg       0.47      0.51      0.48        35



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [55]:
#SVM is better
y_out_test = pre_test('/content/drive/MyDrive/Assignment1/test_out/behind.out','behind',glove_vectors_emb,svm_classifier)

### 14. beneath


 

In [56]:
features, df_final = features_from_file('/content/drive/MyDrive/Assignment1/data_assn1/Train/Source/pp-beneath.sents.trng.xml','beneath')
svm_classifier = SVM_Model(features,df_final)
knn_classifier =  KNN_model(features,df_final)

              precision    recall  f1-score   support

        1(1)       0.69      0.90      0.78        10
       2(1a)       0.50      0.33      0.40         3
        3(2)       0.00      0.00      0.00         2

    accuracy                           0.67        15
   macro avg       0.40      0.41      0.39        15
weighted avg       0.56      0.67      0.60        15

              precision    recall  f1-score   support

        1(1)       0.83      1.00      0.91        10
       2(1a)       0.67      0.67      0.67         3
        3(2)       0.00      0.00      0.00         2

    accuracy                           0.80        15
   macro avg       0.50      0.56      0.53        15
weighted avg       0.69      0.80      0.74        15



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [57]:
#KNN is better
y_out_test = pre_test('/content/drive/MyDrive/Assignment1/test_out/beneath.out','beneath',glove_vectors_emb,knn_classifier)

### 15. besides

In [59]:
features, df_final = features_from_file('/content/drive/MyDrive/Assignment1/data_assn1/Train/Source/pp-beside.sents.trng.xml','beside')
np.unique(df_final["label"])
#svm_classifier = SVM_Model(features,df_final)
#knn_classifier =  KNN_model(features,df_final)

array(['1(1)'], dtype=object)

In [60]:
#Only one label so ouput will be '1(1)' for all.

### 16. between

In [61]:
features, df_final = features_from_file('/content/drive/MyDrive/Assignment1/data_assn1/Train/Source/pp-between.sents.trng.xml','between')
svm_classifier = SVM_Model(features,df_final)
knn_classifier =  KNN_model(features,df_final)

              precision    recall  f1-score   support

        1(1)       0.43      0.68      0.53        19
        3(3)       0.00      0.00      0.00         1
        4(4)       0.26      0.31      0.29        16
  4(4) 5(4a)       0.00      0.00      0.00         1
       5(4a)       0.75      0.27      0.40        11
       6(4b)       0.00      0.00      0.00         2
       7(4c)       0.00      0.00      0.00         3

    accuracy                           0.40        53
   macro avg       0.21      0.18      0.17        53
weighted avg       0.39      0.40      0.36        53

              precision    recall  f1-score   support

        1(1)       0.64      0.95      0.77        19
        3(3)       0.00      0.00      0.00         1
        4(4)       0.67      0.62      0.65        16
  4(4) 5(4a)       0.00      0.00      0.00         1
       5(4a)       1.00      0.64      0.78        11
       6(4b)       0.00      0.00      0.00         2
       7(4c)       1.00 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [62]:
#KNN is better
y_out_test = pre_test('/content/drive/MyDrive/Assignment1/test_out/between.out','between',glove_vectors_emb,knn_classifier)

### 17. by

In [63]:
features, df_final = features_from_file('/content/drive/MyDrive/Assignment1/data_assn1/Train/Source/pp-by.sents.trng.xml','by')
svm_classifier = SVM_Model(features,df_final)
knn_classifier =  KNN_model(features,df_final)

              precision    recall  f1-score   support

       12(3)       0.40      0.33      0.36        12
      15(3c)       0.00      0.00      0.00         3
       18(5)       0.29      0.25      0.27         8
       2(1a)       0.30      0.16      0.21        19
       3(1b)       0.51      0.71      0.60        56
       4(1c)       0.67      0.40      0.50        10
        5(2)       0.14      0.12      0.13        16
       8(2c)       0.00      0.00      0.00         4

    accuracy                           0.43       128
   macro avg       0.29      0.25      0.26       128
weighted avg       0.39      0.43      0.40       128

              precision    recall  f1-score   support

       12(3)       0.70      0.58      0.64        12
      15(3c)       0.00      0.00      0.00         3
       18(5)       0.21      0.75      0.33         8
       2(1a)       0.40      0.11      0.17        19
       3(1b)       0.80      0.66      0.73        56
       4(1c)       0.86 

In [64]:
#KNN is better
y_out_test = pre_test('/content/drive/MyDrive/Assignment1/test_out/by.out','by',glove_vectors_emb,knn_classifier)

### 18. down

In [65]:
features, df_final = features_from_file('/content/drive/MyDrive/Assignment1/data_assn1/Train/Source/pp-down.sents.trng.xml','down')
svm_classifier = SVM_Model(features,df_final)
knn_classifier =  KNN_model(features,df_final)

              precision    recall  f1-score   support

        1(1)       0.59      0.64      0.61        36
       2(1a)       1.00      1.00      1.00         1
       3(1b)       0.70      0.65      0.67        46

    accuracy                           0.65        83
   macro avg       0.76      0.76      0.76        83
weighted avg       0.65      0.65      0.65        83

              precision    recall  f1-score   support

        1(1)       0.65      0.67      0.66        36
       2(1a)       1.00      1.00      1.00         1
       3(1b)       0.73      0.72      0.73        46

    accuracy                           0.70        83
   macro avg       0.79      0.79      0.79        83
weighted avg       0.70      0.70      0.70        83



In [67]:
#KNN is better
y_out_test = pre_test('/content/drive/MyDrive/Assignment1/test_out/down.out','down',glove_vectors_emb,knn_classifier)

### 19. during

In [68]:
features, df_final = features_from_file('/content/drive/MyDrive/Assignment1/data_assn1/Train/Source/pp-during.sents.trng.xml','during')
svm_classifier = SVM_Model(features,df_final)
knn_classifier =  KNN_model(features,df_final)

              precision    recall  f1-score   support

        1(1)       0.67      0.57      0.62        14
       2(1a)       0.33      0.43      0.38         7

    accuracy                           0.52        21
   macro avg       0.50      0.50      0.50        21
weighted avg       0.56      0.52      0.54        21

              precision    recall  f1-score   support

        1(1)       0.78      0.50      0.61        14
       2(1a)       0.42      0.71      0.53         7

    accuracy                           0.57        21
   macro avg       0.60      0.61      0.57        21
weighted avg       0.66      0.57      0.58        21



In [69]:
#KNN is better
y_out_test = pre_test('/content/drive/MyDrive/Assignment1/test_out/during.out','during',glove_vectors_emb,knn_classifier)

### 20. for

In [70]:
features, df_final = features_from_file('/content/drive/MyDrive/Assignment1/data_assn1/Train/Source/pp-for.sents.trng.xml','for')
svm_classifier = SVM_Model(features,df_final)
knn_classifier =  KNN_model(features,df_final)

              precision    recall  f1-score   support

        1(1)       0.50      0.43      0.46         7
      10(8a)       0.00      0.00      0.00         4
       11(9)       0.00      0.00      0.00         3
      12(10)       0.33      0.09      0.14        11
        2(2)       0.19      0.12      0.15        24
      2(2)-1       0.38      0.30      0.33        20
        3(3)       0.47      0.32      0.38        22
       4(3a)       0.00      0.00      0.00         6
        5(4)       0.32      0.40      0.35        65
        6(5)       0.41      0.59      0.49        70
        7(6)       0.00      0.00      0.00         2
        8(7)       0.00      0.00      0.00         2
        9(8)       0.00      0.00      0.00         2

    accuracy                           0.37       238
   macro avg       0.20      0.17      0.18       238
weighted avg       0.33      0.37      0.34       238

              precision    recall  f1-score   support

        1(1)       0.25 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [71]:
#KNN is better
y_out_test = pre_test('/content/drive/MyDrive/Assignment1/test_out/for.out','for',glove_vectors_emb,knn_classifier)

### 21. from

In [72]:
features, df_final = features_from_file('/content/drive/MyDrive/Assignment1/data_assn1/Train/Source/pp-from.sents.trng.xml','from')
svm_classifier = SVM_Model(features,df_final)
knn_classifier =  KNN_model(features,df_final)

  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

        1(1)       0.28      0.51      0.36        83
       10(7)       0.24      0.25      0.25        64
     10(7)-1       1.00      0.12      0.22         8
       11(8)       1.00      0.33      0.50         3
       12(9)       1.00      0.30      0.46        10
     12(9)-1       0.33      0.09      0.14        22
      13(10)       0.56      0.29      0.38        17
      14(11)       0.00      0.00      0.00         2
       2(1a)       0.00      0.00      0.00         2
        4(3)       0.34      0.30      0.32        69
  4(3) 10(7)       0.00      0.00      0.00         2
  4(3) 12(9)       1.00      0.50      0.67         2
       5(3a)       0.00      0.00      0.00         1
        6(4)       0.50      0.08      0.14        12
       7(4a)       1.00      0.50      0.67         2
        8(5)       0.00      0.00      0.00         3

    accuracy                           0.31       302
   macro avg       0.45   

  _warn_prf(average, modifier, msg_start, len(result))


In [73]:
#KNN is better
y_out_test = pre_test('/content/drive/MyDrive/Assignment1/test_out/from.out','from',glove_vectors_emb,knn_classifier)

### 22. in

In [74]:
features, df_final = features_from_file('/content/drive/MyDrive/Assignment1/data_assn1/Train/Source/pp-in.sents.trng.xml','in')
svm_classifier = SVM_Model(features,df_final)
knn_classifier =  KNN_model(features,df_final)

  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

        1(1)       0.46      0.85      0.59       143
      1(1)-1       0.50      0.20      0.29         5
       11(8)       0.00      0.00      0.00         3
       2(1a)       0.07      0.02      0.04        42
        3(2)       0.43      0.20      0.27        15
        4(3)       0.00      0.00      0.00         1
        5(4)       0.25      0.08      0.12        26
       6(4a)       0.53      0.32      0.40        25
     6(4a)-1       0.00      0.00      0.00        16
        7(5)       0.36      0.16      0.22        55
        8(6)       1.00      0.17      0.29         6
        9(7)       0.00      0.00      0.00        11
      9(7)-1       1.00      1.00      1.00         2

    accuracy                           0.42       350
   macro avg       0.35      0.23      0.25       350
weighted avg       0.36      0.42      0.35       350

              precision    recall  f1-score   support

        1(1)       0.49 

  _warn_prf(average, modifier, msg_start, len(result))


In [75]:
#KNN is better
y_out_test = pre_test('/content/drive/MyDrive/Assignment1/test_out/in.out','in',glove_vectors_emb,knn_classifier)

### 23. inside

In [76]:
features, df_final = features_from_file('/content/drive/MyDrive/Assignment1/data_assn1/Train/Source/pp-inside.sents.trng.xml','inside')
svm_classifier = SVM_Model(features,df_final)
knn_classifier =  KNN_model(features,df_final)

              precision    recall  f1-score   support

        1(1)       0.60      0.43      0.50         7
       2(1a)       0.40      0.80      0.53         5
       3(1b)       0.50      0.20      0.29         5

    accuracy                           0.47        17
   macro avg       0.50      0.48      0.44        17
weighted avg       0.51      0.47      0.45        17

              precision    recall  f1-score   support

        1(1)       0.56      0.71      0.63         7
       2(1a)       0.57      0.80      0.67         5
       3(1b)       1.00      0.20      0.33         5

    accuracy                           0.59        17
   macro avg       0.71      0.57      0.54        17
weighted avg       0.69      0.59      0.55        17



In [77]:
#KNN is better
y_out_test = pre_test('/content/drive/MyDrive/Assignment1/test_out/inside.out','inside',glove_vectors_emb,knn_classifier)

### 24. into

In [78]:
features, df_final = features_from_file('/content/drive/MyDrive/Assignment1/data_assn1/Train/Source/pp-into.sents.trng.xml','into')
svm_classifier = SVM_Model(features,df_final)
knn_classifier =  KNN_model(features,df_final)

              precision    recall  f1-score   support

        1(1)       0.33      0.35      0.34        40
   1(1) 3(3)       1.00      0.25      0.40         4
        2(2)       0.67      0.15      0.25        13
        3(3)       0.49      0.70      0.58        64
        4(4)       0.80      0.36      0.50        11
        5(5)       0.00      0.00      0.00         2
        6(6)       0.00      0.00      0.00        12
        7(7)       0.50      0.40      0.44         5

    accuracy                           0.45       151
   macro avg       0.47      0.28      0.31       151
weighted avg       0.45      0.45      0.42       151

              precision    recall  f1-score   support

        1(1)       0.46      0.78      0.57        40
   1(1) 3(3)       0.00      0.00      0.00         4
        2(2)       0.42      0.38      0.40        13
        3(3)       0.79      0.66      0.72        64
        4(4)       1.00      0.45      0.62        11
        5(5)       0.00 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [79]:
#KNN is better
y_out_test = pre_test('/content/drive/MyDrive/Assignment1/test_out/into.out','into',glove_vectors_emb,knn_classifier)

### 25. like

In [80]:
features, df_final = features_from_file('/content/drive/MyDrive/Assignment1/data_assn1/Train/Source/pp-like.sents.trng.xml','like')
svm_classifier = SVM_Model(features,df_final)
knn_classifier =  KNN_model(features,df_final)

              precision    recall  f1-score   support

        1(1)       0.50      0.15      0.24        13
       2(1a)       0.79      0.96      0.87        48
       3(1b)       0.00      0.00      0.00         3
       5(1d)       0.20      0.50      0.29         2
        6(2)       0.00      0.00      0.00         1

    accuracy                           0.73        67
   macro avg       0.30      0.32      0.28        67
weighted avg       0.67      0.73      0.68        67

              precision    recall  f1-score   support

        1(1)       0.67      0.15      0.25        13
       2(1a)       0.79      0.85      0.82        48
       3(1b)       0.10      0.33      0.15         3
       5(1d)       0.50      0.50      0.50         2
        6(2)       0.00      0.00      0.00         1

    accuracy                           0.67        67
   macro avg       0.41      0.37      0.34        67
weighted avg       0.71      0.67      0.66        67



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [81]:
#SVM is better
y_out_test = pre_test('/content/drive/MyDrive/Assignment1/test_out/like.out','like',glove_vectors_emb,svm_classifier)

###  26. of

In [82]:
features, df_final = features_from_file('/content/drive/MyDrive/Assignment1/data_assn1/Train/Source/pp-of.sents.trng.xml','of')
svm_classifier = SVM_Model(features,df_final)
knn_classifier =  KNN_model(features,df_final)

  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

      10(5a)       0.00      0.00      0.00         5
       11(6)       0.21      0.08      0.11        52
11(6) 12(6a)       1.00      0.75      0.86         4
      12(6a)       0.31      0.50      0.38       152
      13(6b)       0.91      0.36      0.51        28
       14(7)       0.00      0.00      0.00        16
      15(7a)       1.00      0.50      0.67         6
      16(7b)       0.43      0.27      0.33        11
       17(8)       0.00      0.00      0.00        10
       2(1a)       0.36      0.12      0.18        33
       3(1b)       0.30      0.46      0.36       167
     3(1b)-1       0.33      0.09      0.15        43
        4(2)       0.00      0.00      0.00         2
       5(2a)       0.00      0.00      0.00         1
        6(3)       0.22      0.23      0.22       120
      6(3)-1       0.00      0.00      0.00         3
       7(3a)       0.00      0.00      0.00         4
        9(5)       0.21    

  _warn_prf(average, modifier, msg_start, len(result))


In [83]:
#KNN is better
y_out_test = pre_test('/content/drive/MyDrive/Assignment1/test_out/of.out','of',glove_vectors_emb,knn_classifier)

### 27. off

In [84]:
features, df_final = features_from_file('/content/drive/MyDrive/Assignment1/data_assn1/Train/Source/pp-off.sents.trng.xml','off')
svm_classifier = SVM_Model(features,df_final)
knn_classifier =  KNN_model(features,df_final)

              precision    recall  f1-score   support

        1(1)       0.33      0.11      0.17         9
        2(2)       0.00      0.00      0.00         2
       3(2a)       0.00      0.00      0.00         1
        4(3)       0.74      0.97      0.84        29

    accuracy                           0.71        41
   macro avg       0.27      0.27      0.25        41
weighted avg       0.59      0.71      0.63        41

              precision    recall  f1-score   support

        1(1)       0.00      0.00      0.00         9
        2(2)       0.00      0.00      0.00         2
       3(2a)       0.00      0.00      0.00         1
        4(3)       0.72      1.00      0.84        29

    accuracy                           0.71        41
   macro avg       0.18      0.25      0.21        41
weighted avg       0.51      0.71      0.59        41



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [85]:
#SVM is better
y_out_test = pre_test('/content/drive/MyDrive/Assignment1/test_out/off.out','off',glove_vectors_emb,svm_classifier)

### 28. on

In [86]:
features, df_final = features_from_file('/content/drive/MyDrive/Assignment1/data_assn1/Train/Source/pp-on.sents.trng.xml','on')
svm_classifier = SVM_Model(features,df_final)
knn_classifier =  KNN_model(features,df_final)

              precision    recall  f1-score   support

        1(1)       0.18      0.51      0.27        35
       11(5)       0.00      0.00      0.00        12
     11(5)-1       0.56      0.29      0.38        17
       12(6)       0.00      0.00      0.00         3
      13(6a)       0.00      0.00      0.00         2
      15(7a)       0.00      0.00      0.00         4
       17(8)       0.00      0.00      0.00        10
       2(1a)       0.25      0.09      0.13        11
      20(10)       1.00      0.50      0.67         2
      23(13)       0.00      0.00      0.00         1
       3(1b)       0.00      0.00      0.00         5
       4(1c)       0.67      0.18      0.29        11
       5(1d)       0.07      0.06      0.07        32
        7(2)       0.33      0.18      0.23        17
        8(3)       0.32      0.44      0.37        39
  8(3) 11(5)       1.00      0.50      0.67         2
       9(3a)       0.25      0.07      0.11        15

    accuracy              

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [87]:
#KNN is better
y_out_test = pre_test('/content/drive/MyDrive/Assignment1/test_out/on.out','on',glove_vectors_emb,knn_classifier)

### 29. onto

In [88]:
features, df_final = features_from_file('/content/drive/MyDrive/Assignment1/data_assn1/Train/Source/pp-onto.sents.trng.xml','onto')
svm_classifier = SVM_Model(features,df_final)
knn_classifier =  KNN_model(features,df_final)

              precision    recall  f1-score   support

        1(1)       0.90      1.00      0.95        27
        3(3)       0.00      0.00      0.00         3

    accuracy                           0.90        30
   macro avg       0.45      0.50      0.47        30
weighted avg       0.81      0.90      0.85        30

              precision    recall  f1-score   support

        1(1)       0.90      1.00      0.95        27
        3(3)       0.00      0.00      0.00         3

    accuracy                           0.90        30
   macro avg       0.45      0.50      0.47        30
weighted avg       0.81      0.90      0.85        30



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [89]:
#SVM is better
y_out_test = pre_test('/content/drive/MyDrive/Assignment1/test_out/onto.out','onto',glove_vectors_emb,svm_classifier)

### 30. over

In [90]:
features, df_final = features_from_file('/content/drive/MyDrive/Assignment1/data_assn1/Train/Source/pp-over.sents.trng.xml','over')
svm_classifier = SVM_Model(features,df_final)
knn_classifier =  KNN_model(features,df_final)

              precision    recall  f1-score   support

        1(1)       0.00      0.00      0.00         1
       11(4)       0.38      0.50      0.43         6
      13(4b)       0.00      0.00      0.00         4
     15(6)-1       0.00      0.00      0.00         1
       16(7)       0.46      0.55      0.50        22
       2(1a)       0.25      0.30      0.27        10
       3(1b)       1.00      1.00      1.00         1
        4(2)       0.00      0.00      0.00         2
       6(2b)       0.00      0.00      0.00         3

    accuracy                           0.38        50
   macro avg       0.23      0.26      0.24        50
weighted avg       0.32      0.38      0.35        50

              precision    recall  f1-score   support

        1(1)       0.00      0.00      0.00         1
       11(4)       0.60      0.50      0.55         6
      13(4b)       0.00      0.00      0.00         4
     15(6)-1       0.00      0.00      0.00         1
       16(7)       0.71 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [91]:
#KNN is better
y_out_test = pre_test('/content/drive/MyDrive/Assignment1/test_out/over.out','over',glove_vectors_emb,knn_classifier)

### 31. round

In [92]:
features, df_final = features_from_file('/content/drive/MyDrive/Assignment1/data_assn1/Train/Source/pp-round.sents.trng.xml','round')
svm_classifier = SVM_Model(features,df_final)
knn_classifier =  KNN_model(features,df_final)

              precision    recall  f1-score   support

        1(1)       1.00      0.20      0.33         5
        3(2)       0.38      0.56      0.45         9
       4(2a)       0.00      0.00      0.00         1
        5(3)       1.00      0.25      0.40         4
       6(3a)       0.18      0.33      0.23         9
        8(4)       0.50      0.39      0.44        18

    accuracy                           0.37        46
   macro avg       0.51      0.29      0.31        46
weighted avg       0.50      0.37      0.38        46

              precision    recall  f1-score   support

        1(1)       1.00      0.20      0.33         5
        3(2)       0.64      0.78      0.70         9
       4(2a)       0.20      1.00      0.33         1
        5(3)       0.40      0.50      0.44         4
       6(3a)       0.44      0.44      0.44         9
        8(4)       0.80      0.67      0.73        18

    accuracy                           0.59        46
   macro avg       0.58

  _warn_prf(average, modifier, msg_start, len(result))


In [93]:
#KNN is better
y_out_test = pre_test('/content/drive/MyDrive/Assignment1/test_out/round.out','round',glove_vectors_emb,knn_classifier)

### 32. through

In [94]:
features, df_final = features_from_file('/content/drive/MyDrive/Assignment1/data_assn1/Train/Source/pp-through.sents.trng.xml','through')
svm_classifier = SVM_Model(features,df_final)
knn_classifier =  KNN_model(features,df_final)

              precision    recall  f1-score   support

        1(1)       0.31      0.29      0.30        17
  1(1) 3(1b)       0.60      0.46      0.52        26
       10(3)       0.50      0.60      0.55         5
       12(5)       0.00      0.00      0.00         1
     12(5)-1       0.00      0.00      0.00         1
      13(5a)       0.00      0.00      0.00         3
       2(1a)       0.00      0.00      0.00         6
       3(1b)       0.25      0.50      0.33        28
 3(1b) 10(3)       0.00      0.00      0.00         2
 3(1b) 5(1d)       1.00      0.50      0.67         2
       4(1c)       0.43      0.21      0.29        14
     5(1d)-1       0.50      0.33      0.40         3
       6(1e)       0.00      0.00      0.00         1
       8(2a)       0.00      0.00      0.00         1
       9(2b)       0.00      0.00      0.00         1

    accuracy                           0.35       111
   macro avg       0.24      0.19      0.20       111
weighted avg       0.36   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [95]:
#KNN is better
y_out_test = pre_test('/content/drive/MyDrive/Assignment1/test_out/through.out','through',glove_vectors_emb,knn_classifier)

### 33. to 

In [96]:
features, df_final = features_from_file('/content/drive/MyDrive/Assignment1/data_assn1/Train/Source/pp-to.sents.trng.xml','to')
svm_classifier = SVM_Model(features,df_final)
knn_classifier =  KNN_model(features,df_final)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

        1(1)       0.43      0.58      0.49       100
      10(4a)       1.00      0.11      0.20         9
       13(5)       0.25      0.06      0.10        17
       14(6)       0.36      0.25      0.30        16
       2(1a)       0.00      0.00      0.00         1
        5(2)       0.00      0.00      0.00         1
       6(2a)       0.57      0.13      0.21        31
        8(3)       0.35      0.50      0.41        88
      8(3)-1       0.88      0.27      0.41        26
        9(4)       1.00      0.57      0.73         7

    accuracy                           0.42       296
   macro avg       0.48      0.25      0.28       296
weighted avg       0.47      0.42      0.39       296

              precision    recall  f1-score   support

        1(1)       0.57      0.86      0.69       100
      10(4a)       1.00      0.22      0.36         9
       13(5)       0.67      0.59      0.62        17
       14(6)       0.56 

In [97]:
#SVM is better
y_out_test = pre_test('/content/drive/MyDrive/Assignment1/test_out/to.out','to',glove_vectors_emb,svm_classifier)

### 34. towards

In [98]:
features, df_final = features_from_file('/content/drive/MyDrive/Assignment1/data_assn1/Train/Source/pp-towards.sents.trng.xml','towards')
svm_classifier = SVM_Model(features,df_final)
knn_classifier =  KNN_model(features,df_final)

              precision    recall  f1-score   support

        1(1)       0.94      1.00      0.97        46
        4(2)       0.00      0.00      0.00         2
      4(2)-1       0.80      0.67      0.73         6

    accuracy                           0.93        54
   macro avg       0.58      0.56      0.57        54
weighted avg       0.89      0.93      0.91        54

              precision    recall  f1-score   support

        1(1)       0.94      1.00      0.97        46
        4(2)       1.00      0.50      0.67         2
      4(2)-1       1.00      0.67      0.80         6

    accuracy                           0.94        54
   macro avg       0.98      0.72      0.81        54
weighted avg       0.95      0.94      0.94        54



  _warn_prf(average, modifier, msg_start, len(result))


In [99]:
#KNN is better
y_out_test = pre_test('/content/drive/MyDrive/Assignment1/test_out/towards.out','towards',glove_vectors_emb,knn_classifier)


### 35. with

In [100]:
features, df_final = features_from_file('/content/drive/MyDrive/Assignment1/data_assn1/Train/Source/pp-with.sents.trng.xml','with')
svm_classifier = SVM_Model(features,df_final)
knn_classifier =  KNN_model(features,df_final)

  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

        1(1)       0.28      0.36      0.31        45
      11(7b)       0.57      0.24      0.33        17
 11(7b) 7(5)       1.00      0.33      0.50         3
      12(7c)       0.00      0.00      0.00         1
       13(8)       1.00      0.33      0.50         3
     15(9)-1       1.00      0.33      0.50         3
        2(2)       0.46      0.38      0.41        16
  2(2) 3(2a)       0.00      0.00      0.00         2
       3(2a)       0.50      0.50      0.50         2
        4(3)       0.19      0.18      0.19        50
   4(3) 2(2)       0.00      0.00      0.00         1
      4(3)-1       0.00      0.00      0.00         3
       5(3a)       0.39      0.55      0.46        74
        6(4)       0.29      0.08      0.13        24
        7(5)       0.33      0.18      0.24        11
        9(7)       0.29      0.33      0.31        42

    accuracy                           0.33       297
   macro avg       0.39   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [101]:
#KNN is better
y_out_test = pre_test('/content/drive/MyDrive/Assignment1/test_out/with.out','with',glove_vectors_emb,knn_classifier)