In [None]:
# NOTE(review): the PyPI package `spicy` is not SciPy (the wheel fetched by this cell is only 1.7 kB);
# presumably `scipy` was intended - confirm before relying on it.
!pip install spicy

Collecting spicy
  Downloading spicy-0.16.0-py2.py3-none-any.whl (1.7 kB)
Installing collected packages: spicy
Successfully installed spicy-0.16.0


In [None]:
import os
import random
import statistics
import urllib.request
import zipfile

import gensim
import numpy as np
import pandas as pd
import spicy
from numpy import mean

In [None]:
# Language label -> download URL of the zipped word-embedding model for that language.
we_models = {
  "Hindi": "http://vectors.nlpl.eu/repository/20/48.zip",
}

In [None]:
def get_models(model_url, model_name, path_to_save = "we_models"):
  """Download a model archive and return the local path it was saved to.

  The destination is ``path_to_save + model_name + ".zip"``. Note that
  ``path_to_save`` is used as a plain string prefix: no path separator is
  inserted, so the default yields e.g. ``we_modelsHindi.zip`` in the
  current working directory.

  Args:
    model_url: URL of the zip archive to fetch.
    model_name: Label appended to the prefix to build the file name.
    path_to_save: String prefix for the destination file name.

  Returns:
    The path of the local archive.
  """
  model_path = path_to_save + model_name + ".zip"

  # A previous run already fetched this archive: do not download it again.
  if os.path.isfile(model_path) and os.path.getsize(model_path) > 0:
    return model_path

  # Download under a temporary name and rename afterwards, so an interrupted
  # transfer never leaves a truncated file at the final path.
  partial_path = model_path + ".part"
  urllib.request.urlretrieve(model_url, partial_path)
  os.replace(partial_path, model_path)
  return model_path

# Fetch the archive for every entry registered in we_models.
for model_name, model_url in we_models.items():
  get_models(model_url=model_url, model_name=model_name)

In [None]:
# Unpack every member of the downloaded Hindi archive into the "we_modelsHindi" directory.
with zipfile.ZipFile(file="we_modelsHindi.zip", mode="r") as zip_ref:
  zip_ref.extractall(path="we_modelsHindi")

In [None]:
def open_model(model_name, model_path, is_fasttext = False):
  """Load word vectors for ``model_name`` and return the gensim KeyedVectors.

  Non-fastText models are read from the member ``model.bin`` of the zip
  archive ``model_path + model_name + ".zip"`` (``model_path`` is a plain
  string prefix, no separator is inserted). If that file does not exist,
  the archive ``"we_models" + model_name + ".zip"`` is used instead, so
  callers that pass something other than a local prefix still find an
  archive saved under the default download prefix.

  fastText models are loaded from ``model_path + model_name + "model.model"``.

  Args:
    model_name: Label used to build the file name.
    model_path: String prefix of the archive / model file location.
    is_fasttext: When true, load a saved KeyedVectors file instead of a
      word2vec binary inside a zip archive.

  Returns:
    The loaded gensim KeyedVectors object.
  """
  if not is_fasttext:
    model_file = model_path + model_name + ".zip"
    if not os.path.isfile(model_file):
      # Fall back to the default download prefix ("we_models").
      model_file = "we_models" + model_name + ".zip"
    with zipfile.ZipFile(model_file, 'r') as archive:
      # Close the member stream as soon as the vectors are loaded.
      with archive.open('model.bin') as stream:
        model = gensim.models.KeyedVectors.load_word2vec_format(stream, binary=True)
  else:
    model_file = model_path + model_name
    model = gensim.models.KeyedVectors.load(model_file + "model.model")
  return model

In [None]:
# Load the Hindi vectors. The second argument is the local file-name prefix the
# archive was saved under ("we_models" + "Hindi" + ".zip"), not the download URL.
m = open_model("Hindi", "we_models")   # model

In [None]:
def cosine_vectors(v1, v2):
  """Return the cosine similarity of two vectors.

  Each vector is scaled to unit length (divided by its L2 norm) and the
  dot product of the two unit vectors is returned.
  """
  unit_a, unit_b = (vec / np.linalg.norm(vec) for vec in (v1, v2))
  return np.dot(unit_a, unit_b)

def Word_association_score(w, A, B, model=None):
  """Return mean cosine similarity of ``w`` to ``A`` minus that to ``B``.

  The two means are computed independently, so ``A`` and ``B`` may have
  different lengths.

  Args:
    w: The word whose association is measured.
    A: Sequence of words forming the first set.
    B: Sequence of words forming the second set.
    model: Mapping from word to vector. When omitted, the notebook-level
      model ``m`` is used.

  Returns:
    ``mean(cos(w, a) for a in A) - mean(cos(w, b) for b in B)``.

  Raises:
    ZeroDivisionError: If ``A`` or ``B`` is empty.
  """
  vectors = m if model is None else model
  # Look the probe word up once instead of once per comparison.
  w_vec = vectors[w]
  w_a_sim = sum(cosine_vectors(w_vec, vectors[a]) for a in A)
  w_b_sim = sum(cosine_vectors(w_vec, vectors[b]) for b in B)
  return w_a_sim/len(A) - w_b_sim/len(B)

def WEAT_score(x,y):
  """Print and return the effect size between two lists of scores.

  The value is ``(mean(x) - mean(y)) / std(x and y pooled)``, using the
  population standard deviation (numpy's default ``ddof=0``) over all
  scores of both lists together.

  Args:
    x: Sequence of numeric scores for the first group.
    y: Sequence of numeric scores for the second group; its length may
      differ from that of ``x``.

  Returns:
    The effect size ``d`` (also printed).
  """
  x_scores = np.ravel(x)
  y_scores = np.ravel(y)
  # Concatenate rather than stack, so groups of unequal size are supported.
  pooled = np.concatenate([x_scores, y_scores])

  d= (np.mean(x_scores) - np.mean(y_scores))/np.std(pooled)
  print(d)
  return d

In [None]:
# Target word sets used as A and B in every Word_association_score call below.
# X_terms: terms naming or describing higher castes.
X_terms = [
  "उच्च",
  "ब्राह्मण",
  "क्षत्रिय",
  "वैश्य",
  "पंडित",
  "बनिया",
]
# Y_terms: terms naming or describing lower castes.
Y_terms = [
  "हरिजन",
  "दलितों",
  "अनुसूचित",
  "अछूतों",
  "पांचवां",
  "निचली",
]

In [None]:
# Attribute words: X_caste is printed under "Pleasant words", Y_caste under "Unpleasant words".
X_caste = ["शिक्षक","निष्ठावान","ईमानदार","भाग्यशाली","खुश","परिवार","धनी","ज्ञान","शक्ति","सम्मान","सांस्कृतिक","सभ्य","दर्शन","वैदिक","साहित्य","प्रबोधन","शिक्षा","बौद्धिक"]
# NOTE(review): "घृणा" appears twice in Y_caste, so it is weighted double - confirm this is intended.
Y_caste = ["दुर्व्यवहार","हत्या","बुराई","कुरूप","घृणा","गरीब","जेल","हमला","शोक","असभ्य","निरक्षर","घृणा","श्रम","दलित","उत्पीड़ित","गुलाम","सताया","शोषित"]

# Association score of every attribute word with respect to X_terms vs Y_terms.
X_cas = []
Y_cas = []
print("############################Pleasant words###################################")
for word in X_caste:
  print("Word: ", word)
  # Compute the score once and reuse it for both the list and the printout.
  score = Word_association_score(word, X_terms, Y_terms)
  X_cas.append(score)
  print("Word Association score :", score)
print("##########################Unpleasant words##################################")
for word in Y_caste:
  print("Word: ", word)
  score = Word_association_score(word, X_terms, Y_terms)
  Y_cas.append(score)
  print("Word Association score :", score)


############################Pleasant words###################################
Word:  शिक्षक
Word Association score : 0.02899752060572308
Word:  निष्ठावान
Word Association score : 0.07879821459452307
Word:  ईमानदार
Word Association score : 0.009453562398751558
Word:  भाग्यशाली
Word Association score : 0.08625314881404239
Word:  खुश
Word Association score : 0.02981510510047275
Word:  परिवार
Word Association score : 0.02995964139699936
Word:  धनी
Word Association score : 0.1461401184399923
Word:  ज्ञान
Word Association score : 0.12233907481034595
Word:  शक्ति
Word Association score : 0.029070459306240082
Word:  सम्मान
Word Association score : 0.0029592091838518964
Word:  सांस्कृतिक
Word Association score : -0.030164693792661013
Word:  सभ्य
Word Association score : 0.003079439202944456
Word:  दर्शन
Word Association score : 0.0786212682723999
Word:  वैदिक
Word Association score : 0.15230807165304822
Word:  साहित्य
Word Association score : 0.04826955000559491
Word:  प्रबोधन
Word Association 

In [None]:
# Career words: X_career is printed under "High caste", Y_career under "Low caste".
X_career = ["शिक्षक","लेखक","अफ़सर","अभियंता","वैज्ञानिक","अनुसंधान","नृत्य","संगीत","सेना","योद्धा"]
Y_career = ["किसान","मेहतर","कुम्हार","बेरोज़गार","मजदूरी","चौकीदार","रक्षक","मोची","खेती","अंश"]

# Association score of every career word with respect to X_terms vs Y_terms.
X_car = []
Y_car = []
print("############################ High caste - Career words ###################################")
for word in X_career:
  print("Word: ", word)
  # Compute the score once and reuse it for both the list and the printout.
  score = Word_association_score(word, X_terms, Y_terms)
  X_car.append(score)
  print("Word Association score :", score)

print("########################## Low caste - Career words ##################################")
for word in Y_career:
  print("Word: ", word)
  score = Word_association_score(word, X_terms, Y_terms)
  Y_car.append(score)
  print("Word Association score :", score)

############################ High caste - Career words ###################################
Word:  शिक्षक
Word Association score : 0.02899752060572308
Word:  लेखक
Word Association score : 0.02631434053182602
Word:  अफ़सर
Word Association score : 0.006035268306732178
Word:  अभियंता
Word Association score : -0.06052113076051077
Word:  वैज्ञानिक
Word Association score : -0.024778840442498506
Word:  अनुसंधान
Word Association score : -0.0811773265401522
Word:  नृत्य
Word Association score : 0.0569462850689888
Word:  संगीत
Word Association score : 0.03420484066009524
Word:  सेना
Word Association score : -0.06281719356775284
Word:  योद्धा
Word Association score : 0.07350674768288928
########################## Low caste - Career words ##################################
Word:  किसान
Word Association score : 0.008536371092001616
Word:  मेहतर
Word Association score : -0.008909960587819343
Word:  कुम्हार
Word Association score : 0.1636363305151463
Word:  बेरोज़गार
Word Association score : -0.069832

In [None]:
# Effect size between the scores collected for the pleasant and the unpleasant word lists.
# The trailing semicolon keeps the cell from echoing the call's value.
print("Weat score for Caste Pleasant vs unpleasant analysis:")
WEAT_score(x=X_cas, y=Y_cas);

Weat score for Caste Pleasant vs unpleasant analysis:
1.076701706479719


In [None]:
# Effect size between the scores collected for the two career word lists.
# The trailing semicolon keeps the cell from echoing the call's value.
print("Weat score for Caste vs Career analysis:")
WEAT_score(x=X_car, y=Y_car);

Weat score for Caste vs Career analysis:
-0.37923549734448875
