In [113]:
import nltk
from gensim import corpora, models, similarities
from gensim.models import Word2Vec
from gensim.test.utils import common_texts

# **Loading Data**

In [114]:
with open ("/kaggle/input/text-data/data", "r") as myfile:
    text = myfile.read().splitlines()
print(text)

['**Morocco and Marrakech: A Tapestry of Tradition and Modernity** Morocco, located at the crossroads of Europe and Africa, is a country drenched in history, mystery, and cultural richness. A testament to the ancient civilizations that once flourished here, this North African kingdom boasts a unique blend of Arab, Berber, and European influences. At the heart of Morocco\'s rich tapestry lies Marrakech, one of its four imperial cities and a vibrant epicenter of tradition and modernity. **Geographical Significance** Morocco is bordered by the Atlantic Ocean to the west, the Mediterranean Sea to the north, Algeria to the east and southeast, and the vast Sahara desert to the south. Its strategic location has historically made it a sought-after territory and a melting pot of cultures, religions, and trade routes. **Marrakech: The Red City** Marrakech, often referred to as "The Red City" due to its distinctive red-hued buildings, stands against the backdrop of the snow-capped Atlas Mountains

# **Remove Stars**

In [115]:
# Function --------------------------
def remove_symbols(text) :
    removed_symbols_text = []
    for w in text:
        mod_string = w.replace("**","")
        #mod_string = w.replace("**","").replace("'", "").replace("]", "").replace("[", "").strip()
        removed_symbols_text.append(mod_string)
    return removed_symbols_text[0]

# Execution --------------------------
text_no_symbols = remove_symbols(text)
print(text_no_symbols)

Morocco and Marrakech: A Tapestry of Tradition and Modernity Morocco, located at the crossroads of Europe and Africa, is a country drenched in history, mystery, and cultural richness. A testament to the ancient civilizations that once flourished here, this North African kingdom boasts a unique blend of Arab, Berber, and European influences. At the heart of Morocco's rich tapestry lies Marrakech, one of its four imperial cities and a vibrant epicenter of tradition and modernity. Geographical Significance Morocco is bordered by the Atlantic Ocean to the west, the Mediterranean Sea to the north, Algeria to the east and southeast, and the vast Sahara desert to the south. Its strategic location has historically made it a sought-after territory and a melting pot of cultures, religions, and trade routes. Marrakech: The Red City Marrakech, often referred to as "The Red City" due to its distinctive red-hued buildings, stands against the backdrop of the snow-capped Atlas Mountains. Established i

# **Remove Punctuation**

In [116]:
import string
# Function --------------------------
def remove_punctuation(text):
    translator = str.maketrans('','',string.punctuation)
    text_without_punctuation = text.translate(translator)
    return text_without_punctuation

# Execution --------------------------
text_no_punctuation = remove_punctuation(text_no_symbols)
print(text_no_punctuation)

Morocco and Marrakech A Tapestry of Tradition and Modernity Morocco located at the crossroads of Europe and Africa is a country drenched in history mystery and cultural richness A testament to the ancient civilizations that once flourished here this North African kingdom boasts a unique blend of Arab Berber and European influences At the heart of Moroccos rich tapestry lies Marrakech one of its four imperial cities and a vibrant epicenter of tradition and modernity Geographical Significance Morocco is bordered by the Atlantic Ocean to the west the Mediterranean Sea to the north Algeria to the east and southeast and the vast Sahara desert to the south Its strategic location has historically made it a soughtafter territory and a melting pot of cultures religions and trade routes Marrakech The Red City Marrakech often referred to as The Red City due to its distinctive redhued buildings stands against the backdrop of the snowcapped Atlas Mountains Established in the 11th century it has rem

# **Remove Stopwords**

In [117]:
from nltk.corpus import stopwords
# Function --------------------------
def remove_stopwords(text):
    return " ".join([word for word in str(text).split() if word not in stopwords.words('english')])

# Execution --------------------------
text_no_stopwords = remove_stopwords(text_no_punctuation)
print(text_no_stopwords)

Morocco Marrakech A Tapestry Tradition Modernity Morocco located crossroads Europe Africa country drenched history mystery cultural richness A testament ancient civilizations flourished North African kingdom boasts unique blend Arab Berber European influences At heart Moroccos rich tapestry lies Marrakech one four imperial cities vibrant epicenter tradition modernity Geographical Significance Morocco bordered Atlantic Ocean west Mediterranean Sea north Algeria east southeast vast Sahara desert south Its strategic location historically made soughtafter territory melting pot cultures religions trade routes Marrakech The Red City Marrakech often referred The Red City due distinctive redhued buildings stands backdrop snowcapped Atlas Mountains Established 11th century remained crucial political economic cultural center Morocco Journey Medina Marrakechs old town Medina UNESCO World Heritage site labyrinthine maze narrow alleys bustling souks historical landmarks The Djemaa elFna Square lies

# **Lemmatization**

In [118]:
import spacy
nlp = spacy.load('en_core_web_sm')

# Function ---------------------------
def lemmatize(text):
    doc = nlp(text)
    return " ".join([token.lemma_ for token in doc])

# Execution ---------------------------
text_lemmatized = lemmatize(text_no_stopwords)
print(text_lemmatized)

Morocco Marrakech A Tapestry Tradition Modernity Morocco locate crossroad Europe Africa country drench history mystery cultural richness a testament ancient civilization flourish north african kingdom boast unique blend arab Berber european influence at heart Moroccos rich tapestry lie Marrakech one four imperial city vibrant epicenter tradition modernity geographical Significance Morocco border Atlantic Ocean west Mediterranean Sea north Algeria east southeast vast Sahara desert south its strategic location historically make soughtafter territory melting pot culture religion trade route Marrakech the Red City Marrakech often refer the Red City due distinctive redhue building stand backdrop snowcappe Atlas Mountains establish 11th century remain crucial political economic cultural center Morocco Journey Medina Marrakechs old town Medina UNESCO World Heritage site labyrinthine maze narrow alley bustle souks historical landmark the Djemaa elfna Square lie heart Medina come alive every ev

# **Tokenization**

In [119]:
from nltk.tokenize import word_tokenize
tokenized_text = word_tokenize(text_lemmatized)
print(tokenized_text)

['Morocco', 'Marrakech', 'A', 'Tapestry', 'Tradition', 'Modernity', 'Morocco', 'locate', 'crossroad', 'Europe', 'Africa', 'country', 'drench', 'history', 'mystery', 'cultural', 'richness', 'a', 'testament', 'ancient', 'civilization', 'flourish', 'north', 'african', 'kingdom', 'boast', 'unique', 'blend', 'arab', 'Berber', 'european', 'influence', 'at', 'heart', 'Moroccos', 'rich', 'tapestry', 'lie', 'Marrakech', 'one', 'four', 'imperial', 'city', 'vibrant', 'epicenter', 'tradition', 'modernity', 'geographical', 'Significance', 'Morocco', 'border', 'Atlantic', 'Ocean', 'west', 'Mediterranean', 'Sea', 'north', 'Algeria', 'east', 'southeast', 'vast', 'Sahara', 'desert', 'south', 'its', 'strategic', 'location', 'historically', 'make', 'soughtafter', 'territory', 'melting', 'pot', 'culture', 'religion', 'trade', 'route', 'Marrakech', 'the', 'Red', 'City', 'Marrakech', 'often', 'refer', 'the', 'Red', 'City', 'due', 'distinctive', 'redhue', 'building', 'stand', 'backdrop', 'snowcappe', 'Atlas'

# **Modele Word2Vec**

In [120]:
from gensim.models import Word2Vec

model = Word2Vec(sentences=[tokenized_text], vector_size=100, window=5, min_count=1, sg=0)

# **Représentation vectorielle**

In [121]:
# Extraire la représentation vectorielle d'un mot ('Morocco')
word_vector = model.wv['Morocco']

# **Calcul de similarité**

In [122]:
# Calculer la similarité entre 'Morocco' et les autres mots
similarity_dict = {}
for word in model.wv.key_to_index:
    similarity = model.wv.similarity('Morocco', word)
    similarity_dict[('Morocco', word)] = similarity

# Affichage
for key, value in similarity_dict.items():
    print(f"{key}: {value:.8f},")

('Morocco', 'Marrakech'): -0.00888751,
('Morocco', 'Morocco'): 1.00000000,
('Morocco', 'the'): -0.02240901,
('Morocco', 'Medina'): 0.06768797,
('Morocco', 'city'): 0.00486306,
('Morocco', 'offer'): 0.01068953,
('Morocco', 'history'): -0.11212663,
('Morocco', 'tradition'): -0.11374921,
('Morocco', 'heart'): 0.03336586,
('Morocco', 'value'): -0.09513409,
('Morocco', 'architecture'): -0.13427579,
('Morocco', 'also'): 0.01036768,
('Morocco', 'saadian'): -0.00370709,
('Morocco', 'Red'): 0.13765508,
('Morocco', 'Moroccan'): 0.16084917,
('Morocco', 'century'): 0.12341348,
('Morocco', 'like'): 0.02346492,
('Morocco', 'mint'): 0.08564979,
('Morocco', 'north'): 0.16067749,
('Morocco', 'moroccan'): -0.15986335,
('Morocco', 'journey'): -0.01813636,
('Morocco', 'City'): -0.07160485,
('Morocco', 'ancient'): 0.18854910,
('Morocco', 'unparalleled'): 0.12748823,
('Morocco', 'town'): -0.01195521,
('Morocco', 'culture'): -0.06758684,
('Morocco', 'man'): 0.00584295,
('Morocco', 'make'): 0.18806581,
('Moro

In [123]:
# Extraire les mots contextuels pour un mot central donné 'Morocco'
most_similar_words = model.wv.most_similar('Morocco')

print(most_similar_words)

[('brim', 0.21468834578990936), ('dynasty', 0.2132807970046997), ('blue', 0.18933390080928802), ('ancient', 0.18854910135269165), ('make', 0.18806582689285278), ('Heritage', 0.18374522030353546), ('southeast', 0.18357336521148682), ('plethora', 0.18075498938560486), ('influence', 0.1791507601737976), ('tree', 0.17583271861076355)]
