# Code Documentation

### Installing `keybert`
The notebook runs `!pip install keybert`, which uses the pip package manager to download and install the `keybert` package. KeyBERT is a Python library that extracts keywords and keyphrases from text using BERT embeddings.

In [1]:
!pip install keybert



In [None]:
import re
import pandas as pd
from keybert import KeyBERT

In [None]:
# Read the scored place-detail CSV and attach an empty 'One_Keywords' column
# that will hold the extracted keywords for each review.
data = pd.read_csv(
    filepath_or_buffer='/content/drive/MyDrive/For Capstone/Collecting data/Place Detail (Scored).csv'
).assign(One_Keywords='')

# Build the KeyBERT extractor with its default settings
model = KeyBERT()

# Define a function to remove strange symbols and emojis from text.
# The patterns are compiled once here instead of on every call.
_APOSTROPHE_RE = re.compile(r"['\u2019]")
_NON_ALNUM_RE = re.compile(r'[^a-zA-Z0-9\s]+')


def clean_text(text):
    """Return *text* reduced to ASCII letters, digits and single spaces.

    Apostrophes (straight and curly) are deleted outright so contractions
    stay one token ("don't" -> "dont"). Every other run of characters that
    is not an ASCII letter, digit or whitespace -- punctuation, emojis,
    non-ASCII letters -- is replaced by a space, so words that were only
    separated by such characters are not fused together
    ("food!Loved" -> "food Loved"). Whitespace runs are then collapsed to a
    single space and leading/trailing whitespace is removed.

    Args:
        text: The raw string to clean.

    Returns:
        The cleaned string; empty if nothing alphanumeric remains.
    """
    # Delete apostrophes first so they do not split contractions below.
    text = _APOSTROPHE_RE.sub('', text)

    # One ASCII-only filter covers emojis as well: they are non-alphanumeric,
    # so a separate emoji pattern would never find anything to remove.
    text = _NON_ALNUM_RE.sub(' ', text)

    # split()/join collapses the spaces introduced above and strips the ends.
    return ' '.join(text.split())

# Process each review text and extract keywords.
# Iterating the column directly avoids building a full row Series per review.
for index, review_text in data['Review Text'].items():
    # Non-string cells (e.g. NaN for an empty CSV field) are treated as having no text.
    cleaned_text = clean_text(review_text) if isinstance(review_text, str) else ''

    if not cleaned_text:
        # Nothing left to analyse (missing review, or only symbols/emojis):
        # record an empty result without calling the model.
        data.at[index, 'One_Keywords'] = ''
        continue

    # Up to 10 single-word keywords, diversified with Maximal Marginal Relevance.
    # `use_maxsum=True` / `nr_candidates=20` were dropped: KeyBERT checks
    # `use_mmr` first, so the Max Sum arguments were never applied.
    keywords = model.extract_keywords(
        cleaned_text,
        keyphrase_ngram_range=(1, 1),
        stop_words='english',
        top_n=10,
        use_mmr=True,
        diversity=0.65,
    )

    # Each result is a (keyword, score) pair; only the keyword text is stored.
    data.at[index, 'One_Keywords'] = ', '.join(kw for kw, _ in keywords)


In [None]:
# Write the table, including the 'One_Keywords' column, to a separate CSV;
# index=False keeps the DataFrame index out of the output file.
data.to_csv(
    path_or_buf='/content/drive/MyDrive/For Capstone/Collecting data/Place Detail (Scored + Keyword Extracted).csv',
    index=False,
)