**Preprocessing:**

In [None]:
# Importing necessary libraries
import cmath
import pandas as pd
import re
import symspellpy
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import CountVectorizer
from nltk.stem import WordNetLemmatizer
from symspellpy import SymSpell, Verbosity
import pkg_resources
from tqdm import tqdm

# Fetch the NLTK data packages this script relies on.
nltk.download('omw-1.4')
nltk.download('punkt')
# NLTK >= 3.8.2 makes word_tokenize load 'punkt_tab' instead of the pickled
# 'punkt' models; download both so the tokenizer works on old and new releases.
nltk.download('punkt_tab')
nltk.download('stopwords')
nltk.download('wordnet')

# Ordered regex clean-up rules applied to the raw text before tokenizing.
# Order matters: each rule runs on the output of the previous one.
_CLEANUP_RULES = tuple(
    (re.compile(pattern), replacement)
    for pattern, replacement in (
        # Drop @handles, http(s) URLs and tokens starting with "www".
        (r"(?:\@|http?\://|https?\://|www)\S+", ""),
        # Remove digits.
        (r"\d+", ""),
        # Remove any remaining @handles and runs of "!" sitting directly
        # between two word characters.
        (r'@[^\s]+|\b(?:!+)\b', ''),
        # Split camelCase: insert a space between a lowercase letter and the
        # uppercase letter that follows it.
        (r'([a-z])([A-Z])', r'\1 \2'),
        # Replace the "&gt" / "&lt" entity prefixes with a space.
        (r'&gt|&lt', ' '),
        # Collapse a lowercase letter repeated three or more times into one.
        (r'([a-z])\1{2,}', r'\1'),
        # Turn asterisks (each optionally preceded by one non-word character)
        # into a sentence break.
        (r'(\*|\W\*)+', '. '),
        # Remove parenthesised text (non-greedy).
        (r'\(.*?\)', ''),
        # A run of non-word characters ending in a dot becomes ". ".
        (r'(\W+)\.', '. '),
        # Ensure a space after ".", "?" or "!" when a word character follows.
        (r'(\.|\?|!)(\w)', r'\1 \2'),
        # Crude stemming: replace a trailing "ing" suffix of any word with a
        # space (this is not limited to the standalone word "ing").
        (r'ing\b', ' '),
        # Remove the "product received for free" boilerplate.
        (r'\b(product received for free\.?|\s+product received for free\s+)\b', ''),
        # Collapse a word repeated consecutively (space-separated) into one.
        (r'(\b\w+\b)( \1)+', r'\1'),
    )
)

# Extra stop words added on top of NLTK's English list.
_EXTRA_STOP_WORDS = frozenset([
    'game', 'play', 'player', 'time', 'a', 'for', 'i', 'the', 'expand',
    'click', 'contain', 'spoiler', 'it', 'be', 'in', 'one', 'get', 'even',
    'year', 'guess', 'see', 'got', 'feel', 'want', 'tell', 'absolute',
    'every',
])

# Maximum edit distance used both to build the SymSpell index and to look up
# corrections.
_MAX_EDIT_DISTANCE = 3

# Lazily-filled cache so the expensive resources are built once per process
# instead of once per call (preprocess_text is applied to every row).
_RESOURCE_CACHE = {}


def _get_stop_words():
    """Return the combined stop-word set, building it on first use."""
    if 'stop_words' not in _RESOURCE_CACHE:
        _RESOURCE_CACHE['stop_words'] = (
            frozenset(stopwords.words('english')) | _EXTRA_STOP_WORDS
        )
    return _RESOURCE_CACHE['stop_words']


def _get_spell_checker():
    """Return a SymSpell instance with the English dictionary loaded once."""
    if 'sym_spell' not in _RESOURCE_CACHE:
        sym_spell = SymSpell(
            max_dictionary_edit_distance=_MAX_EDIT_DISTANCE, prefix_length=7
        )
        dictionary_path = pkg_resources.resource_filename(
            "symspellpy", "frequency_dictionary_en_82_765.txt"
        )
        # load_dictionary reports failure through its return value; without
        # this check every word would be silently dropped later on.
        if not sym_spell.load_dictionary(dictionary_path, term_index=0, count_index=1):
            raise FileNotFoundError(
                f"SymSpell frequency dictionary could not be loaded: {dictionary_path}"
            )
        _RESOURCE_CACHE['sym_spell'] = sym_spell
    return _RESOURCE_CACHE['sym_spell']


def preprocess_text(text):
    """Clean, tokenize, filter and spell-correct a single review.

    Steps, in order:
      1. Apply the regex clean-up rules in ``_CLEANUP_RULES``.
      2. Tokenize with NLTK's ``word_tokenize``.
      3. Keep purely alphabetic tokens and lowercase them.
      4. Remove stop words (NLTK English list plus ``_EXTRA_STOP_WORDS``).
      5. Replace each word with SymSpell's closest suggestion.

    Args:
        text: The raw review. Anything that is not a ``str`` (e.g. NaN or
            None from an empty spreadsheet cell) is treated as empty.

    Returns:
        The processed words joined by single spaces; ``''`` when ``text`` is
        not a string or nothing survives the filtering.

    Raises:
        FileNotFoundError: If the SymSpell frequency dictionary cannot be
            loaded.
    """
    if not isinstance(text, str):
        return ''

    # 1. Regex clean-up (handles, URLs, numbers, noise phrases, ...).
    for pattern, replacement in _CLEANUP_RULES:
        text = pattern.sub(replacement, text)

    # 2-3. Tokenize, drop punctuation/non-alphabetic tokens, lowercase.
    words = [token.lower() for token in word_tokenize(text) if token.isalpha()]

    # 4. Stop-word removal happens AFTER lowercasing so capitalised stop
    #    words ("The", "Game", "I") are removed as well.
    stop_words = _get_stop_words()
    words = [word for word in words if word not in stop_words]

    # 5. Typo correction. Words for which SymSpell finds no suggestion within
    #    the edit distance are dropped.
    spell_checker = _get_spell_checker()
    words_fixed = []
    for word in words:
        suggestions = spell_checker.lookup(
            word, Verbosity.CLOSEST, max_edit_distance=_MAX_EDIT_DISTANCE
        )
        if suggestions:
            words_fixed.append(suggestions[0].term)

    return ' '.join(words_fixed)


# Reading dataset
# NOTE(review): the input path is an empty placeholder; set it to the
# spreadsheet containing the 'User Review' column before running.
df = pd.read_excel('')

# Preprocessing 'User Review' column
tqdm.pandas()  # enables .progress_apply on pandas objects for progress tracking
# Write to the same column that is displayed below; previously the result was
# stored under '' while 'processed_review_with' was read, raising KeyError.
df['processed_review_with'] = df['User Review'].progress_apply(preprocess_text)

# Displaying a sample of the preprocessed text
print("Sample of preprocessed text:")
print(df['processed_review_with'].head())

# Saving the preprocessed dataset to a new file
# NOTE(review): the output path is an empty placeholder; set it before running.
df.to_excel('', index=False)
print("Preprocessed dataset saved successfully.")