# Sentiment Analysis: Machine Learning Approach

## Preprocessing
### Labeling:

In [60]:
import pandas as pd

def review_labeling(data, threshold=3):
    """Load a header-less review CSV and turn its ratings into sentiment labels.

    Args:
        data: Path or file-like object accepted by ``pd.read_csv``. The file
            is read with ``header=None``, so columns are labeled 0, 1, ...;
            column 1 must hold values comparable with ``threshold``.
        threshold: Lowest rating that still counts as ``'positive'``.
            Defaults to 3, the cut-off this notebook originally hard-coded.

    Returns:
        pandas.DataFrame: The loaded frame with column 1 replaced by the
        strings ``'positive'`` (rating >= threshold) or ``'negative'``.
        A missing rating compares as False and is therefore labeled
        ``'negative'``.
    """
    df = pd.read_csv(data, header=None)

    # Vectorized comparison on the rating column (index 1) instead of a
    # per-row lambda; the boolean mask is then mapped to the label strings.
    is_positive = df[1] >= threshold
    df[1] = is_positive.map({True: 'positive', False: 'negative'})

    return df

# Load the mini review set; review_labeling replaces the ratings in column 1
# with 'positive' / 'negative' labels, column 0 keeps the review text.
df_processed = review_labeling('Data/ML/reviews_ML_mini.csv')
# Plain-text dump of the labeled frame for a quick visual check.
print(df_processed)

                                                    0         1
0   Great music service, the audio is high quality...  positive
1   Please ignore previous negative rating. This a...  positive
2   This pop-up "Get the best Spotify experience o...  positive
3     Really buggy and terrible to use as of recently  negative
4   Dear Spotify why do I get songs that I didn't ...  negative
..                                                ...       ...
94      I love Spotify just wish I can download music  positive
95                   It's awesome and great music app  positive
96  Works super well.... When it works... The app ...  negative
97  I can't even listen to my favorite songs bc of...  negative
98               it does'nt let you choose your songs  negative

[99 rows x 2 columns]


### Stopword removal and punctuation filtering:

In [61]:
import numpy as np
import pandas as pd
import spacy
import emoji
import re
import nltk
from tqdm import tqdm


In [62]:
# Fetch the NLTK stopword list and Punkt tokenizer data (reported as
# "already up-to-date" when they are cached locally).
# NOTE(review): none of the cells shown here use these NLTK resources;
# tokenization and stop-word filtering below go through spaCy. Confirm that a
# later cell needs them before keeping the downloads.
nltk.download('stopwords')
nltk.download('punkt')

# Load the spaCy pipeline used by preprocess_text for tokenization,
# lemmatization and the is_punct / is_stop token flags.
nlp = spacy.load('en_core_web_sm')

# Turn emojis into plain words, e.g. "👍" becomes " thumbs up "
# (space-delimited, underscores replaced by spaces).
def map_emojis(text):
    """Replace every emoji in ``text`` with its space-separated description."""
    # Spaces are used as delimiters instead of the default colons, so the
    # description is surrounded by whitespace rather than ":name:" markers.
    demojized = emoji.demojize(text, delimiters=(" ", " "))
    # Break multi-word descriptions ("thumbs_up") into separate words so the
    # tokenizer sees them as ordinary tokens.
    pieces = demojized.split('_')
    return ' '.join(pieces)

# Preprocessing function
def preprocess_text(text):
    """Normalize a single review into a space-joined string of lemmas.

    Steps, in order: lowercase, convert emojis to words (``map_emojis``),
    strip URLs and e-mail addresses, then run the spaCy pipeline ``nlp`` and
    keep the lemma of every token that is not punctuation, a stop word or
    pure whitespace.

    Args:
        text: Raw review text. Anything that is not a ``str`` (e.g. ``None``
            or the NaN pandas produces for an empty CSV cell) is treated as
            an empty review.

    Returns:
        str: The surviving lemmas joined by single spaces; ``''`` when
        nothing survives or the input was not a string.
    """
    # Missing values would otherwise crash on .lower().
    if not isinstance(text, str):
        return ''

    # Lowercase
    text = text.lower()

    # Convert emojis to text
    text = map_emojis(text)

    # Remove URLs and emails. The dot after "www" is escaped: unescaped it
    # matches any character, which deleted ordinary words that merely contain
    # "www" (e.g. "awwwwwesome").
    text = re.sub(r'http\S+|www\.\S+', '', text)
    text = re.sub(r'\S+@\S+', '', text)

    # Tokenization and Lemmatization using spaCy
    doc = nlp(text)
    tokens = [
        token.lemma_ for token in doc
        # is_space drops the whitespace tokens spaCy emits for the runs of
        # spaces left behind by emoji conversion and URL/e-mail removal.
        if not (token.is_punct or token.is_stop or token.is_space)
    ]

    return ' '.join(tokens)

# Register tqdm's progress_apply on pandas objects, with a labeled progress bar.
tqdm.pandas(desc="Preprocessing Reviews")
# NOTE(review): column 0 is overwritten in place, so re-running this cell
# feeds already-preprocessed text through preprocess_text again. Re-run the
# labeling cell first to start from the raw reviews.
df_processed[0] = df_processed[0].progress_apply(preprocess_text)

# Plain-text dump of the frame after preprocessing.
print(df_processed)

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Philipp\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Philipp\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
Preprocessing Reviews: 100%|██████████| 99/99 [00:00<00:00, 168.32it/s]

                                                    0         1
0   great music service audio high quality app eas...  positive
1   ignore previous negative rating app super grea...  positive
2   pop good spotify experience android 12 annoyin...  positive
3                         buggy terrible use recently  negative
4             dear spotify song playlist shuffle play  negative
..                                                ...       ...
94                   love spotify wish download music  positive
95                            awesome great music app  positive
96  work super work app not want load 3/4ths time ...  negative
97  listen favorite song bc shuffle try different ...  negative
98                            does'nt let choose song  negative

[99 rows x 2 columns]





Train 