# Importing necessary libraries

In [1]:
import pandas as pd
import numpy as np
import string
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from collections import Counter
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler



# Exracting data

In [2]:
test = pd.read_csv('/kaggle/input/kaggle-war-eclipse/test (2).csv')
train = pd.read_csv('/kaggle/input/kaggle-war-eclipse/train (1).csv')

# Data Preprocessing

In [3]:
def remove_punctuation(text):
    """Custom function to remove punctuation"""
    PUNCT_TO_REMOVE = string.punctuation
    return text.translate(str.maketrans('', '', PUNCT_TO_REMOVE))

In [4]:
def remove_stopwords(text):
    """Custom function to remove stopwords"""
    STOPWORDS = set(stopwords.words('english'))
    return " ".join([word for word in str(text).split() if word not in STOPWORDS])

In [5]:
cnt = Counter()
for text in train["Review"].values:
    for word in text.split():
        cnt[word] += 1

In [6]:
def remove_rarewords(text):
    """Custom function to remove rare words"""
    n_rare_words = 10
    RAREWORDS = set([w for (w, wc) in cnt.most_common()[:-n_rare_words-1:-1]])
    return " ".join([word for word in str(text).split() if word not in RAREWORDS])

In [7]:
def stem_words(text):
    """Custom function to perform stemming"""
    stemmer = PorterStemmer()
    return " ".join([stemmer.stem(word) for word in text.split()])

In [8]:
chat_words_str = """
AFAIK=As Far As I Know
AFK=Away From Keyboard
ASAP=As Soon As Possible
ATK=At The Keyboard
ATM=At The Moment
A3=Anytime, Anywhere, Anyplace
BAK=Back At Keyboard
BBL=Be Back Later
BBS=Be Back Soon
BFN=Bye For Now
B4N=Bye For Now
BRB=Be Right Back
BRT=Be Right There
BTW=By The Way
B4=Before
B4N=Bye For Now
CU=See You
CUL8R=See You Later
CYA=See You
FAQ=Frequently Asked Questions
FC=Fingers Crossed
FWIW=For What It's Worth
FYI=For Your Information
GAL=Get A Life
GG=Good Game
GN=Good Night
GMTA=Great Minds Think Alike
GR8=Great!
G9=Genius
IC=I See
ICQ=I Seek you (also a chat program)
ILU=ILU: I Love You
IMHO=In My Honest/Humble Opinion
IMO=In My Opinion
IOW=In Other Words
IRL=In Real Life
KISS=Keep It Simple, Stupid
LDR=Long Distance Relationship
LMAO=Laugh My A.. Off
LOL=Laughing Out Loud
LTNS=Long Time No See
L8R=Later
MTE=My Thoughts Exactly
M8=Mate
NRN=No Reply Necessary
OIC=Oh I See
PITA=Pain In The A..
PRT=Party
PRW=Parents Are Watching
ROFL=Rolling On The Floor Laughing
ROFLOL=Rolling On The Floor Laughing Out Loud
ROTFLMAO=Rolling On The Floor Laughing My A.. Off
SK8=Skate
STATS=Your sex and age
ASL=Age, Sex, Location
THX=Thank You
TTFN=Ta-Ta For Now!
TTYL=Talk To You Later
U=You
U2=You Too
U4E=Yours For Ever
WB=Welcome Back
WTF=What The F...
WTG=Way To Go!
WUF=Where Are You From?
W8=Wait...
7K=Sick:-D Laugher
"""

In [9]:
chat_words_map_dict = {}
chat_words_list = []
for line in chat_words_str.split("\n"):
    if line != "":
        cw = line.split("=")[0]
        cw_expanded = line.split("=")[1]
        chat_words_list.append(cw)
        chat_words_map_dict[cw] = cw_expanded
chat_words_list = set(chat_words_list)

def chat_words_conversion(text):
    new_text = []
    for w in text.split():
        if w.upper() in chat_words_list:
            new_text.append(chat_words_map_dict[w.upper()])
        else:
            new_text.append(w)
    return " ".join(new_text)

In [10]:
import re

UNICODE_EMO = {
    u"\U0001F600": "happy face",
    u"\U0001F601": "grinning face with smiling eyes",
    u"\U0001F602": "face with tears of joy",
    u"\U0001F603": "smiling face with open mouth",
    u"\U0001F604": "smiling face with open mouth and smiling eyes",
    u"\U0001F605": "smiling face with open mouth and cold sweat",
    u"\U0001F606": "smiling face with open mouth and tightly-closed eyes",
    u"\U0001F607": "smiling face with halo",
    u"\U0001F608": "smiling face with horns",
    u"\U0001F609": "winking face",
    u"\U0001F60A": "smiling face with smiling eyes",
    u"\U0001F60B": "face savoring delicious food",
    u"\U0001F60C": "relieved face",
    u"\U0001F60D": "smiling face with heart-shaped eyes",
    u"\U0001F60E": "smiling face with sunglasses",
    u"\U0001F60F": "smirking face",
    u"\U0001F610": "neutral face",
    u"\U0001F611": "expressionless face",
    u"\U0001F612": "unamused face",
    u"\U0001F613": "face with cold sweat",
    u"\U0001F614": "pensive face",
    u"\U0001F615": "confused face",
    u"\U0001F616": "confounded face",
    u"\U0001F617": "kissing face",
    u"\U0001F618": "face throwing a kiss",
    u"\U0001F619": "kissing face with smiling eyes",
    u"\U0001F61A": "kissing face with closed eyes",
    u"\U0001F61B": "face with stuck-out tongue",
    u"\U0001F61C": "face with stuck-out tongue and winking eye",
    u"\U0001F61D": "face with stuck-out tongue and tightly-closed eyes",
    u"\U0001F61E": "disappointed face",
    u"\U0001F61F": "worried face",
    u"\U0001F620": "angry face",
    u"\U0001F621": "pouting face",
    u"\U0001F622": "crying face",
    u"\U0001F623": "persevering face",
    u"\U0001F624": "face with look of triumph",
    u"\U0001F625": "disappointed but relieved face",
    u"\U0001F626": "frowning face with open mouth",
    u"\U0001F627": "anguished face",
    u"\U0001F628": "fearful face",
    u"\U0001F629": "weary face",
    u"\U0001F62A": "sleepy face",
    u"\U0001F62B": "tired face",
    u"\U0001F62C": "grimacing face"
}

def convert_emojis(text):
    for emot in UNICODE_EMO:
        text = re.sub(r'('+emot+')', "_".join(UNICODE_EMO[emot].replace(",","").replace(":","").split()), text)
    return text

In [11]:
train['Review'] = train['Review'].str.lower()
train['Review'] = train['Review'].apply(lambda text: remove_punctuation(text))
train['Review'] = train['Review'].apply(lambda text: remove_stopwords(text))

In [12]:
train['Review'] = train['Review'].apply(lambda text: remove_rarewords(text))
train['Review'] = train['Review'].apply(lambda text: stem_words(text))
train['Review'] = train['Review'].apply(convert_emojis)

# Data Training

In [13]:
preprocessed_data = train['Review']
labels = train['Rating']

X_train, X_test, y_train, y_test = train_test_split(preprocessed_data, labels, test_size=0.2, random_state=42)

vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(X_train)
X_test = vectorizer.transform(X_test)

scaler = StandardScaler(with_mean=False)
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

model = RandomForestClassifier()
model.fit(X_train, y_train)

# Model Evaluation

y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9435714285714286
