
# Steam Reviews Sentiment Analysis — SVM (Colab)
Train a Support Vector Machine (LinearSVC) with TF‑IDF on Steam reviews.
- Lightweight & stable defaults for Colab/Windows.
- CSV preview, training, evaluation, artifacts, and prediction.


In [1]:

#@title Install dependencies
!pip -q install pandas scikit-learn matplotlib joblib


In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:

#@title Imports
import os, re, json, joblib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from typing import Tuple, List
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.pipeline import Pipeline
from sklearn.metrics import (
    classification_report,
    confusion_matrix,
    roc_auc_score,
    roc_curve,
    precision_recall_fscore_support,
    accuracy_score,
)


In [4]:
#@title Configure paths (edit these)
DATA_PATH = "/content/drive/MyDrive/SVM/dataset.csv"  #@param {type:"string"}
TEXT_COL = "review_text"            #@param {type:"string"}
LABEL_COL = "review_score"          #@param {type:"string"}
OUT_DIR = "/content/outputs"        #@param {type:"string"}

os.makedirs(OUT_DIR, exist_ok=True)
print("Using:", DATA_PATH, "→ OUT:", OUT_DIR)

Using: /content/drive/MyDrive/SVM/dataset.csv → OUT: /content/outputs


In [5]:

#@title (Optional) Mount Google Drive
USE_DRIVE = True  #@param {type:"boolean"}
if USE_DRIVE:
    from google.colab import drive
    drive.mount('/content/drive')
    print("Drive mounted. You can set DATA_PATH to a file under /content/drive/MyDrive/...")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Drive mounted. You can set DATA_PATH to a file under /content/drive/MyDrive/...


In [6]:

#@title Utilities
TEXT_CANDIDATES = ["review", "review_text", "text", "content", "body", "comment"]
LABEL_CANDIDATES = ["review_type", "recommended", "label", "sentiment", "review_score"]

def preview_csv(path: str, n: int = 5, encoding: str = "utf-8"):
    import pandas as pd
    try:
        df = pd.read_csv(path, nrows=n, encoding=encoding)
    except UnicodeDecodeError:
        df = pd.read_csv(path, nrows=n, encoding="latin-1")
    print("\n=== CSV PREVIEW ===")
    print("Path:", path)
    print("Columns:", list(df.columns))
    print("\nFirst", n, "rows:")
    print(df.head(n).to_string(index=False))
    print("===================\n")

def clean_text(s: str) -> str:
    if not isinstance(s, str):
        return ""
    s = s.lower()
    s = re.sub(r"http\S+|www\S+", " ", s)
    s = re.sub(r"@\w+", " ", s)
    s = re.sub(r"#", " ", s)
    s = re.sub(r"[^a-z0-9'\s]", " ", s)
    s = re.sub(r"\s+", " ", s).strip()
    return s

def autodetect_columns(df: pd.DataFrame, text_col: str = None, label_col: str = None) -> tuple:
    tcol, lcol = text_col, label_col
    if tcol is None:
        for c in TEXT_CANDIDATES:
            if c in df.columns:
                tcol = c; break
    if lcol is None:
        for c in LABEL_CANDIDATES:
            if c in df.columns:
                lcol = c; break
    if tcol is None:
        raise ValueError(f"Could not detect text column. Candidates: {TEXT_CANDIDATES}")
    if lcol is None:
        raise ValueError(f"Could not detect label column. Candidates: {LABEL_CANDIDATES}")
    return tcol, lcol

def normalize_labels(y: pd.Series) -> np.ndarray:
    y_clean = y.copy()
    if y_clean.dtype == object:
        y_clean = y_clean.str.strip().str.lower()
        mapping = {
            "positive": 1, "pos": 1, "recommended": 1, "yes": 1, "true": 1,
            "negative": 0, "neg": 0, "not recommended": 0, "no": 0, "false": 0
        }
        y_num = y_clean.map(lambda v: mapping.get(str(v), np.nan))
    else:
        y_num = y_clean.astype(float)
        uniq = pd.Series(y_num.unique()).dropna().sort_values()
        if not set(uniq.tolist()).issubset({0.0, 1.0}):
            y_num = (y_num > 0).astype(int)
    return y_num.values.astype(int)

def plot_confusion_matrix(cm: np.ndarray, out_path: str, labels: List[str] = ["Negative","Positive"]):
    plt.figure(figsize=(4,4))
    plt.imshow(cm, interpolation='nearest')
    plt.title('Confusion Matrix')
    plt.colorbar()
    tick_marks = np.arange(len(labels))
    plt.xticks(tick_marks, labels, rotation=45)
    plt.yticks(tick_marks, labels)
    thresh = cm.max() / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            plt.text(j, i, format(cm[i, j], 'd'),
                     horizontalalignment="center",
                     color="white" if cm[i, j] > thresh else "black")
    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.savefig(out_path, bbox_inches="tight")
    plt.close()

def plot_roc(y_true: np.ndarray, y_score: np.ndarray, out_path: str):
    fpr, tpr, _ = roc_curve(y_true, y_score)
    plt.figure()
    plt.plot(fpr, tpr)
    plt.plot([0,1],[0,1],'--')
    plt.title("ROC Curve")
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.savefig(out_path, bbox_inches="tight")
    plt.close()


In [7]:

#@title Train (stable, no GridSearch)
TEST_SIZE = 0.2     #@param {type:"number"}
MAX_FEATURES = 20000  #@param {type:"integer"}

df = pd.read_csv(DATA_PATH)
tcol, lcol = autodetect_columns(df, TEXT_COL, LABEL_COL)
print(f"[i] Using text column = {tcol}, label column = {lcol}")

df = df.dropna(subset=[tcol, lcol]).copy()
y = normalize_labels(df[lcol])
mask = np.isin(y, [0,1])
df, y = df.loc[mask], y[mask]
X_text = df[tcol].astype(str).map(clean_text)

X_train, X_test, y_train, y_test = train_test_split(
    X_text, y, test_size=TEST_SIZE, random_state=42, stratify=y
)

vectorizer = TfidfVectorizer(
    ngram_range=(1,1),
    min_df=10,
    max_df=0.9,
    max_features=MAX_FEATURES,
    stop_words="english",
    dtype=np.float32
)
clf = LinearSVC(dual=False)
pipe = Pipeline([("tfidf", vectorizer), ("svm", clf)])
pipe.set_params(svm__C=1.0, svm__class_weight="balanced")
pipe.fit(X_train, y_train)
print("[i] Trained LinearSVC with C=1.0, class_weight='balanced' (no GridSearch)")

y_pred = pipe.predict(X_test)
try:
    y_score = pipe.decision_function(X_test)
    auc = roc_auc_score(y_test, y_score)
except Exception:
    y_score = None
    auc = None

report = classification_report(y_test, y_pred, digits=4)
acc = accuracy_score(y_test, y_pred)
prec, rec, f1, _ = precision_recall_fscore_support(y_test, y_pred, average="binary")

print("\n=== Metrics ===")
print(f"Accuracy: {acc:.4f}")
print(f"Precision: {prec:.4f}  Recall: {rec:.4f}  F1: {f1:.4f}")
print(f"ROC-AUC: {auc:.4f}" if auc is not None else "ROC-AUC: N/A")
print("\nClassification report:\n", report)

os.makedirs(OUT_DIR, exist_ok=True)
cm = confusion_matrix(y_test, y_pred)
plot_confusion_matrix(cm, os.path.join(OUT_DIR, "confusion_matrix.png"))
if y_score is not None:
    plot_roc(y_test, y_score, os.path.join(OUT_DIR, "roc_curve.png"))

joblib.dump(pipe.named_steps["svm"], os.path.join(OUT_DIR, "model.joblib"))
joblib.dump(pipe.named_steps["tfidf"], os.path.join(OUT_DIR, "vectorizer.joblib"))
pd.DataFrame({"text": X_test, "y_true": y_test, "y_pred": y_pred}).to_csv(
    os.path.join(OUT_DIR, "predictions.csv"), index=False
)

try:
    svm = pipe.named_steps["svm"]
    tfidf = pipe.named_steps["tfidf"]
    feats = np.asarray(tfidf.get_feature_names_out())
    coefs = svm.coef_.ravel()
    top_pos_idx = np.argsort(coefs)[-30:][::-1]
    top_neg_idx = np.argsort(coefs)[:30]
    with open(os.path.join(OUT_DIR, "top_features.txt"), "w", encoding="utf-8") as f:
        f.write("Top positive features:\n")
        f.write("\n".join([f"{feats[i]}\t{coefs[i]:.4f}" for i in top_pos_idx]))
        f.write("\n\nTop negative features:\n")
        f.write("\n".join([f"{feats[i]}\t{coefs[i]:.4f}" for i in top_neg_idx]))
except Exception as e:
    print("[!] Skipped top-features extraction:", e)

metrics = {
    "params": {"svm__C": 1.0, "svm__class_weight": "balanced"},
    "accuracy": float(acc),
    "precision": float(prec),
    "recall": float(rec),
    "f1": float(f1),
    "roc_auc": float(auc) if auc is not None else None,
}
with open(os.path.join(OUT_DIR, "metrics.json"), "w") as f:
    json.dump(metrics, f, indent=2)

print(f"\n[✓] Done. Artifacts saved to: {OUT_DIR}")


[i] Using text column = review_text, label column = review_score
[i] Trained LinearSVC with C=1.0, class_weight='balanced' (no GridSearch)

=== Metrics ===
Accuracy: 0.7555
Precision: 0.9631  Recall: 0.7296  F1: 0.8303
ROC-AUC: 0.8966

Classification report:
               precision    recall  f1-score   support

           0     0.4153    0.8730    0.5628    231137
           1     0.9631    0.7296    0.8303   1050824

    accuracy                         0.7555   1281961
   macro avg     0.6892    0.8013    0.6966   1281961
weighted avg     0.8644    0.7555    0.7821   1281961


[✓] Done. Artifacts saved to: /content/outputs


In [8]:

#@title Preview CSV quickly
N = 5 #@param {type:"integer"}
preview_csv(DATA_PATH, n=N)



=== CSV PREVIEW ===
Path: /content/drive/MyDrive/SVM/dataset.csv
Columns: ['app_id', 'app_name', 'review_text', 'review_score', 'review_votes']

First 5 rows:
 app_id       app_name                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  

In [9]:
import pandas as pd

sample = pd.DataFrame({
    "review_text": [
        "This game is amazing and very fun!",
        "Terrible experience, not recommended at all.",
        "I liked the graphics but the gameplay was boring."
    ]
})
sample.to_csv("/content/new_reviews.csv", index=False)
print("Sample CSV saved!")


Sample CSV saved!


In [16]:
# ==== Predict Steam Reviews Sentiment (robust CSV reader) ====
import os, re, json, joblib
import pandas as pd

# -------------------- Config --------------------
OUT_DIR      = "/content/outputs"          # nơi lưu vectorizer.joblib & model.joblib sau khi train
INPUT_CSV    = "/content/new_reviews.csv"  # file review mới cần dự đoán
TEXT_COL_NEW = "review_text"               # tên cột text nếu biết chắc; nếu không đúng sẽ tự dò
TEXT_CANDIDATES = ["review_text","text","review","content","message","comment","body","sentence"]

# -------------------- Clean text (phải giống lúc TRAIN) -------------------
def clean_text(s: str) -> str:
    if not isinstance(s, str):
        return ""
    s = s.strip().lower()
    s = re.sub(r"http\S+|www\S+", " ", s)
    s = re.sub(r"@\w+", " ", s)
    s = re.sub(r"#", " ", s)
    # Nếu TRAIN giữ Unicode/TV, thay regex dưới cho KHỚP rồi TRAIN lại:
    # s = re.sub(r"[^0-9a-zà-ỹÀ-Ỵ̃́̀̃đĐ'\s]", " ", s, flags=re.IGNORECASE)
    s = re.sub(r"[^a-z0-9'\s]", " ", s)
    s = re.sub(r"\s+", " ", s).strip()
    return s

# -------------------- Load artifacts -------------------------
vec_path = os.path.join(OUT_DIR, "vectorizer.joblib")
mdl_path = os.path.join(OUT_DIR, "model.joblib")
if not os.path.exists(vec_path) or not os.path.exists(mdl_path):
    raise FileNotFoundError(
        f"Artifacts not found in OUT_DIR = '{OUT_DIR}'. "
        f"Need both files: {vec_path} and {mdl_path}."
    )
vec = joblib.load(vec_path)
svm = joblib.load(mdl_path)

# -------------------- Robust CSV/Excel reader ----------------
def robust_read_table(path: str) -> pd.DataFrame:
    if not os.path.exists(path):
        raise FileNotFoundError(f"Input file not found: {path}")
    if os.path.getsize(path) == 0:
        raise ValueError(f"Input file is empty (0 bytes): {path}")

    encodings = ["utf-8", "utf-8-sig", "latin-1", "utf-16", "utf-16le", "utf-16be"]
    seps = [None, ",", ";", "\t"]  # None => auto-sniff (engine='python')
    last_err = None

    for enc in encodings:
        for sep in seps:
            try:
                df = pd.read_csv(
                    path,
                    encoding=enc,
                    sep=sep,                 # None -> sniff
                    engine="python",
                    on_bad_lines="skip"
                )
                if not df.empty:
                    print(f"[i] Read OK: encoding={enc}, sep={'auto' if sep is None else repr(sep)}, shape={df.shape}")
                    return df
            except Exception as e:
                last_err = e

    # Fallback: có thể là Excel đổi đuôi
    for engine in ("openpyxl", "xlrd", None):
        try:
            df = pd.read_excel(path, engine=engine) if engine else pd.read_excel(path)
            if not df.empty:
                print(f"[i] Read OK as Excel (engine={engine}): shape={df.shape}")
                return df
        except Exception as e:
            last_err = e

    raise ValueError(f"Could not parse '{path}' as CSV/Excel. Last error: {last_err}")

# -------------------- Read input -----------------------------
df_new = robust_read_table(INPUT_CSV)
print("\n=== HEAD (5 rows) ===")
print(df_new.head(5).to_string(index=False))
print("=====================\n")
print(f"[i] Columns found in input CSV: {list(df_new.columns)}")
print("[!] If 'review_text' is not present, set TEXT_COL_NEW to a valid column name.")

if df_new.empty:
    raise ValueError(f"Input '{INPUT_CSV}' produced an empty DataFrame after parsing.")

# -------------------- Pick text column (robust, chỉ 1 lần) ----------------
def pick_text_column(df: pd.DataFrame, preferred: str, candidates: list[str]) -> str:
    if preferred in df.columns:                 # 1) ưu tiên cột bạn chỉ định
        return preferred
    for c in candidates:                        # 2) thử danh sách gợi ý
        if c in df.columns:
            return c
    # 3) fallback: chọn cột có nhiều ô không-rỗng nhất
    counts = {col: df[col].astype(str).str.strip().ne("").sum() for col in df.columns}
    if all(v == 0 for v in counts.values()):
        raise ValueError(f"All columns appear empty after basic string check. Columns: {list(df.columns)}")
    tcol_fb = max(counts, key=counts.get)
    print(f"[!] Auto-picked text column = '{tcol_fb}' (no known name matched).")
    return tcol_fb

tcol = pick_text_column(df_new, TEXT_COL_NEW, TEXT_CANDIDATES)
print(f"[i] Using text column = {tcol}")

# -------------------- Clean + filter empty rows --------------
X_raw = df_new[tcol].astype(str)
X_clean = X_raw.map(clean_text)
mask_nonempty = X_clean.str.strip().ne("")
df_use = df_new.loc[mask_nonempty].copy()
X_use = X_clean.loc[mask_nonempty]

if X_use.shape[0] == 0:
    n_total = len(df_new)
    n_na = df_new[tcol].isna().sum()
    n_empty_after_clean = n_total - mask_nonempty.sum()
    raise ValueError(
        "All rows became empty after cleaning; nothing to transform.\n"
        f"- Total rows: {n_total}\n"
        f"- NA in '{tcol}': {n_na}\n"
        f"- Empty after clean: {n_empty_after_clean}\n"
        "Check TEXT_COL_NEW, cleaning rules, or input content."
    )

# -------------------- Transform + predict --------------------
Xv = vec.transform(X_use)  # chỉ transform, KHÔNG fit lại!
preds = svm.predict(Xv)

# -------------------- Save outputs ---------------------------
os.makedirs(OUT_DIR, exist_ok=True)
pred_out = pd.DataFrame({"text": df_use[tcol].values, "pred": preds})
pred_path = os.path.join(OUT_DIR, "predictions_new.csv")
pred_out.to_csv(pred_path, index=False)
print(f"[✓] Saved predictions to: {pred_path}")

# Lưu các dòng bị loại vì rỗng sau clean (để bạn kiểm tra)
dropped = df_new.loc[~mask_nonempty, [tcol]]
if not dropped.empty:
    dropped_path = os.path.join(OUT_DIR, "rows_dropped_empty_after_clean.csv")
    dropped.to_csv(dropped_path, index=False)
    print(f"[i] Also saved dropped rows to: {dropped_path}")


[i] Read OK: encoding=utf-8, sep=auto, shape=(2, 4)

=== HEAD (5 rows) ===
                                           r                vi     w_t  xt
                                    This gam  is amazing and v ry fun! NaN
Terrible experience, not recommended at all.              None    None NaN

[i] Columns found in input CSV: ['r', 'vi', 'w_t', 'xt']
[!] If 'review_text' is not present, set TEXT_COL_NEW to a valid column name.
[!] Auto-picked text column = 'r' (no known name matched).
[i] Using text column = r
[✓] Saved predictions to: /content/outputs/predictions_new.csv


In [18]:
# ==== Predict Steam Reviews Sentiment (Regenerated) ====
import os, re, json, joblib
import pandas as pd
import numpy as np # Import numpy

# -------------------- Config --------------------
OUT_DIR = "/content/outputs"          # thư mục đã lưu vectorizer.joblib & model.joblib sau khi train
INPUT_CSV    = "/content/new_reviews.csv"  # file review mới cần dự đoán
TEXT_COL_NEW = "review_text"               # tên cột text nếu bạn biết chắc; nếu không đúng sẽ tự dò
TEXT_CANDIDATES = ["review_text","text","review","content","message","comment","body","sentence"]

# -------------------- Clean text (Simplified) -------------------
def clean_text(s: str) -> str:
    if not isinstance(s, str):
        return ""
    s = s.strip()
    s = s.lower()
    return s

# -------------------- Load artifacts -------------------------
vec_path = os.path.join(OUT_DIR, "vectorizer.joblib")
mdl_path = os.path.join(OUT_DIR, "model.joblib")
if not os.path.exists(vec_path) or not os.path.exists(mdl_path):
    raise FileNotFoundError(
        "Artifacts not found in OUT_DIR = '/content/outputs'. "
        f"Need both files: {vec_path} and {mdl_path}."
    )
vec = joblib.load(vec_path)
svm = joblib.load(mdl_path)
print("[i] Loaded vectorizer and model.")

# -------------------- Read input -----------------------------
if not os.path.exists(INPUT_CSV):
    raise FileNotFoundError(f"Input CSV file not found at '{INPUT_CSV}'. Please run the upload cell first.")

try:
    # Attempt to read with different encodings
    df_new = pd.read_csv(INPUT_CSV, encoding='utf-8')
except UnicodeDecodeError:
    try:
        df_new = pd.read_csv(INPUT_CSV, encoding='latin-1')
    except Exception as e:
        raise Exception(f"Could not read CSV file with utf-8 or latin-1 encoding: {e}")


if df_new.empty:
    raise ValueError(f"Input CSV '{INPUT_CSV}' is empty or mis-read (wrong path/encoding?).")

print(f"[i] Read input CSV: {INPUT_CSV} with shape {df_new.shape}")
print("\n=== Head of Input CSV ===")
display(df_new.head())
print("=========================\n")


# -------------------- Pick text column -----------------------
if TEXT_COL_NEW in df_new.columns:
    tcol = TEXT_COL_NEW
else:
    tcol = next((c for c in TEXT_CANDIDATES if c in df_new.columns), None)
    if tcol is None:
        raise ValueError(
            "Could not detect text column in input.\n"
            f"- TEXT_COL_NEW='{TEXT_COL_NEW}'\n"
            f"- Candidates tried: {TEXT_CANDIDATES}\n"
            f"- Found columns: {list(df_new.columns)}"
        )
print(f"[i] Using text column = '{tcol}'")

# -------------------- Clean + filter empty rows --------------
# Ensure the text column exists before attempting to access it
if tcol not in df_new.columns:
     raise ValueError(f"Text column '{tcol}' not found in the input DataFrame.")


X_raw = df_new[tcol].astype(str)
X_clean = X_raw.map(clean_text)

# Filter out rows where cleaning results in empty strings
mask_nonempty = X_clean.str.strip().ne("")

df_use = df_new.loc[mask_nonempty].copy()
X_use = X_clean.loc[mask_nonempty]

print(f"[i] Rows remaining after cleaning and filtering: {X_use.shape[0]} out of {df_new.shape[0]}")

if X_use.shape[0] == 0:
    n_total = len(df_new)
    n_na = df_new[tcol].isna().sum()
    n_empty_after_clean = n_total - mask_nonempty.sum()
    raise ValueError(
        "All rows became empty after cleaning; nothing to transform.\n"
        f"- Total rows: {n_total}\n"
        f"- NA in '{tcol}': {n_na}\n"
        f"- Empty after clean: {n_empty_after_clean}\n"
        "Check TEXT_COL_NEW, cleaning rules, or input content."
    )

# -------------------- Transform + predict --------------------
Xv = vec.transform(X_use)  # just transform, DO NOT fit again!
preds = svm.predict(Xv)

# -------------------- Save outputs ---------------------------
os.makedirs(OUT_DIR, exist_ok=True)
pred_out = pd.DataFrame({"text": df_use[tcol].values, "pred": preds})
pred_path = os.path.join(OUT_DIR, "predictions_new.csv")
pred_out.to_csv(pred_path, index=False)
print(f"[✓] Saved predictions to: {pred_path}")

# Save rows dropped due to empty text after cleaning
dropped = df_new.loc[~mask_nonempty] # Select full rows that were dropped
if not dropped.empty:
    dropped_path = os.path.join(OUT_DIR, "rows_dropped_empty_after_clean.csv")
    dropped.to_csv(dropped_path, index=False)
    print(f"[i] Also saved dropped rows to: {dropped_path}")

print("\n[✓] Prediction process completed.")

[i] Loaded vectorizer and model.
[i] Read input CSV: /content/new_reviews.csv with shape (3, 1)

=== Head of Input CSV ===


Unnamed: 0,review_text
0,This game is amazing and very fun!
1,"Terrible experience, not recommended at all."
2,I liked the graphics but the gameplay was boring.



[i] Using text column = 'review_text'
[i] Rows remaining after cleaning and filtering: 3 out of 3
[✓] Saved predictions to: /content/outputs/predictions_new.csv

[✓] Prediction process completed.


In [19]:
import pandas as pd
import os

# Define the path where the new CSV will be saved
INPUT_CSV = "/content/new_reviews.csv"

# Create a sample DataFrame with the required column name
data = {'review_text': [
    "This game is amazing, I love it!",
    "Had a terrible experience, not recommended.",
    "It's an okay game, nothing special.",
    "The graphics are stunning!",
    "Very buggy and unplayable."
]}
df_sample = pd.DataFrame(data)

# Save the DataFrame to a CSV file
df_sample.to_csv(INPUT_CSV, index=False)

print(f"Sample CSV created at: {INPUT_CSV}")
display(df_sample)

Sample CSV created at: /content/new_reviews.csv


Unnamed: 0,review_text
0,"This game is amazing, I love it!"
1,"Had a terrible experience, not recommended."
2,"It's an okay game, nothing special."
3,The graphics are stunning!
4,Very buggy and unplayable.


In [None]:
#@title Upload the new reviews CSV file
from google.colab import files
import shutil
import os

# Define the expected input path from the failed cell
INPUT_CSV = "/content/new_reviews_sample.csv"

uploaded = files.upload()

for fn in uploaded.keys():
  print('User uploaded file "{name}" with length {length} bytes'.format(
      name=fn, length=len(uploaded[fn])))

# Assume only one file was uploaded and move it to the specified INPUT_CSV path
uploaded_filename = list(uploaded.keys())[0]

# Remove existing file if it exists to avoid errors during move
if os.path.exists(INPUT_CSV):
    os.remove(INPUT_CSV)

shutil.move(uploaded_filename, INPUT_CSV)

print(f"\n[✓] File '{uploaded_filename}' uploaded and moved to '{INPUT_CSV}'")

Saving new_reviews_sample.csv to new_reviews_sample (1).csv
User uploaded file "new_reviews_sample (1).csv" with length 1487 bytes

[✓] File 'new_reviews_sample (1).csv' uploaded and moved to '/content/new_reviews_sample.csv'
