# Oversampling

In [None]:
import pandas as pd
from imblearn.over_sampling import RandomOverSampler
from imblearn.under_sampling import RandomUnderSampler
import matplotlib.pyplot as plt

# Input CSV that gets read, and output CSV that the balanced data is written to.
OLD_PATH, NEW_PATH = "data.csv", "data_undersampled.csv"

# Numeric markers; presumably the per-cell values of a board (X piece,
# O piece, empty cell) — TODO confirm against the dataset's feature columns.
X, O, BLANK = 1, -1, 0

# Integer class labels used for the "category" target column.
O_WIN, DRAW, ONGOING, X_WIN = range(4)

# Undersampling

In [None]:
# Load the raw dataset from the configured input path.
df = pd.read_csv(OLD_PATH)

# Encode the raw float category values as integer class labels.
# NOTE(review): the previous comment here claimed the dataset had already been
# corrected and no longer needed this mapping, yet the mapping is still
# applied — confirm which is true. If the CSV already holds encoded labels,
# values 0 and 1 would be silently re-mapped and 2 and 3 would not match.
mapping = {
    -1.0: 0,   # O_WIN
     0.0: 1,   # DRAW
     0.5: 2,   # ONGOING
     1.0: 3    # X_WIN
}
reverse_mapping = {v: k for k, v in mapping.items()}

# Series.map yields NaN for values missing from the dict; fail loudly here
# instead of handing NaN targets to the sampler.
encoded = df["category"].map(mapping)
unmapped = encoded.isna()
if unmapped.any():
    unexpected = df.loc[unmapped, "category"].unique().tolist()
    raise ValueError(
        f"Unexpected values in 'category' column of {OLD_PATH!r}: {unexpected}; "
        f"expected one of {sorted(mapping)}"
    )
df["category"] = encoded

# Class distribution of the original dataset
print("📌 Dataset original:")
print(df["category"].value_counts())

# Split features and target.
# NOTE: this rebinds the name X, which the setup cell defines as the constant 1.
X = df.drop(columns=["category"])
y = df["category"]

# Apply random undersampling (fixed seed for reproducibility)
rus = RandomUnderSampler(random_state=42)
X_resampled, y_resampled = rus.fit_resample(X, y)

# Reassemble features and target into a single frame
df_undersampled = pd.DataFrame(X_resampled, columns=X.columns)
df_undersampled["category"] = y_resampled.map(reverse_mapping)  # decode back to the raw values

# Class distribution of the balanced dataset
print("\n✅ Dataset após undersampling:")
print(df_undersampled["category"].value_counts())

# Export to CSV; the message reports the path actually written
df_undersampled.to_csv(NEW_PATH, index=False)
print(f"📁 Dataset com undersampling salvo como '{NEW_PATH}'")