# Oversampling

In [10]:
import pandas as pd
from imblearn.over_sampling import RandomOverSampler
import matplotlib.pyplot as plt
# Load the dataset; the "category" column is the classification target.
df = pd.read_csv("data.csv")

# Encode the float outcome labels as integer class codes before resampling;
# reverse_mapping restores the original labels before the export.
mapping = {
    -1.0: 0,   # O_WIN
     0.0: 1,   # DRAW
     0.5: 2,   # ONGOING
     1.0: 3    # X_WIN
}
reverse_mapping = {v: k for k, v in mapping.items()}

# Series.map() turns every label that is not a key of `mapping` (missing values
# included) into NaN, and value_counts() skips NaN by default, so such rows would
# silently disappear from the summary printed below. Fail fast and name the
# offending labels instead.
unknown_rows = ~df["category"].isin(list(mapping))
if unknown_rows.any():
    unknown_labels = df.loc[unknown_rows, "category"].unique().tolist()
    raise ValueError(
        f"Unexpected values in 'category' (expected one of {list(mapping)}): "
        f"{unknown_labels}"
    )
df["category"] = df["category"].map(mapping)

# Class distribution before resampling (printed as the encoded class codes).
print("📌 Dataset original:")
print(df["category"].value_counts())

# Split features from the target.
X = df.drop(columns=["category"])
y = df["category"]

# Random oversampling: rows are re-drawn until the classes are balanced
# (the recorded run ends with 941 rows per class). The fixed seed keeps the
# draw reproducible.
# NOTE(review): if this file is later split into train/test sets, duplicated rows
# can land on both sides of the split -- confirm how the export is consumed.
ros = RandomOverSampler(random_state=42)
X_resampled, y_resampled = ros.fit_resample(X, y)

# Reassemble a single frame and map the class codes back to the original labels.
df_oversampled = pd.DataFrame(X_resampled, columns=X.columns)
df_oversampled["category"] = y_resampled.map(reverse_mapping)

# Class distribution after resampling (printed as the original labels).
print("\n✅ Dataset após oversampling:")
print(df_oversampled["category"].value_counts())

# Export the balanced dataset without the index column.
df_oversampled.to_csv("data_oversampled.csv", index=False)
print("📁 Dataset com oversampling salvo como 'data_oversampled.csv'")


📌 Dataset original:
category
3    941
0    941
2    128
1     32
Name: count, dtype: int64

✅ Dataset após oversampling:
category
 1.0    941
-1.0    941
 0.0    941
 0.5    941
Name: count, dtype: int64
📁 Dataset com oversampling salvo como 'data_oversampled.csv'


# Undersampling

In [11]:
import pandas as pd
from imblearn.under_sampling import RandomUnderSampler

# Load the dataset; the "category" column is the classification target.
df = pd.read_csv("data.csv")

# Encode the float outcome labels as integer class codes before resampling;
# reverse_mapping restores the original labels before the export.
mapping = {
    -1.0: 0,   # O_WIN
     0.0: 1,   # DRAW
     0.5: 2,   # ONGOING
     1.0: 3    # X_WIN
}
reverse_mapping = {v: k for k, v in mapping.items()}

# Series.map() turns every label that is not a key of `mapping` (missing values
# included) into NaN, and value_counts() skips NaN by default, so such rows would
# silently disappear from the summary printed below. Fail fast and name the
# offending labels instead.
unknown_rows = ~df["category"].isin(list(mapping))
if unknown_rows.any():
    unknown_labels = df.loc[unknown_rows, "category"].unique().tolist()
    raise ValueError(
        f"Unexpected values in 'category' (expected one of {list(mapping)}): "
        f"{unknown_labels}"
    )
df["category"] = df["category"].map(mapping)

# Class distribution before resampling (printed as the encoded class codes).
print("📌 Dataset original:")
print(df["category"].value_counts())

# Split features from the target.
X = df.drop(columns=["category"])
y = df["category"]

# Random undersampling: rows are discarded until the classes are balanced
# (the recorded run ends with 32 rows per class). The fixed seed keeps the
# selection reproducible.
rus = RandomUnderSampler(random_state=42)
X_resampled, y_resampled = rus.fit_resample(X, y)

# Reassemble a single frame and map the class codes back to the original labels.
df_undersampled = pd.DataFrame(X_resampled, columns=X.columns)
df_undersampled["category"] = y_resampled.map(reverse_mapping)

# Class distribution after resampling (printed as the original labels).
print("\n✅ Dataset após undersampling:")
print(df_undersampled["category"].value_counts())

# Export the balanced dataset without the index column.
df_undersampled.to_csv("data_undersampled.csv", index=False)
print("📁 Dataset com undersampling salvo como 'data_undersampled.csv'")

📌 Dataset original:
category
3    941
0    941
2    128
1     32
Name: count, dtype: int64

✅ Dataset após undersampling:
category
-1.0    32
 0.0    32
 0.5    32
 1.0    32
Name: count, dtype: int64
📁 Dataset com undersampling salvo como 'data_undersampled.csv'
