In [5]:
import pandas as pd
from sklearn.utils import resample
# Toy dataset for the downsampling demo: 13 rows, with 7 labelled 'High'
# and 6 labelled 'Low', so 'High' is the (slightly) larger class.
df = pd.DataFrame(
    dict(
        Age=[22, 24, 25, 26, 30, 32, 34, 36, 37, 40, 45, 50, 55],
        Income=[2000, 2500, 2700, 3200, 3700, 3800, 4000, 4200, 4500, 4700, 5000, 5500, 6000],
        Class=[
            'High', 'Low', 'Low', 'High', 'High', 'Low', 'High',
            'High', 'Low', 'Low', 'High', 'High', 'Low',
        ],
    )
)
# Last expression of the cell: render the frame as the cell output.
df

Unnamed: 0,Age,Income,Class
0,22,2000,High
1,24,2500,Low
2,25,2700,Low
3,26,3200,High
4,30,3700,High
5,32,3800,Low
6,34,4000,High
7,36,4200,High
8,37,4500,Low
9,40,4700,Low


In [7]:
# Split the dataset into one sub-frame per class label so that each class
# can be resampled independently.
df_high = df.loc[df["Class"].eq('High')]
df_low = df.loc[df["Class"].eq('Low')]

# Downsampling

Shrink the majority class to the size of the minority class by sampling without replacement.

In [10]:
# 'High' is the larger class here, so draw a subset of it (without
# replacement) that has exactly as many rows as the 'Low' class.
# The fixed random_state makes the draw repeatable.
df_high_downsampled = resample(
    df_high,
    n_samples=len(df_low),
    replace=False,
    random_state=42,
)

# Stack the reduced 'High' rows on top of the untouched 'Low' rows;
# original row labels are kept, so the index is not contiguous.
df_balanced = pd.concat([df_high_downsampled, df_low])

# Show the resulting balanced dataset.
print(df_balanced)

    Age  Income Class
0    22    2000  High
3    26    3200  High
10   45    5000  High
4    30    3700  High
7    36    4200  High
6    34    4000  High
1    24    2500   Low
2    25    2700   Low
5    32    3800   Low
8    37    4500   Low
9    40    4700   Low
12   55    6000   Low


In [11]:
# Count the rows per class to check that the classes are now the same size.
print(df_balanced["Class"].value_counts())

Class
High    6
Low     6
Name: count, dtype: int64


# Upsampling

Grow the minority class to the size of the majority class by sampling with replacement.

In [14]:
# Toy dataset for the upsampling demo: 13 rows, the first 4 labelled
# 'Minority' and the remaining 9 labelled 'Majority'.
# Note: this rebinds `df`, replacing the frame used in the downsampling section.
df = pd.DataFrame(
    dict(
        Age=[22, 25, 27, 28, 30, 35, 40, 45, 50, 55, 60, 65, 70],
        Income=[2000, 2500, 2700, 3200, 3500, 3800, 4000, 4300, 4500, 4800, 5000, 5300, 5500],
        Class=['Minority'] * 4 + ['Majority'] * 9,
    )
)

# One sub-frame per class, so each can be resampled on its own.
df_minority = df.loc[df['Class'].eq('Minority')]
df_majority = df.loc[df['Class'].eq('Majority')]

In [15]:
# Display the imbalanced dataset: 4 'Minority' rows and 9 'Majority' rows.
df

Unnamed: 0,Age,Income,Class
0,22,2000,Minority
1,25,2500,Minority
2,27,2700,Minority
3,28,3200,Minority
4,30,3500,Majority
5,35,3800,Majority
6,40,4000,Majority
7,45,4300,Majority
8,50,4500,Majority
9,55,4800,Majority


In [18]:
# Draw from the minority rows *with* replacement until there are as many
# of them as there are majority rows; duplicates are expected.
# The fixed random_state makes the draw repeatable.
df_minority_upsampled = resample(
    df_minority,
    n_samples=len(df_majority),
    replace=True,
    random_state=42,
)

# Majority rows first, then the enlarged minority sample. Original row
# labels are kept, so repeated minority rows share the same index label.
df_balanced = pd.concat([df_majority, df_minority_upsampled])
print(df_balanced)

    Age  Income     Class
4    30    3500  Majority
5    35    3800  Majority
6    40    4000  Majority
7    45    4300  Majority
8    50    4500  Majority
9    55    4800  Majority
10   60    5000  Majority
11   65    5300  Majority
12   70    5500  Majority
2    27    2700  Minority
3    28    3200  Minority
0    22    2000  Minority
2    27    2700  Minority
2    27    2700  Minority
3    28    3200  Minority
0    22    2000  Minority
0    22    2000  Minority
2    27    2700  Minority


In [17]:
# Count the rows per class to check that the classes are now the same size.
df_balanced["Class"].value_counts()

Class
Majority    9
Minority    9
Name: count, dtype: int64