# **Step by step**

In [1]:
import pandas as pd
import numpy as np

# Sample DataFrame: one string column with several missing (NaN) entries
data = {
    'col': [
        'a', 'b', np.nan, 'c', np.nan, np.nan, 'a', 'b',
        'c', np.nan, 'a', np.nan, 'b', 'c', np.nan, np.nan,
    ]
}
df = pd.DataFrame(data)

# Show the frame before any filling takes place
print("Original DataFrame:", df, sep="\n")

Original DataFrame:
    col
0     a
1     b
2   NaN
3     c
4   NaN
5   NaN
6     a
7     b
8     c
9   NaN
10    a
11  NaN
12    b
13    c
14  NaN
15  NaN


In [2]:
# Count how often each observed (non-null) value occurs, and how many entries are missing
non_na_values = df['col'].value_counts(dropna=True)
total_na = df['col'].isna().sum()

In [3]:
# Inspect the per-value counts of the non-null entries
non_na_values

col
a    3
b    3
c    3
Name: count, dtype: int64

In [4]:
# Inspect how many entries of 'col' are missing
total_na

7

In [5]:
# Calculate the proportional distribution
distribution = []
for value, count in non_na_values.items():
    distribution.extend([value] * (total_na // len(non_na_values)))

distribution

['a', 'a', 'b', 'b', 'c', 'c']

In [6]:
# Balance the remaining NaNs, if any
remaining_na = total_na - len(distribution)
for i in range(remaining_na):
    distribution.append(non_na_values.index[i % len(non_na_values)])

distribution

['a', 'a', 'b', 'b', 'c', 'c', 'a']

In [7]:
# Shuffle the distribution to randomize (optional)
np.random.seed(42)
np.random.shuffle(distribution)

distribution

['a', 'a', 'c', 'b', 'c', 'b', 'a']

In [8]:
# Write the prepared values into the rows where 'col' is missing
na_mask = df['col'].isna()
df.loc[na_mask, 'col'] = distribution

In [9]:
# Show the frame after the missing values have been filled
print("\nDataFrame with NaN values filled evenly:", df, sep="\n")


DataFrame with NaN values filled evenly:
   col
0    a
1    b
2    a
3    c
4    a
5    c
6    a
7    b
8    c
9    b
10   a
11   c
12   b
13   c
14   b
15   a


# **As a Function**

In [10]:
def fill_na_evenly(df, col_name, seed=42):
    """Fill the NaN entries of ``df[col_name]`` in place with an even mix of its observed values.

    Every distinct non-null value receives ``total_na // k`` of the missing
    slots, where ``k`` is the number of distinct values. Any leftover slots are
    handed out one each, following ``value_counts()`` order (most frequent
    first). Observed frequencies are not used as weights, so the split is even
    rather than proportional. The fill values are shuffled before being written
    so that they are not grouped by value.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to modify. The column is updated in place.
    col_name : str
        Name of the column whose NaN entries should be filled.
    seed : int or None, default 42
        Seed for the shuffle. The default yields the same order as seeding
        NumPy's legacy global generator with 42 and calling
        ``np.random.shuffle``. Pass ``None`` for a fresh order on each call.

    Returns
    -------
    None
        The filled frame is printed; ``df`` itself holds the result.

    Raises
    ------
    ValueError
        If the column has NaN entries but no non-null value to fill them with.
    """
    na_mask = df[col_name].isna()
    total_na = int(na_mask.sum())

    # Nothing to compute or assign when the column has no gaps
    if total_na:
        # Distinct observed values, most frequent first
        values = df[col_name].dropna().value_counts().index.tolist()
        if not values:
            raise ValueError(
                f"Column {col_name!r} has no non-null values to fill its NaN entries with"
            )

        # Even base share per value, then one extra for the first `leftover` values
        base_share, leftover = divmod(total_na, len(values))
        distribution = [value for value in values for _ in range(base_share)]
        distribution.extend(values[:leftover])

        # A private RandomState keeps the shuffle reproducible without
        # resetting the global np.random state the caller may rely on
        np.random.RandomState(seed).shuffle(distribution)

        # Fill the NaN values with the calculated distribution
        df.loc[na_mask, col_name] = distribution

    # Display the DataFrame with NaN values filled evenly
    print("\nDataFrame with NaN values filled evenly:")
    print(df)

In [11]:
# `df` was already filled in the step-by-step section, so rebuild the sample
# frame from `data` to give the function actual NaN values to fill
df_func = pd.DataFrame(data)
fill_na_evenly(df_func, 'col')


DataFrame with NaN values filled evenly:
   col
0    a
1    b
2    a
3    c
4    a
5    c
6    a
7    b
8    c
9    b
10   a
11   c
12   b
13   c
14   b
15   a
