In [1]:
import numpy as np
import pandas as pd

# Function to generate random HSV values within a specific range for each color
def generate_hsv_color(color_name, hue_range, saturation_range, value_range, num_samples=100):
    h = np.random.uniform(hue_range[0], hue_range[1], num_samples)
    s = np.random.uniform(saturation_range[0], saturation_range[1], num_samples)
    v = np.random.uniform(value_range[0], value_range[1], num_samples)
    df = pd.DataFrame({'H': h, 'S': s, 'V': v, 'color_name': color_name})
    return df

# Define HSV ranges for each color
colors_hsv_ranges = {
    'black': ([0, 360], [0, 0], [0, 50]),
    'blue': ([180, 250], [50, 255], [50, 255]),
    'brown': ([10, 30], [50, 255], [50, 255]),
    'cyan': ([85, 165], [50, 255], [50, 255]),
    'green': ([60, 150], [50, 255], [50, 255]),
    'grey': ([0, 360], [0, 50], [50, 200]),
    'orange': ([5, 40], [100, 255], [100, 255]),
    'pink': ([300, 340], [50, 255], [50, 255]),
    'purple': ([260, 290], [50, 255], [50, 255]),
    'red': ([340, 20], [50, 255], [50, 255]),
    'white': ([0, 360], [0, 0], [200, 255]),
    'yellow': ([50, 70], [100, 255], [100, 255]),
}

# Create the HSV dataset
dataset = pd.DataFrame()
for color_name, hsv_ranges in colors_hsv_ranges.items():
    df_color = generate_hsv_color(color_name, hsv_ranges[0], hsv_ranges[1], hsv_ranges[2])
    dataset = pd.concat([dataset, df_color], ignore_index=True)

# Shuffle the dataset
dataset = dataset.sample(frac=1, random_state=42).reset_index(drop=True)

# Save the dataset to a CSV file
dataset.to_csv('color_dataset_hsv.csv', index=False)

In [7]:
data = pd.read_csv("D:/PracticeAll/ComputerVision/opencv/color_dataset_hsv.csv")
data.tail()

Unnamed: 0,H,S,V,color_name
1195,72.608289,0.0,222.781694,white
1196,152.69034,0.0,210.918497,white
1197,56.225015,209.901132,206.042874,yellow
1198,263.910881,83.620784,157.337619,purple
1199,64.010135,115.487929,119.433894,yellow


In [6]:
data.describe()

Unnamed: 0,H,S,V
count,1200.0,1200.0,1200.0
mean,155.854457,120.603968,149.760929
std,107.071819,82.454414,68.535196
min,1.507388,0.0,0.008222
25%,59.687413,50.04508,95.641227
50%,143.054268,128.851393,151.919088
75%,261.338174,192.722136,212.038609
max,358.428683,254.951402,254.768023


In [8]:
data.shape

(1200, 4)

In [10]:
data.info

<bound method DataFrame.info of                H           S           V color_name
0      61.447922  174.743621  130.968697     yellow
1     277.790508  163.037774  108.535545     purple
2     248.702546  224.066992   98.430223       blue
3     121.799799  161.926245  170.844355      green
4       3.451662    0.000000   34.420650      black
...          ...         ...         ...        ...
1195   72.608289    0.000000  222.781694      white
1196  152.690340    0.000000  210.918497      white
1197   56.225015  209.901132  206.042874     yellow
1198  263.910881   83.620784  157.337619     purple
1199   64.010135  115.487929  119.433894     yellow

[1200 rows x 4 columns]>

In [11]:
data.isna().any()

H             False
S             False
V             False
color_name    False
dtype: bool

In [12]:
data['color_name'].value_counts()

color_name
yellow    100
purple    100
blue      100
green     100
black     100
cyan      100
red       100
brown     100
grey      100
pink      100
orange    100
white     100
Name: count, dtype: int64