In [5]:
import pandas as pd
import csv
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from sklearn.cluster import KMeans
import cv2
from matplotlib.colors import rgb_to_hsv
import pandas as pd
import re
import ast

import seaborn as sns

In [None]:
df = pd.read_csv(f"meta_data_billboard.csv",  header=0)

In [171]:
from utils_dataset.location_processor import LocationProcessor

df['location'] = df['artist_country'].apply(LocationProcessor.get_country_name)

In [None]:
def find_dominant_colors(image_path):
    image_path = f'images/{image_path}'

    k = 5

    image = cv2.imread(image_path)
    
    try:
        # Attempt to convert the image and reshape it
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        pixels = image.reshape((-1, 3))

    except Exception as e:
        # Handle the exception if the conversion fails
        print(f"An error occurred: {e}")
        return None

    else:
        # This block will be executed if no exception is raised in the try block
        # Use KMeans to find dominant colors
        
        kmeans = KMeans(n_clusters=k, n_init=6)
        kmeans.fit(pixels)
        
        # Get the cluster centers (dominant colors)
        dominant_colors = kmeans.cluster_centers_
        
        # Count the occurrences of each label (to find how many pixels in each cluster)
        unique, counts = np.unique(kmeans.labels_, return_counts=True)
        
        # Calculate the percentage of each cluster
        percentages = counts / np.sum(counts) * 100
        
        # Combine dominant colors and their percentages
        color_percentages = [(list(color), percentage) for color, percentage in zip(dominant_colors, percentages)]
    
    return color_percentages

In [None]:
df['dominant'] = df['image_name'].apply(find_dominant_colors)


In [None]:
df.to_csv(f"billboard_stats_dominant.csv", index=False)


In [113]:
df = pd.read_csv(f"billboard_stats_dominant.csv",  header=0)

In [114]:
def calculate_symmetry_score(image_name):
    merged_dataset_path = "images"

    if ".png" not in image_name:
        local_file_name = f"{merged_dataset_path}/{image_name}.png"
    else:
        local_file_name = f"{merged_dataset_path}/{image_name}"
        
    image = cv2.imread(local_file_name)
    
    try:
        # Convert to grayscale and split the image
        #print("calculate_symmetry_score")
        gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        height, width = gray_image.shape
        left_half = gray_image[:, :width // 2]
        right_half = cv2.flip(gray_image[:, width // 2:], 1)

    except Exception as e:
        # Handle any exceptions that occur during conversion and processing
        print(f"An error occurred during image processing: {e}")
        return None

    else:
        # This block will be executed if no exception is raised in the try block
        # Calculate histograms and symmetry score
        hist_left = cv2.calcHist([left_half], [0], None, [256], [0, 256])
        hist_right = cv2.calcHist([right_half], [0], None, [256], [0, 256])
        
        score = cv2.compareHist(hist_left, hist_right, cv2.HISTCMP_CORREL)
        return score


def classify_symmetry(score):
    if not score:
        return "N/A"
    if score > 0.8:
        return "High Symmetry"
    elif score > 0.5:
        return "Moderate Symmetry"
    elif score > -0.5:
        return "Low Symmetry"
    else:
        return "Negative Symmetry"

In [115]:
def analyze_use_of_space(image_name):
    merged_dataset_path = "images"

    if ".png" not in image_name:
        local_file_name = f"{merged_dataset_path}/{image_name}.png"
    else:
        local_file_name = f"{merged_dataset_path}/{image_name}"
        
    image = cv2.imread(local_file_name)
    try:
        # Convert the image to grayscale
        gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    except Exception as e:
        # Handle any exceptions that occur during conversion
        print(f"An error occurred while converting the image to grayscale: {e}")
        return None

    try:
        # Detect edges using Canny
        edges = cv2.Canny(gray_image, 100, 200)
    except Exception as e:
        # Handle any exceptions that occur during edge detection
        print(f"An error occurred while detecting edges: {e}")
        return None

    # Calculate the negative space ratio
    total_pixels = gray_image.size
    edge_pixels = np.count_nonzero(edges)
    negative_space_ratio = (total_pixels - edge_pixels) / total_pixels

    return negative_space_ratio

def classify_use_of_space(negative_space_ratio):
    if not negative_space_ratio:
        return None
    if negative_space_ratio > 0.8:
        return "High use of negative space"
    elif negative_space_ratio > 0.6:
        return "Moderate use of negative space"
    else:
        return "Low use of negative space"

In [116]:
from skimage.feature import graycomatrix, graycoprops
from skimage.color import rgb2gray
from skimage.io import imread
import numpy as np

def extract_texture_features(image_path):
    # Load image and convert to grayscale
    path = f'images/{image_path}'
    image = imread(path)
    if len(image.shape) == 2:
        print("The image is already in grayscale.")
        gray_image = image
    elif image.shape[2] == 2:
        # Assuming the first channel is the grayscale intensity
        gray_image = image[:, :, 0]
    elif image.shape[2] == 4:
        # Assuming the first channel is the grayscale intensity
        image = image[:,:,:3]
        
        gray_image = rgb2gray(image)
        gray_image = (gray_image * 255).astype('uint8')
    elif len(image.shape) == 4:
        if image.shape[0] == 1:
            
            image_squeezed = np.squeeze(image)
            print(image_squeezed.shape)
            gray_image = rgb2gray(image_squeezed)
            gray_image = (gray_image * 255).astype('uint8')

    else:
        
        # Convert to grayscale
        gray_image = rgb2gray(image)
        gray_image = (gray_image * 255).astype('uint8')

    # This block will 
    
    # Compute GLCM
    glcm = graycomatrix(gray_image, distances=[5], angles=[0], levels=256, symmetric=True, normed=True)
    
    # Compute texture features
    contrast = graycoprops(glcm, 'contrast')[0, 0]
    dissimilarity = graycoprops(glcm, 'dissimilarity')[0, 0]
    homogeneity = graycoprops(glcm, 'homogeneity')[0, 0]
    energy = graycoprops(glcm, 'energy')[0, 0]
    correlation = graycoprops(glcm, 'correlation')[0, 0]
    
    return {
        'contrast': contrast,
        'dissimilarity': dissimilarity,
        'homogeneity': homogeneity,
        'energy': energy,
        'correlation': correlation,
    }

def classify_texture(row, feature):
    # Contrast
    
    if feature == 'contrast':
        if row['contrast'] < 1000:
            return 'Somewhat Smooth'
        elif row['contrast'] < 1500:
            return 'Moderately Rough'
        else:
            return 'Very Rough'
        
    # Dissimilarity
    if feature == 'dissimilarity':
        if row['dissimilarity'] < 10:
            return 'Somewhat Fine'
        elif row['dissimilarity'] < 20:
            return 'Moderately Coarse'
        else:
            return 'Very Coarse'
    
    # Homogeneity
    if feature == 'homogeneity':
        if row['homogeneity'] > 0.2 and row['homogeneity'] < 0.6:
            return 'Somewhat Homogeneous'
        elif row['homogeneity'] > 0.6:
            return 'Homogeneous'    
        else:
            return 'Heterogeneous'
    
    # Energy
    if feature == 'energy':
        if row['energy'] > 0.03 and row['energy'] < 0.2:
            return 'Somewhat Uniform'
        elif row['energy'] >= 0.2:
            return 'Uniform'
        else:
            return 'Non-uniform'
    
    # Correlation
    if feature == 'correlation':
        if row['correlation'] > 0.8:
            return 'Highly Linearly-related'
        elif row['correlation'] > 0.6:
            return 'Moderately Linearly-related'
        else:
            return 'Non-linearly-related'
    
    if feature not in ('contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation'):
        return None


def get_feature_column(row, feature):
    textures_results = row['textures_results']
    if feature not in ('contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation'):
        return None
    return textures_results[feature]

In [117]:
df['textures_results'] = df['image_name'].apply(extract_texture_features)


The image is already in grayscale.
(220, 220, 3)
The image is already in grayscale.
The image is already in grayscale.
The image is already in grayscale.


In [118]:
df['contrast'] = df.apply(lambda row: get_feature_column(row, 'contrast'), axis=1)
df['contrast_classification'] = df.apply(lambda row: classify_texture(row, 'contrast'), axis=1)

df['dissimilarity'] = df.apply(lambda row: get_feature_column(row, 'dissimilarity'), axis=1)
df['dissimilarity_classification'] = df.apply(lambda row: classify_texture(row, 'dissimilarity'), axis=1)

df['homogeneity'] = df.apply(lambda row: get_feature_column(row, 'homogeneity'), axis=1)
df['homogeneity_classification'] = df.apply(lambda row: classify_texture(row, 'homogeneity'), axis=1)

df['energy'] = df.apply(lambda row: get_feature_column(row, 'energy'), axis=1)
df['energy_classification'] = df.apply(lambda row: classify_texture(row, 'energy'), axis=1)

df['correlation'] = df.apply(lambda row: get_feature_column(row, 'correlation'), axis=1)
df['correlation_classification'] = df.apply(lambda row: classify_texture(row, 'correlation'), axis=1)


In [119]:
df['use_space_score'] = df['image_name'].apply(analyze_use_of_space)
df['use_space_classification'] = df['use_space_score'].apply(classify_use_of_space)

An error occurred while converting the image to grayscale: OpenCV(4.7.0) /Users/xperience/GHA-OCV-Python/_work/opencv-python/opencv-python/opencv/modules/imgproc/src/color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cvtColor'



In [120]:
df['simmetry_score'] = df['image_name'].apply(calculate_symmetry_score)
df['simmetry_classification'] = df['simmetry_score'].apply(classify_symmetry)

An error occurred during image processing: OpenCV(4.7.0) /Users/xperience/GHA-OCV-Python/_work/opencv-python/opencv-python/opencv/modules/imgproc/src/color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cvtColor'



In [173]:
df.to_csv(f"billboard_stats.csv", index=False)


In [162]:
def per_classify_warm_cool_palette(color_percentages):
    warm_weight = 0
    cool_weight = 0

    if pd.isna(color_percentages):
        color_percentages = None
    else:
        color_percentages = color_percentages
        
    if color_percentages:
        color_percentages = ast.literal_eval(color_percentages)
        for color, percentage in color_percentages:
            # Normalize the RGB color
            normalized_color = [x / 255.0 for x in color]
            
            # Convert RGB to HSV
            hsv_color = rgb_to_hsv(np.array([normalized_color]).reshape(1,1,3)).flatten()
            hue = hsv_color[0] * 360
            
            # Define warm and cool hue ranges and add the weight (percentage)
            if 0 <= hue < 180:
                warm_weight += percentage
            elif 180 <= hue <= 360:
                cool_weight += percentage
        
        # Determine if the palette is warm, cool, or neutral based on the weighted counts
        if warm_weight > cool_weight:
            return 'Warm'
        elif cool_weight > warm_weight:
            return 'Cool'
        else:
            return 'Neutral'
    else:
        return 'Non classified'

In [163]:
def classify_color_palette(color_percentages):
    
    if pd.isna(color_percentages):
        dominant_colors = None
    else:
        dominant_colors = color_percentages

    if dominant_colors:
        dominant_colors = ast.literal_eval(dominant_colors)
        rgb_values = [item[0] for item in dominant_colors]
        rgb_values = np.array(rgb_values)

        hsv_colors = np.array([rgb_to_hsv(color/255) for color in rgb_values])
        hues = hsv_colors[:, 0] * 360  # Convert hue to degrees for easier analysis
        
        hue_differences = np.sort((np.diff(np.sort(hues)) + 180) % 360 - 180)
        try:
            min_diff = np.min(hue_differences)
            max_diff = np.max(hue_differences)
        except Exception as e:
            print(dominant_colors)
            return 'Non classified'
        else:
            if np.all(hue_differences < 30):  # Monochromatic threshold
                return 'Monochromatic'
            elif np.all(hue_differences < 60):  # Analogous threshold
                return 'Analogous'
            elif np.any(np.isclose(hue_differences, 180, atol=10)):  # Complementary threshold
                return 'Complementary'
            elif len(hues) >= 3 and np.all(np.isclose(hue_differences, 120, atol=10)):  # Triadic threshold, needs at least 3 colors
                return 'Triadic'
            else:
                return 'Complex/Mixed'
    else:
        return 'Non classified'

In [164]:
df['warm_cool_palette'] = df['dominant'].apply(per_classify_warm_cool_palette)


In [165]:
df['type_palette'] = df['dominant'].apply(classify_color_palette)


[([2.0, 3.0, 5.0], 100.0)]


In [166]:
df

Unnamed: 0,index_column,year,rank,artist_name,album_name,image_url,album_link,genre,artist_country,artist_gender,...,energy,energy_classification,correlation,correlation_classification,use_space_score,use_space_classification,simmetry_score,simmetry_classification,warm_cool_palette,type_palette
0,0,1945,,Glenn Miller & His Orchestra,Glenn Miller,https://upload.wikimedia.org/wikipedia/en/thum...,https://en.wikipedia.org/wiki/Glenn_Miller_(19...,"Dance band, Swing",US,,...,0.193391,Somewhat Uniform,0.718747,Moderately Linearly-related,0.866302,High use of negative space,0.907503,High Symmetry,Warm,Monochromatic
1,1,1945,,Bing Crosby,Merry Christmas,https://upload.wikimedia.org/wikipedia/commons...,https://en.wikipedia.org/wiki/Merry_Christmas_...,Christmas,US,male,...,0.024516,Non-uniform,0.395102,Non-linearly-related,0.864793,High use of negative space,0.601069,Moderate Symmetry,Cool,Complex/Mixed
2,2,1945,,Bing Crosby,Selections from Going My Way,https://upload.wikimedia.org/wikipedia/commons...,https://en.wikipedia.org/wiki/Selections_from_...,Popular,US,male,...,0.015525,Non-uniform,0.715769,Moderately Linearly-related,0.888463,High use of negative space,0.529366,Moderate Symmetry,Warm,Complementary
3,4,1945,,The King Cole Trio,The King Cole Trio,https://upload.wikimedia.org/wikipedia/en/thum...,https://en.wikipedia.org/wiki/The_King_Cole_Tr...,Jazz,US,,...,0.017658,Non-uniform,0.596414,Non-linearly-related,0.890253,High use of negative space,0.712177,Moderate Symmetry,Warm,Monochromatic
4,6,1946,,Bing Crosby,Selections from The Bells of St. Mary's,https://upload.wikimedia.org/wikipedia/en/thum...,https://en.wikipedia.org/wiki/Selections_from_...,Popular,US,male,...,0.040161,Somewhat Uniform,0.481083,Non-linearly-related,0.902665,High use of negative space,0.566515,Moderate Symmetry,Cool,Complex/Mixed
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3125,3317,2023,188.0,Quavo & Takeoff,Only Built For Infinity Links,https://charts-static.billboard.com/img/2022/1...,,,US,,...,0.010536,Non-uniform,0.525088,Non-linearly-related,0.754259,Moderate use of negative space,0.700477,Moderate Symmetry,Warm,Monochromatic
3126,3318,2023,192.0,Lil Wayne,I Am Music,https://charts-static.billboard.com/img/2023/0...,,,US,male,...,0.143658,Somewhat Uniform,0.817076,Highly Linearly-related,0.928179,High use of negative space,0.785427,Moderate Symmetry,Warm,Monochromatic
3127,3319,2023,193.0,Jimin,FACE (EP),https://charts-static.billboard.com/img/2023/0...,,,KR,male,...,0.032863,Somewhat Uniform,0.927288,Highly Linearly-related,0.966944,High use of negative space,0.422067,Low Symmetry,Warm,Monochromatic
3128,3320,2023,195.0,SEVENTEEN,SEVENTEEN 10th Mini Album: FML (EP),https://charts-static.billboard.com/img/2023/0...,,,KR,,...,0.741746,Uniform,0.533606,Non-linearly-related,0.958333,High use of negative space,0.999923,High Symmetry,Warm,Monochromatic


In [167]:
df.to_csv(f"billboard_stats_dominant.csv", index=False)
df = pd.read_csv(f"billboard_stats_dominant.csv",  header=0)

In [168]:
df = pd.read_csv(f"billboard_stats_dominant.csv",  header=0)

In [170]:
pop_df = df[df['big_genres'].str.contains("Pop", na=False)]
pop_df

Unnamed: 0,index_column,year,rank,artist_name,album_name,image_url,album_link,genre,artist_country,artist_gender,...,energy,energy_classification,correlation,correlation_classification,use_space_score,use_space_classification,simmetry_score,simmetry_classification,warm_cool_palette,type_palette
2,2,1945,,Bing Crosby,Selections from Going My Way,https://upload.wikimedia.org/wikipedia/commons...,https://en.wikipedia.org/wiki/Selections_from_...,Popular,US,male,...,0.015525,Non-uniform,0.715769,Moderately Linearly-related,0.888463,High use of negative space,0.529366,Moderate Symmetry,Warm,Complementary
4,6,1946,,Bing Crosby,Selections from The Bells of St. Mary's,https://upload.wikimedia.org/wikipedia/en/thum...,https://en.wikipedia.org/wiki/Selections_from_...,Popular,US,male,...,0.040161,Somewhat Uniform,0.481083,Non-linearly-related,0.902665,High use of negative space,0.566515,Moderate Symmetry,Cool,Complex/Mixed
6,8,1946,,Frank Sinatra,The Voice of Frank Sinatra,https://upload.wikimedia.org/wikipedia/en/thum...,https://en.wikipedia.org/wiki/The_Voice_of_Fra...,Traditional pop,US,male,...,0.066479,Somewhat Uniform,0.692183,Moderately Linearly-related,0.899091,High use of negative space,0.740657,Moderate Symmetry,Cool,Complex/Mixed
9,11,1947,,Al Jolson,[note 2],https://i.discogs.com/lbLeF_zyrQp5kGPtCwwu8CJO...,https://en.wikipedia.org#cite_note-Al_Jolson-8,,US,male,...,0.438060,Uniform,0.837420,Highly Linearly-related,0.935143,High use of negative space,0.998090,High Symmetry,Warm,Complex/Mixed
11,14,1948,,Bing Crosby,St. Patrick's Day,https://upload.wikimedia.org/wikipedia/commons...,https://en.wikipedia.org/wiki/St._Patrick%27s_...,"Popular, Irish",US,male,...,0.018251,Non-uniform,0.314345,Non-linearly-related,0.834855,High use of negative space,0.479198,Low Symmetry,Warm,Complex/Mixed
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3109,3301,2023,136.0,JVKE,This Is What ____ Feels Like (Vol.1-4),https://charts-static.billboard.com/img/2022/1...,,,US,male,...,0.024415,Non-uniform,0.376652,Non-linearly-related,0.873765,High use of negative space,0.958447,High Symmetry,Warm,Analogous
3119,3311,2023,169.0,Ed Sheeran,-,https://charts-static.billboard.com/img/2023/0...,,,GB,male,...,0.070310,Somewhat Uniform,0.504561,Non-linearly-related,0.955525,High use of negative space,0.982350,High Symmetry,Warm,Monochromatic
3123,3315,2023,178.0,TWICE,Ready To Be: 12th Mini Album (EP),https://charts-static.billboard.com/img/2023/0...,,,KR,,...,0.011098,Non-uniform,0.516310,Non-linearly-related,0.819568,High use of negative space,0.685859,Moderate Symmetry,Cool,Complex/Mixed
3127,3319,2023,193.0,Jimin,FACE (EP),https://charts-static.billboard.com/img/2023/0...,,,KR,male,...,0.032863,Somewhat Uniform,0.927288,Highly Linearly-related,0.966944,High use of negative space,0.422067,Low Symmetry,Warm,Monochromatic
