# Useful imports for graphs

In [None]:
import os
import datetime
import squarify
import webcolors
import pandas as pd
import matplotlib.pyplot as plt

from tqdm import tqdm
from dotenv import load_dotenv
from wordcloud import WordCloud
from sqlalchemy.orm import sessionmaker
from sqlalchemy import create_engine, text

load_dotenv()

# Methods to get & clean the metadata

In [None]:
def get_metadata_from_postgres_db():
    """
    Get the metadata from the PostgreSQL database

    The function takes no argument: it connects with the connection URL
    hard-coded below and reads every row of the ``image_metadata`` table.

    :return: A dictionary mapping each filename (first selected column) to a
             dictionary of its metadata (column name -> value). If a filename
             appears in several rows, the last row read wins.
    """
    # Names given to the values of each row; the order must match the column
    # order of the SELECT statement below.
    keys = [
        'filename', 'Make', 'Model', 'Software', 'BitsPerSample', 'ImageWidth', 'ImageHeight', 'ImageDescription',
        'Orientation', 'Copyright', 'DateTime', 'DateTimeOriginal', 'DateTimeDigitized', 'SubSecTimeOriginal',
        'ExposureTime', 'FNumber', 'ExposureProgram', 'ISOSpeedRatings', 'SubjectDistance', 'ExposureBiasValue',
        'Flash', 'FlashReturnedLight', 'FlashMode', 'MeteringMode', 'FocalLength', 'FocalLengthIn35mm',
        'Latitude', 'LatitudeDegrees', 'LatitudeMinutes', 'LatitudeSeconds', 'LatitudeDirection',
        'Longitude', 'LongitudeDegrees', 'LongitudeMinutes', 'LongitudeSeconds', 'LongitudeDirection',
        'Altitude', 'DOP', 'FocalLengthMin', 'FocalLengthMax', 'FStopMin', 'FStopMax', 'LensMake', 'LensModel',
        'FocalPlaneXResolution', 'FocalPlaneYResolution', 'tags', 'dominant_color'
    ]

    print("Connecting to database...")

    # Create the database engine
    engine = create_engine("postgresql://postgres:postgres@postgres:5432/raw_metadata")

    # Create a session
    Session = sessionmaker(bind=engine)
    session = Session()

    metadata_dict = {}
    try:
        print("Retrieving metadata from database...")

        # Retrieve the metadata
        result = session.execute(
            text("""SELECT filename, Make, Model, Software, BitsPerSample, ImageWidth, ImageHeight, ImageDescription,
    Orientation, Copyright, DateTime, DateTimeOriginal, DateTimeDigitized, SubSecTimeOriginal,
    ExposureTime, FNumber, ExposureProgram, ISOSpeedRatings, SubjectDistance, ExposureBiasValue,
    Flash, FlashReturnedLight, FlashMode, MeteringMode, FocalLength, FocalLengthIn35mm,
    Latitude, LatitudeDegrees, LatitudeMinutes, LatitudeSeconds, LatitudeDirection,
    Longitude, LongitudeDegrees, LongitudeMinutes, LongitudeSeconds, LongitudeDirection,
    Altitude, DOP, FocalLengthMin, FocalLengthMax, FStopMin, FStopMax, LensMake, LensModel,
    FocalPlaneXResolution, FocalPlaneYResolution, tags, dominant_color
    FROM image_metadata;""")
        )

        # Convert the metadata to a dictionary keyed by filename
        for row in tqdm(result, desc="Get metadata from database"):
            try:
                metadata_dict[row[0]] = dict(zip(keys, row))
            except Exception as e:
                # Best effort: report the faulty row and keep reading the others
                print(e, row)
    finally:
        # Release the session and the connection pool even when the query or
        # the conversion fails
        session.close()
        engine.dispose()

    return metadata_dict


def dms_to_decimal(degrees, minutes, seconds):
    """
    Convert DMS (degrees, minutes, seconds) coordinates to DD (decimal degrees)

    The sign of the result is the sign of ``degrees``. A negative zero
    (``-0.0``) counts as negative, so -0 deg 30 min gives -0.5 and not 0.5.

    :param degrees: degrees (signed)
    :param minutes: minutes, added to the absolute value of the degrees
    :param seconds: seconds, added to the absolute value of the degrees
    :return: decimal coordinates
    """
    # Imported here so the function does not depend on the import cell
    import math

    decimal_degrees = abs(degrees) + (minutes / 60) + (seconds / 3600)

    # copysign reads the sign bit, which `degrees < 0` misses for -0.0
    if math.copysign(1.0, degrees) < 0:
        decimal_degrees = -decimal_degrees

    return decimal_degrees


def clean_gps_infos(metadata_to_cln):
    """
    Clean the GPS infos

    For every file that has a 'Latitude' entry:

    - when all four of 'Latitude', 'Longitude', 'LatitudeDegrees' and
      'LongitudeDegrees' equal the string '0.000000', 'Latitude', 'Longitude'
      and 'Altitude' are set to None;
    - otherwise 'Latitude' and 'Longitude' are replaced by floats, computed
      from the degrees / minutes / seconds values when 'Latitude' contains no
      '.', or parsed directly from 'Latitude' / 'Longitude' when it does.

    Files whose 'Latitude' is not a string (None, or an already converted
    float) are left untouched and are not counted.

    The per-file dictionaries are modified in place.

    :param metadata_to_cln: The metadata to clean
    :return: The same dictionary, with the cleaned metadata
    """

    cpt_valid, cpt_invalid, cpt_converted = 0, 0, 0
    for file in tqdm(metadata_to_cln, desc="Clean GPS values"):
        file_meta = metadata_to_cln[file]

        if 'Latitude' not in file_meta:
            continue

        has_dms_values = file_meta['LatitudeDegrees'] != '0.000000' or file_meta['LongitudeDegrees'] != '0.000000'
        has_decimal_values = file_meta['Latitude'] != '0.000000' or file_meta['Longitude'] != '0.000000'

        if not (has_dms_values or has_decimal_values):
            # No usable position at all: blank the GPS fields
            file_meta['Latitude'] = None
            file_meta['Longitude'] = None
            file_meta['Altitude'] = None
            cpt_invalid += 1
            continue

        try:
            should_convert = '.' not in file_meta['Latitude'] and has_dms_values
        except TypeError:
            # 'Latitude' is not a string (None, or a float from a previous
            # run): nothing to parse, keep the entry as it is
            continue

        if should_convert:
            # calculate the decimal coordinates from the degrees coordinates
            # NOTE(review): 'LatitudeDirection' / 'LongitudeDirection' are not
            # read here - confirm the stored degrees are already signed
            latitude = dms_to_decimal(
                float(file_meta['LatitudeDegrees']),
                float(file_meta['LatitudeMinutes']),
                float(file_meta['LatitudeSeconds']))

            longitude = dms_to_decimal(
                float(file_meta['LongitudeDegrees']),
                float(file_meta['LongitudeMinutes']),
                float(file_meta['LongitudeSeconds']))

            cpt_converted += 1
        else:
            # convert the coordinates to float
            # NOTE(review): this branch is also taken when the decimal values
            # are '0.000000' but DMS values exist - confirm that is intended
            latitude = float(file_meta['Latitude'])
            longitude = float(file_meta['Longitude'])

        # update the metadata with the calculated latitude and longitude
        file_meta['Latitude'] = latitude
        file_meta['Longitude'] = longitude
        cpt_valid += 1

    print("GPS values : \n",
          "Valid : ", cpt_valid,
          "\nInvalid : ", cpt_invalid,
          "\nConverted : ", cpt_converted,
          )

    return metadata_to_cln


def clean_metadata(metadata_to_clean):
    """
    Clean the metadata

    - 'Make': keep only the alphabetic characters, then remove the words
      'CORPORATION', 'CORP', 'COMPANY', 'LTD' and 'IMAGING'
    - 'DateTimeOriginal': replace 'T' by a space and '-' by ':', drop anything
      after a '+', and parse the first 19 characters as a datetime; values that
      cannot be parsed, or whose year is after the current year, become None
    - 'tags': evaluate the stored string into a Python object
    - GPS values: see clean_gps_infos

    The per-file dictionaries are copied first, so the input is not modified.
    Values that are not strings (e.g. None) are skipped for 'Make' and
    'DateTimeOriginal' instead of interrupting the cleaning of the other files.

    :param metadata_to_clean: The metadata to clean (filename -> metadata dictionary)
    :return: A dictionary with the cleaned metadata
    """
    # dict.copy() alone would share the per-file dictionaries with the input,
    # and the steps below modify them
    cln_meta = {file: dict(file_meta) for file, file_meta in metadata_to_clean.items()}

    # Clean 'Make' property values
    for file in tqdm(cln_meta, desc="Clean 'Make' property values"):
        make = cln_meta[file].get('Make')
        if not isinstance(make, str):
            # Missing or None: nothing to clean for this file
            continue
        make = ''.join(filter(str.isalpha, make))
        # 'CORPORATION' must be removed before 'CORP', its prefix
        for word in ('CORPORATION', 'CORP', 'COMPANY', 'LTD', 'IMAGING'):
            make = make.replace(word, '')
        cln_meta[file]['Make'] = make

    # Clean 'DateTime' property values
    cpt, cpt_error = 0, 0
    date_error = []
    current_year = datetime.datetime.now().year

    for file in tqdm(cln_meta, desc="Clean 'DateTime' property values"):
        date = cln_meta[file].get('DateTimeOriginal')
        if not isinstance(date, str):
            # Missing, None or already parsed: leave the value as it is
            continue

        tmp = date.replace('T', ' ').replace('-', ':').split('+')[0]
        try:
            parsed_date = datetime.datetime.strptime(tmp[:19], '%Y:%m:%d %H:%M:%S')
        except ValueError:
            date_error.append(date)
            cln_meta[file]['DateTimeOriginal'] = None
            cpt_error += 1
            continue

        # if the year is after actual year, we assume that the date is wrong
        if parsed_date.year > current_year:
            date_error.append(parsed_date)
            cln_meta[file]['DateTimeOriginal'] = None
            cpt_error += 1
        else:
            cln_meta[file]['DateTimeOriginal'] = parsed_date
            cpt += 1

    print(f"Metadata cleaned ! {cpt}/{len(cln_meta)} dates OK, {cpt_error} dates KO")
    print(f"Dates KO : {date_error}")

    # Clean 'tags' property values
    for file in tqdm(cln_meta, desc="Clean 'tags' property values"):
        if 'tags' in cln_meta[file]:
            val = None
            if cln_meta[file]['tags'] is not None:
                # SECURITY: eval() runs whatever expression is stored in the
                # 'tags' column. If the tags are always plain literals,
                # ast.literal_eval is the safe replacement - TODO confirm the
                # stored format before switching.
                val = eval(cln_meta[file]['tags'])
            cln_meta[file]['tags'] = val

    # Clean the GPS infos
    cln_meta = clean_gps_infos(cln_meta)

    return cln_meta


def get_metadata():
    """
    Get the metadata, from the local cache when available

    If 'metadata.csv' exists in the current directory it is read and returned
    as is. Otherwise the metadata are read from the database, cleaned, stored
    in 'metadata.csv' and returned.

    The cached path returns whatever types ``pd.read_csv`` infers with its
    default options; no date or tag parsing is requested here.

    :return: A pandas DataFrame with the metadata, one row per file
    """
    # Check if the metadata file already exists
    if os.path.isfile('metadata.csv'):
        # If the file exists, read it
        return pd.read_csv('metadata.csv')

    # Get the metadata from the database
    brut_metadata = get_metadata_from_postgres_db()
    # Clean the metadata
    cln_metadata = clean_metadata(brut_metadata)
    # Convert the metadata to a DataFrame (one row per file)
    df_metadata = pd.DataFrame.from_dict(cln_metadata).transpose()
    # Fill the 'Make' property NaN values with 'Undefined'.
    # Assign the result back: fillna(inplace=True) on a selected column is a
    # chained assignment and does not update the DataFrame under copy-on-write
    df_metadata['Make'] = df_metadata['Make'].fillna('Undefined')

    df_metadata.to_csv('metadata.csv', index=False, mode='w')

    return df_metadata


# Methods for basic graphs

In [None]:
def display_bar(title, x_label, y_label, x_values, y_values, colors=None, rotation=90):
    """
    Display a bar chart

    :param title: The title of the chart
    :param x_label: The x-axis label
    :param y_label: The y-axis label
    :param x_values: The values of the x-axis
    :param y_values: The values of the y-axis
    :param colors: The colors of the bars
    :param rotation: The rotation of the x-axis labels
    :return: The matplotlib figure holding the chart
    """

    fig, ax = plt.subplots()
    ax.bar(x_values, y_values, color=colors)
    ax.set_title(title)
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    # Only rotate the labels matplotlib generates. Forcing x_values as labels
    # without fixing the tick positions mislabels numeric axes (or raises when
    # the number of ticks differs from the number of values).
    ax.tick_params(axis='x', labelrotation=rotation)

    return fig


def display_pie(title, values, labels, colors=None, autopct="%1.1f%%", legend_title=None, legend_loc=None,
                legend_margin=None):
    """
    Display a pie chart

    A legend is added only when at least one of the legend options is given.

    :param title: The title of the chart
    :param values: The values of the chart
    :param labels: The labels of the chart
    :param colors: The colors of the chart
    :param autopct: The percentage format
    :param legend_title: The title of the legend
    :param legend_loc: The location of the legend
    :param legend_margin: The margin of the legend (passed as ``bbox_to_anchor``)
    :return: The matplotlib figure holding the chart
    """
    figure, axes = plt.subplots()
    axes.pie(values, labels=labels, autopct=autopct, colors=colors)

    legend_options = {
        'title': legend_title,
        'loc': legend_loc,
        'bbox_to_anchor': legend_margin,
    }
    if any(option is not None for option in legend_options.values()):
        axes.legend(**legend_options)

    axes.set_title(title)

    return figure


def display_curve(title, x_label, y_label, x_values, y_values, rotation=90):
    """
    Display a curve

    :param title: The title of the curve
    :param x_label: The label of the x_axis
    :param y_label: The label of the y_axis
    :param x_values: The values of the x_axis
    :param y_values: The values of the y_axis
    :param rotation: The rotation of the x_axis labels
    :return: The matplotlib figure holding the curve
    """

    fig, ax = plt.subplots()
    ax.plot(x_values, y_values)
    # Only rotate the labels matplotlib generates. Forcing x_values as labels
    # would attach the first values to unrelated automatic tick positions.
    ax.tick_params(axis='x', labelrotation=rotation)
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    ax.set_title(title)

    return fig


def display_histogram(title, x_label, y_label, x_values, bins=10, rotation=90):
    """
    Display a histogram

    :param title: The title of the histogram
    :param x_label: The label of the x_axis
    :param y_label: The label of the y_axis
    :param x_values: The values to distribute into the bins
    :param bins: The number of bins
    :param rotation: The rotation of the x_axis labels
    :return: The matplotlib figure holding the histogram
    """

    fig, ax = plt.subplots()
    ax.hist(x_values, bins=bins)
    # Only rotate the labels matplotlib generates. x_values are the raw
    # samples, not tick positions, so they must not be used as tick labels.
    ax.tick_params(axis='x', labelrotation=rotation)
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    ax.set_title(title)

    return fig


def display_tree_map(title, sizes, labels, colors, alpha=0.6):
    """
    Display a tree map

    :param title: The title of the tree map
    :param sizes: The sizes of the rectangles
    :param labels: The labels of the rectangles
    :param colors: The colors of the rectangles
    :param alpha: The opacity of the rectangles
    :return: The matplotlib figure holding the tree map
    """
    figure, axes = plt.subplots()

    # squarify draws on the axes created above
    plot_options = {
        'sizes': sizes,
        'label': labels,
        'color': colors,
        'alpha': alpha,
        'ax': axes,
    }
    squarify.plot(**plot_options)

    axes.set_title(title)

    return figure


def word_color_func(word, *args, **kwargs):
    """
    Give the color to use for a word, reading the word as a color name

    Extra positional and keyword arguments are accepted and ignored.

    :param word: color name to look up
    :return: the color in hex format, or 'black' when the lookup raises ValueError
    """
    try:
        hex_value = webcolors.name_to_hex(word)
    except ValueError:
        # Not a known color name: fall back to black
        hex_value = 'black'

    return hex_value


def display_wordcloud(words, frequencies, background_color='white', max_words=200, word_to_color=False):
    """
    Build a word cloud from words and their frequencies

    :param words: words to display
    :param frequencies: frequencies of the words, paired with ``words`` by position
    :param background_color: background color of the word cloud
    :param max_words: maximum number of words to display
    :param word_to_color: if True, each word is colored by word_color_func
    :return: the value returned by ``WordCloud.generate_from_frequencies``
    """
    cloud = WordCloud(
        background_color=background_color,
        max_words=max_words,
        # None lets WordCloud use its own coloring
        color_func=word_color_func if word_to_color else None,
    )

    # Pair each word with its frequency
    return cloud.generate_from_frequencies(dict(zip(words, frequencies)))


# Your space !