In [None]:
import csv
import os
import sys
from datetime import datetime, timedelta

# import_ipynb stays ahead of the project imports below (presumably they are
# notebooks that are loaded through its import hook - verify before reordering).
import import_ipynb
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from Image_Utilities import showImageMetadata, readImage, showImage
from Args import preWorkArguments
from Data_Utilities import load_csv, load_data_dict, filter_data, copy_image_from_to, extendedDataWrite
from Data_Utilities import dataFrameSave

In [None]:
def get_info_from_images(mediaPath, imageNames):
    """
    This method returns the total size, in bytes, of the images given.

    Every name is looked up inside mediaPath + "WORK/f1/". Names that cannot
    be read are reported on stdout and contribute nothing to the total.

    :param mediaPath: base path; it is concatenated as-is, so it has to end
                      with a path separator
    :param imageNames: iterable of image file names
    :return: sum of the st_size of every image that could be stat'ed
    """
    imagesFolder = mediaPath + "WORK/f1/"
    size = 0
    for n in imageNames:
        imagePath = imagesFolder + n
        try:
            # A single stat call both checks for existence and gets the size.
            size += os.stat(imagePath).st_size
        except OSError:
            print("FILE ", imagePath, " DON'T EXIST")
    return size

def copy_data_from_to(imagesOriginPath, imagesTargetPath, imageDataDestinationPath, dataDict, pbData, bData):
    """
    This method rewrites the data csv and, if the user agrees, copies the
    listed images from one folder to another.

    The user is asked twice: first to confirm that the csv can be
    overwritten (answering "n" ends the program through sys.exit()), then
    whether the images must be copied as well.

    :param imagesOriginPath: prefix prepended to each image name to build the
                             copy source
    :param imagesTargetPath: prefix prepended to each image name to build the
                             copy destination
    :param imageDataDestinationPath: path of the csv file that is overwritten
    :param dataDict: mapping indexed by image name (with "JPG" replaced by
                     "jpg"); its value is written as second csv column
    :param pbData: first group of image names, written first
    :param bData: second group of image names, written after pbData
    :raises KeyError: if an image name has no entry in dataDict
    """
    answer1, answer2 = "", ""
    while answer1 != "y" and answer1 != "n":
        answer1 = input("Are you sure you want to delete current data.csv an overwrite it?(y/n)")

    if answer1 == "n":
        sys.exit()

    while answer2 != "y" and answer2 != "n":
        answer2 = input("This action can also delete and copy new images. Do you want to?(y/n)")

    copyImages = answer2 == "y"
    if copyImages:
        print("IMAGE COPY WILL BE PERFORM. THIS CAN TAKE A LONG TIME")

    # newline="" is what the csv module asks for, otherwise an extra blank
    # line shows up between rows on platforms that translate "\n".
    with open(imageDataDestinationPath, "w", newline="") as f:
        writer = csv.writer(f)
        for imageNames in (pbData, bData):
            for d in imageNames:
                writer.writerow([d, dataDict[d.replace("JPG", "jpg")]])
                if copyImages:
                    copy_image_from_to(imagesOriginPath + d, imagesTargetPath + d)


                
                
def showImagesByRP(path, df, onlyRP=None):
    """
    This method shows, one by one, the images listed in df ordered by RP.

    :param path: prefix prepended to every value of the 'name' column
    :param df: DataFrame with, at least, the columns 'RP' and 'name'
    :param onlyRP: when given, only the rows whose 'RP' equals this value are
                   shown; when None every row is shown
    """
    rpDf = df.sort_values(by=['RP'])
    if onlyRP is not None:
        rpDf = rpDf[rpDf['RP'] == onlyRP]

    # Iterating the column values (instead of looking each row up again by
    # its index label) also works when the index labels are not unique.
    for name in rpDf['name']:
        nextRPImage(path + name)


def nextRPImage(path):
    """
    Displays the image stored at path and waits for the user.

    Pressing enter on an empty line returns to the caller; typing anything
    else ends the program through sys.exit().
    """
    showImage(readImage(path))
    reply = input("empty enter = continue next image || else = exit")
    if reply == "":
        return
    sys.exit()
    

def showMetadataTotals(imagePath, path, csvName, sortedCsvName, RPDict):
    """
    Loads path+csvName, plots how many rows share each value of the fourth
    column and then hands the rows over to dataFrameCreation.

    The first row of the loaded csv is used as the headers; the remaining
    rows are the data. The counted column is index 3 (presumably the camera
    model - verify against the csv layout).
    """
    table = load_csv(path + csvName)
    headers, rows = table[0], table[1:]

    # One horizontal bar per distinct value of column 3.
    titles, counts = np.unique(rows[:, 3], return_counts=True)
    barPositions = range(len(titles))
    plt.barh(barPositions, counts, tick_label=titles)
    plt.show()

    sortedCsvPath = path + sortedCsvName
    dataFrameCreation(imagePath, rows, headers, sortedCsvPath, RPDict)

def showDorsalesMetadataTotals(path):
    """
    This method shows the metadata related to the runner bib if it happens to be in one of the RP.

    The csv at path is loaded and printed, the value counts of every column
    but the first are printed, and one horizontal bar chart per RP column is
    drawn with those counts.

    :param path: path of the csv to read
    :return: the DataFrame read from path
    :raises KeyError: if one of the RP columns is missing from the csv
    """
    rpColumns = ('RP0', 'RP1', 'RP2', 'RP2,5', 'RP3', 'RP4', 'RP5', 'RP6',
                 'RP7', 'RP8', 'RP9', 'RP10', 'RP11', 'RP12', 'RP13')

    df = pd.read_csv(path)
    print(df)

    for column in df.columns[1:]:
        print(df[column].value_counts())

    fig, axes = plt.subplots(len(rpColumns), figsize=(5, 10))
    # Pairing axes and columns in order keeps every chart under its own
    # title and in RP order, and gives every chart the same x ticks.
    for ax, column in zip(axes, rpColumns):
        df[column].value_counts().plot(kind='barh', ax=ax, title=column)
        ax.set_xticks(np.arange(0, 1000, 100))

    # Laid out once everything is drawn so titles and ticks are accounted for.
    fig.tight_layout()
    plt.show()

    return df
    
    
def _askYesNo(question):
    """
    Repeats the question until the answer is exactly "y" or "n"; returns True for "y"
    """
    answer = ""
    while answer != "y" and answer != "n":
        answer = input(question)
    return answer == "y"


def _rowMatchesRP(row, rp, gpsTolerance):
    """
    Tells whether an image row belongs to an RP that has already been registered
    """
    if rp['GPSInfo'] is None and rp['Artist'] is None:
        return False
    if rp['Empty']:
        return False
    if row['Artist'] != rp['Artist'] or row['Model'] != rp['Model']:
        return False
    # Without GPS data on either side, Artist and Model decide on their own.
    if row['GPSInfo'] == "" or rp['GPSInfo'] is None or rp['GPSInfo'] == "":
        return True
    return abs(int(row['GPSInfo']) - int(rp['GPSInfo'])) <= gpsTolerance


def _assignRPs(df, RPDict, gpsTolerance=1000):
    """
    Fills the 'RP' column of df in place and updates RPDict accordingly
    """
    for index, row in df.iterrows():
        assigned = None
        for key, rp in RPDict.items():
            if _rowMatchesRP(row, rp, gpsTolerance):
                assigned = key
                break

        if assigned is None:
            # No registered RP fits: the first free slot takes this row's values.
            for key, rp in RPDict.items():
                if rp['Empty']:
                    rp['GPSInfo'] = row['GPSInfo']
                    rp['Model'] = row['Model']
                    rp['Artist'] = row['Artist']
                    rp['Empty'] = False
                    assigned = key
                    break

        # When nothing matches and no slot is free the row keeps NaN as RP.
        if assigned is not None:
            df.at[index, 'RP'] = assigned
            RPDict[assigned]['count'] = RPDict[assigned]['count'] + 1


def dataFrameCreation(imagePath, data, headers, path, RPDict):
    """
    Generates a DataFrame from the rows of a csv, sorts it by date, prefixes
    the image names and assigns an RP to every row.

    Steps:
      1. Build the DataFrame, parse 'DateTime' ("%Y:%m:%d %H:%M:%S"), sort
         by it and strip the 'Artist' values.
      2. Prefix every 'name' with its 1-based, 5-digit position in that order.
      3. Optionally (the user is asked) rename the files on disk.
      4. Assign an RP to every row. A row joins the first registered RP with
         the same Artist and Model whose GPSInfo is missing on either side
         or differs by at most 1000; otherwise it claims the first RP still
         marked 'Empty'.
      5. Optionally save the DataFrame and optionally browse the images.

    :param imagePath: prefix handed to showImagesByRP to locate the images
    :param data: 2D array with the csv rows (without the header row)
    :param headers: column names; 'name', 'DateTime', 'Artist', 'Model' and
                    'GPSInfo' are the ones read here
    :param path: prefix used to build the rename source and destination
    :param RPDict: dict of RP name -> dict with the keys 'GPSInfo', 'Artist',
                   'Model', 'Empty' and 'count'; it is modified in place
    :raises ValueError: if a 'DateTime' value does not follow the format or a
                        compared GPSInfo value is not an integer
    """
    df = pd.DataFrame(data, columns=headers)
    df["DateTime"] = pd.to_datetime(df["DateTime"], format="%Y:%m:%d %H:%M:%S")
    df = df.sort_values(by="DateTime").reset_index(drop=True)
    df['Artist'] = df['Artist'].str.strip()

    originalNames = list(df['name'])
    df['name'] = ["%05d" % position + name
                  for position, name in enumerate(originalNames, start=1)]
    print(df.describe(include="all"))

    if _askYesNo("Do you want to Rename images with prefix?(y/n)"):
        # NOTE(review): showMetadataTotals passes folder + csv file name as
        # `path`, which makes these two locations look wrong - confirm.
        for oldName, newName in zip(originalNames, df['name']):
            source = path + 'Images/' + oldName
            target = path + newName
            try:
                os.rename(source, target)
            except OSError as error:
                print("File not found either " + source + " OR " + oldName, target, error)
                break

    # Object dtype from the start: the column receives RP names (strings).
    df['RP'] = pd.Series(np.nan, index=df.index, dtype=object)
    _assignRPs(df, RPDict)

    for key, rpInfo in RPDict.items():
        print(key, rpInfo)

    if _askYesNo("Do you want to save this dataframe as dataSorted.csv?(y/n)"):
        # NOTE(review): no destination is given here although numbersCSV
        # calls dataFrameSave with a path - confirm the default it relies on.
        dataFrameSave(df)

    if _askYesNo("Do you want to check images?(y/n)"):
        answer = input("Do you want to check specific RP?(RPX/n)")
        showImagesByRP(imagePath, df, None if answer == "n" else answer)
    
    
def numbersCSV(path, csvName, dorsalCsvName):
    """
    Builds the runner bib table and saves it as a csv.

    The rows of path+csvName are loaded, the bibs found by getNumbers are
    sorted, getRPbyNumber builds a DataFrame with one row per bib and one
    column per RP, and the result is saved to path+dorsalCsvName.
    """
    tableHeaders = ['dorsal', 'RP0', 'RP1', 'RP2', 'RP2,5', 'RP3', 'RP4', 'RP5', 'RP6',
                    'RP7', 'RP8', 'RP9', 'RP10', 'RP11', 'RP12', 'RP13']

    rows = load_csv(path + csvName)

    bibs = getNumbers(rows)
    bibs = sorted(bibs)
    bibTable = getRPbyNumber(rows, bibs, tableHeaders)

    dataFrameSave(bibTable, path + dorsalCsvName)
    
    
def getNumbers(data):
    """
    This method returns every runner bib without repetition.

    The second column of every row but the first one (the headers) is split
    on whitespace; every piece made only of decimal digits counts as a bib.

    :param data: 2D array of strings, as it is indexed with data[1:, 1]
    :return: list of int bibs in order of first appearance
    """
    # A dict keeps the first-seen order and gives constant-time lookups.
    numbers = {}
    for cell in data[1:, 1]:
        for piece in cell.split():
            # isdecimal() only accepts what int() can parse; isnumeric()
            # also accepts characters such as "½" that make int() fail.
            if piece.isdecimal():
                numbers.setdefault(int(piece), None)
    return list(numbers)

def getRPbyNumber(data, numbers, headers):
    """
    This method returns the numbersCSV DataFrame.

    The DataFrame has one row per bib. Its first column holds the bibs and
    each remaining column tells whether getNumbersOfRP finds that bib in the
    RP named like the column.

    :param data: csv rows, handed over to getNumbersOfRP
    :param numbers: list of bibs, one per row of the result
    :param headers: column names; the first one is the bib column, the rest
                    are RP names
    :return: the DataFrame described above
    """
    bibColumn = headers[0]

    # The frame is created with its final number of rows: positional
    # assignment (.iloc) cannot add rows to an empty DataFrame.
    df = pd.DataFrame(False, index=range(len(numbers)), columns=headers)
    df[bibColumn] = numbers

    for rpName in headers[1:]:
        print(rpName)
        df[rpName] = df[bibColumn].isin(getNumbersOfRP(data, rpName))

    print(df)
    return df

def getNumbersOfRP(data, RP):
    """
    This method returns every runner bib found in a specific RP.

    Only the rows whose last element equals RP are read. Their second
    element is split on whitespace and every piece made only of decimal
    digits counts as a bib.

    :param data: iterable of rows (sequences of strings)
    :param RP: RP name compared with the last element of each row
    :return: list of int bibs, without repetition, in order of first appearance
    """
    # A dict keeps the first-seen order and gives constant-time lookups.
    result = {}
    for d in data:
        if d[-1] != RP:
            continue
        for piece in d[1].split():
            # isdecimal() only accepts what int() can parse; isnumeric()
            # also accepts characters such as "½" that make int() fail.
            if piece.isdecimal():
                result.setdefault(int(piece), None)

    return list(result)
    

In [None]:
#Main
def preWorkMain(args):
    """
    Drives the interactive previous work, one yes/no question per step.

    Every step only runs when the user answers "y":
      1. With the hard disk connected: report how many images there are and
         how much the filtered groups weigh, then offer to copy the data.
      2. Extract the extended information of the images.
      3. Show the totals of the metadata.
      4. Generate the runner bib csv.
      5. Show the metadata of the runner bib csv.

    :param args: dict with the paths and file names used by each step
    """
    def confirmed(question):
        # Keeps asking until the reply is exactly "y" or "n".
        reply = ""
        while reply not in ("y", "n"):
            reply = input(question)
        return reply == "y"

    if confirmed("Is the hard disk SEGURO connected?(y/n)"):
        dataDict = load_data_dict(args["mediaPath"] + args["csvName"])

        imageCount = len(os.listdir(args["mediaPath"] + "WORK/f1"))
        print("IMAGE TOTAL: ", imageCount)

        blueImages, partialBlueImages = filter_data(dataDict)

        partialBlueBytes = get_info_from_images(args["mediaPath"], partialBlueImages)
        blueBytes = get_info_from_images(args["mediaPath"], blueImages)

        print("Partial Blue: ", len(partialBlueImages))
        print("Partial Blue image size: {}GB".format(partialBlueBytes * 1e-9))
        print("blue: ", len(blueImages))
        print("Blue image size: {}GB".format(blueBytes * 1e-9))

        if confirmed("Execute data copyh?(y/n)"):
            copy_data_from_to(
                args["mediaPath"] + "WORK/f1/",
                args["destinationPath"],
                args["mediaPath"] + args["dataCsvName"],
                dataDict,
                partialBlueImages,
                blueImages,
            )

    if confirmed("Do you want to extract extra information from images?(y/n)"):
        if confirmed("This process will overwrite data2.csv. Do you want to proceed?(y/n)"):
            extendedDataWrite(
                args["destinationPath"],
                args["destinationPath"] + args["dataCsvName"],
                args["destinationPath"] + args["extendedDataCsvName"],
                args["resultData"],
            )

    if confirmed("Do you want to see totals of metadata?(y/n)"):
        showMetadataTotals(
            args["destinationPath"],
            args["transGranCanariaFolder"],
            args["extendedDataCsvName"],
            args["sortedDataCsvName"],
            args["RPDict"],
        )

    if confirmed("Do you want to generate a csv for runner bib?(y/n)"):
        numbersCSV(args["transGranCanariaFolder"], args["sortedDataCsvName"], args["dorsalCsvName"])

    if confirmed("Do you want to get dorsales.csv metadata?(y/n)"):
        showDorsalesMetadataTotals(args["transGranCanariaFolder"] + args["dorsalCsvName"])
    
    
        