<a href="https://colab.research.google.com/github/Ircken/Codigo/blob/main/juliaVDrive.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Instalación

In [None]:
%%shell
set -e

#---------------------------------------------------#
# User-tunable settings.
JULIA_VERSION="1.7.1" # any version ≥ 0.7.0
JULIA_PACKAGES="IJulia BenchmarkTools Plots"
JULIA_PACKAGES_IF_GPU="CUDA" # or CuArrays for older Julia versions
JULIA_NUM_THREADS=2
#---------------------------------------------------#

# Install only when COLAB_GPU is set (non-empty) and no `julia` is on PATH yet.
if [ -n "$COLAB_GPU" ] && ! command -v julia > /dev/null 2>&1; then
  # Install Julia
  JULIA_VER=$(cut -d '.' -f -2 <<< "$JULIA_VERSION") # first two version fields, e.g. "1.7"
  echo "Installing Julia $JULIA_VERSION on the current Colab Runtime..."
  BASE_URL="https://julialang-s3.julialang.org/bin/linux/x64"
  URL="$BASE_URL/$JULIA_VER/julia-$JULIA_VERSION-linux-x86_64.tar.gz"
  wget -nv "$URL" -O /tmp/julia.tar.gz # -nv means "not verbose"
  tar -x -f /tmp/julia.tar.gz -C /usr/local --strip-components 1
  rm /tmp/julia.tar.gz

  # Install Packages
  if [ "$COLAB_GPU" = "1" ]; then
      JULIA_PACKAGES="$JULIA_PACKAGES $JULIA_PACKAGES_IF_GPU"
  fi
  # Intentionally unquoted: the list is split on whitespace into package names.
  for PKG in $JULIA_PACKAGES; do
    echo "Installing Julia package $PKG..."
    # Output is discarded, so name the failing package before aborting.
    if ! julia -e 'using Pkg; pkg"add '"$PKG"'; precompile;"' &> /dev/null; then
      echo "Failed to install Julia package $PKG" >&2
      exit 1
    fi
  done

  # Install kernel and rename it to "julia"
  echo "Installing IJulia kernel..."
  julia -e 'using IJulia; IJulia.installkernel("julia", env=Dict(
      "JULIA_NUM_THREADS"=>"'"$JULIA_NUM_THREADS"'"))'
  KERNEL_DIR=$(julia -e "using IJulia; print(IJulia.kerneldir())")
  KERNEL_NAME=$(ls -d "$KERNEL_DIR"/julia*)
  mv -f "$KERNEL_NAME" "$KERNEL_DIR"/julia

  echo ''
  echo "Successfully installed $(julia -v)!"
  echo "Please reload this page (press Ctrl+R, ⌘+R, or the F5 key) then"
  echo "jump to the 'Checking the Installation' section."
fi

Installing Julia 1.7.1 on the current Colab Runtime...
2022-06-25 16:53:21 URL:https://storage.googleapis.com/julialang2/bin/linux/x64/1.7/julia-1.7.1-linux-x86_64.tar.gz [123374573/123374573] -> "/tmp/julia.tar.gz" [1]
Installing Julia package IJulia...
Installing Julia package BenchmarkTools...
Installing Julia package Plots...


# Checking the Installation
The `versioninfo()` function should print your Julia version and some other info about the system:

In [None]:
# Print the Julia version and platform details to confirm the kernel is running.
versioninfo()

Julia Version 1.7.1
Commit ac5cc99908 (2021-12-22 19:35 UTC)
Platform Info:
  OS: Linux (x86_64-pc-linux-gnu)
  CPU: Intel(R) Xeon(R) CPU @ 2.30GHz
  WORD_SIZE: 64
  LIBM: libopenlibm
  LLVM: libLLVM-12.0.1 (ORCJIT, haswell)
Environment:
  JULIA_NUM_THREADS = 2


In [None]:
using BenchmarkTools

# 2048x2048 random matrix used as the benchmark workload.
M = rand(2^11, 2^11)

# Time the matrix product; `$M` interpolates the global into the benchmark
# expression and the trailing `;` hides the result from the cell output.
@btime $M * $M;

  477.897 ms (2 allocations: 32.00 MiB)


In [None]:
# Run the GPU benchmark only when COLAB_GPU is "1". `get` with a default keeps
# this cell from throwing a KeyError when the variable is not defined (for
# example when the notebook runs outside Colab).
if get(ENV, "COLAB_GPU", "") == "1"
    using CUDA

    run(`nvidia-smi`)

    # Create a new random matrix directly on the GPU:
    M_on_gpu = CUDA.CURAND.rand(2^11, 2^11)
    @btime $M_on_gpu * $M_on_gpu; nothing
else
    println("No GPU found.")
end

Sun Jan  9 02:26:33 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 495.44       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   34C    P8    28W / 149W |      0MiB / 11441MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

# Código

In [2]:
# Install every package loaded by the `using` cell below in one resolver pass
# instead of five separate `Pkg.add` calls.
import Pkg
Pkg.add(["DataFrames", "Statistics", "CSV", "Suppressor", "Dates"])
"" # keep the cell's displayed value empty

[32m[1m    Updating[22m[39m registry at `~/.julia/registries/General.toml`
[32m[1m   Resolving[22m[39m package versions...
[32m[1m   Installed[22m[39m Formatting ────────────────── v0.4.2
[32m[1m   Installed[22m[39m IteratorInterfaceExtensions ─ v1.0.0
[32m[1m   Installed[22m[39m DataAPI ───────────────────── v1.10.0
[32m[1m   Installed[22m[39m Crayons ───────────────────── v4.1.1
[32m[1m   Installed[22m[39m PooledArrays ──────────────── v1.4.2
[32m[1m   Installed[22m[39m Tables ────────────────────── v1.7.0
[32m[1m   Installed[22m[39m TableTraits ───────────────── v1.0.1
[32m[1m   Installed[22m[39m DataValueInterfaces ───────── v1.0.0
[32m[1m   Installed[22m[39m InvertedIndices ───────────── v1.1.0
[32m[1m   Installed[22m[39m Reexport ──────────────────── v1.2.2
[32m[1m   Installed[22m[39m Compat ────────────────────── v3.45.0
[32m[1m   Installed[22m[39m OrderedCollections ────────── v1.4.1
[32m[1m   Installed[22m[39m DataStr

""

In [None]:
using DataFrames
using Statistics
using CSV
using Suppressor
using Dates

In [None]:
"""
    inicializar()

Build the zeroed accumulator table: one row per `hora(i, j)` label for
`i in 0:23` and `j in 0:3` (96 rows), with columns `"Hora"` (String),
`"kWh*count"` (Float64, all `0.0`) and `"count"` (Int64, all `0`).
"""
function inicializar()
    # Build whole columns up front instead of appending to the column vectors
    # of an existing DataFrame, which bypasses the table's row API.
    horas = String[hora(i, j) for i in 0:23 for j in 0:3]
    return DataFrame(
        "Hora" => horas,
        "kWh*count" => zeros(Float64, length(horas)),
        "count" => zeros(Int64, length(horas)),
    )
end

inicializar (generic function with 1 method)

In [None]:
"""
    hora(i, j)

Return the time label `"HH:MM:00"` for hour `i` and quarter index `j`.

The hour is zero-padded to two digits when `i < 10`; the minutes are
`j * 15`, written as `"00"` when `j == 0`.
"""
function hora(i, j)
    horas = if i == 0
        "00"
    elseif i < 10
        "0" * string(i)
    else
        string(i)
    end

    minutos = j == 0 ? "00" : string(j * 15)

    return string(horas, ":", minutos, ":", "00")
end

hora (generic function with 1 method)

In [None]:
"""
    sumar_columnas(out, df)

Add columns 2 and 3 of `df` element-wise to columns 2 and 3 of `out`
(mutating `out`) and return `out`.

When both tables are empty there is nothing to add and `out` is returned
unchanged; previously this case fell through and returned `nothing`, which
callers then stored as their accumulator.
"""
function sumar_columnas(out, df)
    if isempty(out) && isempty(df)
        return out
    end

    # A row-count mismatch still raises from the broadcast, as before.
    out[!, 2] .= out[!, 2] .+ df[!, 2]
    out[!, 3] .= out[!, 3] .+ df[!, 3]
    return out
end

sumar_columnas (generic function with 1 method)

In [None]:
"""
    df_por_tarifa(df)

Aggregate `df` into one row per `hora(i, j)` label (`i in 0:23`, `j in 0:3`).

For each label, the rows of `df` whose `"Hora"` equals it are selected and
the result row holds the label, the sum of column 2 over those rows
(`"kWh*count"`) and how many rows matched (`"count"`).

The total is computed as a plain sum, which is what `mean * count` amounts
to, so a label with no matching rows yields `0.0` instead of `NaN`.
"""
function df_por_tarifa(df)
    out = DataFrame("Hora" => String[], "kWh*count" => Float64[], "count" => Int64[])

    for i in 0:23, j in 0:3
        horario = hora(i, j)
        lecturas = filter("Hora" => n -> n == horario, df)[!, 2]

        total = isempty(lecturas) ? 0.0 : sum(lecturas)
        push!(out, (horario, total, length(lecturas)))
    end

    return out
end

df_por_tarifa (generic function with 1 method)

In [None]:
"""
    t_n_f_index(archivo)

Return the tariff label found in the file at path `archivo`.

The first line containing `"Servicio:"` is split on `";"`, fields equal to
`""` or `" "` are dropped, and the second remaining field is the label.
Any label containing `"Medicion:"` is normalized to exactly `"Medicion:"`.

Throws `ArgumentError` when no `"Servicio:"` line exists or when that line
has fewer than two non-blank fields.
"""
function t_n_f_index(archivo)
    lines = readlines(archivo)

    tariff_index = findfirst(t -> occursin("Servicio:", t), lines)
    if tariff_index === nothing
        throw(ArgumentError("no line containing \"Servicio:\" found in $archivo"))
    end

    # Drop the blank fields produced by consecutive separators.
    campos = filter(x -> x != " " && x != "", split(lines[tariff_index], ";"))
    if length(campos) < 2
        throw(ArgumentError("the \"Servicio:\" line in $archivo has no tariff field"))
    end

    tarifa = campos[2]
    # Always return a String so the result type does not depend on the branch.
    return occursin("Medicion:", tarifa) ? "Medicion:" : String(tarifa)
end

t_n_f_index (generic function with 1 method)

In [None]:
"""
    limpiarTexto(f)

Read every line from `f` and return the text, starting at the first line that
contains `"Fecha"`, as one newline-joined string ready to be parsed as
`;`-delimited CSV.

Before joining, the last line is blanked, and so is the line before it when
it is exactly the all-separator row. The raw byte `\\xe1` is removed from the
header line, and each `";;"` pair is collapsed to `";"` in a single
left-to-right pass.

Throws `ArgumentError` when no line contains `"Fecha"`.
"""
function limpiarTexto(f)
    lines = readlines(f)

    fecha_in = findfirst(t -> occursin("Fecha", t), lines)
    if fecha_in === nothing
        throw(ArgumentError("no line containing \"Fecha\" found; cannot locate the header row"))
    end

    lines[end] = "" # blank out the last line
    # Blank the all-separator row too, when there is a line before the last.
    if length(lines) >= 2 && lines[end - 1] == ";;;;;;;;;;;;;;;;"
        lines[end - 1] = ""
    end

    lines = lines[fecha_in:end] # keep only the header row onwards
    lines[1] = replace(lines[1], "\xe1" => "") # strip the special character from the header
    texto = join(lines, "\n") # one string so it can be read as CSV
    return replace(texto, ";;" => ";") # collapse doubled separators
end

limpiarTexto (generic function with 1 method)

In [None]:
"""
    primeraLimpiezaDF(df)

Reduce `df` in place to the columns `"Fecha Hora"` and `"kWh"`, dropping the
rows where either is missing, and return it. A column named `"Fecha hora"`
is first renamed to `"Fecha Hora"`.
"""
function primeraLimpiezaDF(df)
    # Normalize the header spelling before selecting by name.
    "Fecha hora" in names(df) && rename!(df, "Fecha hora" => "Fecha Hora")

    select!(df, "Fecha Hora", "kWh")
    for columna in ("Fecha Hora", "kWh")
        dropmissing!(df, columna)
    end

    return df
end

primeraLimpiezaDF (generic function with 1 method)

In [None]:
"""
    segundaLimpiezaDF(df)

Convert column 2 of `df` from decimal-comma text to `Float64`, split
`"Fecha Hora"` on the space into new `"Fecha"` and `"Hora"` columns, and
return a new two-column table `"Hora"`, `"kWh"`.

`df` itself is modified: column 2 is replaced and the two split columns are
added to it.
"""
function segundaLimpiezaDF(df)
    # "1,5" -> "1.5" -> 1.5
    df[!, 2] .= parse.(Float64, replace.(df[!, 2], "," => "."))

    # The split adds "Fecha" and "Hora" as columns 3 and 4.
    transform!(df, "Fecha Hora" => ByRow(x -> split(x, ' ')) => ["Fecha", "Hora"])

    # Copy column 4 (time) and column 2 (values), already in the final order.
    resultado = df[:, [4, 2]]
    rename!(resultado, ["Hora", "kWh"])

    return resultado
end

segundaLimpiezaDF (generic function with 1 method)

In [None]:
# Input folder; created when it does not exist yet.
carpeta = "procesar"
isdir(carpeta) ? nothing : mkdir(carpeta)

"procesar"

In [None]:
# Collect the path of every file found one level below `carpeta`.
archivos = Vector{String}()
for folder in readdir(carpeta)
    # Skip loose files directly inside `carpeta`: calling `readdir` on a
    # non-directory throws.
    isdir("$carpeta/$folder") || continue

    for file in readdir("$carpeta/$folder")
        push!(archivos, "$carpeta/$folder/$file")
    end
end

In [None]:
# Show how many input files were collected.
println(length(archivos))

0


In [None]:
# Tariff label of every input file, in the same order as `archivos`.
tariff = String[]

for archivo in archivos
    push!(tariff, t_n_f_index(archivo))
end

In [None]:
# Distinct tariff labels, followed by how many files carry each one.
tariff_set_list = collect(Set(tariff))
for tarifa in tariff_set_list
    println("$tarifa -> ", count(==(tarifa), tariff))
end

In [None]:
# Positions in `tariff` whose label contains "Medicion:", and how many there are.
arch = findall(contains("Medicion:"), tariff)
cant_archivos = length(arch)
println(cant_archivos)
"" # keep the cell's displayed value empty

0


""

In [None]:
# Output folder; created when it does not exist yet.
carpeta_salida = "excel_out"
isdir(carpeta_salida) ? nothing : mkdir(carpeta_salida)

"excel_out"

In [None]:
# Timestamp used in the output file names: "<hour>h<minute>m<second>s_<date>".
# The minute and second fields were previously swapped relative to their labels.
t = Dates.now()
formatoTiempo = "$(Dates.hour(t))h$(Dates.minute(t))m$(Dates.second(t))s_$(Dates.Date(t))"
"" # keep the cell's displayed value empty

""

In [None]:
# For every distinct tariff label (except those containing "Medicion:"),
# accumulate the per-slot tables of all files with that tariff and write the
# result to one CSV file in `carpeta_salida`.
# The whole loop runs inside `@suppress` (Suppressor.jl) so nothing it prints
# reaches the cell output.
@suppress begin
    for i in 1:length(tariff_set_list)
        
        global contador = 0
        # operate on every file matching this tariff,
        # skipping the "Medicion:" label
        if occursin("Medicion:", tariff_set_list[i]) != true
            global out = inicializar() # start from a zeroed accumulator table
            posicion_tarifas = findall(x -> x == tariff_set_list[i] , tariff) # positions of the files with this tariff    

            # walk every file of this tariff
            for indice_tarifa in posicion_tarifas
                archivo = archivos[indice_tarifa] # file at that tariff position
                
                    # open the file
                    open(archivo, "r") do f

                        string = limpiarTexto(f) # prepare the text for the DataFrame
                        df = CSV.read(IOBuffer(string), DataFrame; delim = ";", header = true) # text to DataFrame via CSV                    
                        df = primeraLimpiezaDF(df)

                        # only files that still have rows after the first cleanup are counted
                        if length(df[!, 1]) > 0
                            
                            global contador += 1
                            df = segundaLimpiezaDF(df)
                            df_out = df_por_tarifa(df) # one row per 15-minute slot: 24h*(60min/1h)/15min = 96

                            # replace NaN with 0.0 in every column
                            for col in eachcol(df_out)
                                replace!(col, NaN => 0.0)
                            end    

                            # NOTE(review): `out` is assigned inside this do-block; it appears to
                            # rely on the enclosing `global out` declaration — confirm.
                            out = sumar_columnas(df_out, out) # add this file's table to the accumulator
                        
                        end 
                    end
            end
            # output name: <tariff>-<number of files counted>-<timestamp>
            nombre = "$(tariff_set_list[i])-$contador-$formatoTiempo" 
            CSV.write("$carpeta_salida/$nombre.csv", out) # save the accumulated table for this tariff
        end
    end
end
""

""

---------------------------------------------------------------------------------------------------------------------------------------------------