<a href="https://colab.research.google.com/github/JhonR-26/Regresion_logistica/blob/main/Actividad%20-%20Regresion%20logistica_expo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%%shell
set -e

#---------------------------------------------------#
JULIA_VERSION="1.8.2" # any version ≥ 0.7.0
JULIA_PACKAGES="IJulia BenchmarkTools"
JULIA_PACKAGES_IF_GPU="CUDA" # or CuArrays for older Julia versions
JULIA_NUM_THREADS=2
#---------------------------------------------------#

if [ -z `which julia` ]; then
  # Install Julia
  JULIA_VER=`cut -d '.' -f -2 <<< "$JULIA_VERSION"`
  echo "Installing Julia $JULIA_VERSION on the current Colab Runtime..."
  BASE_URL="https://julialang-s3.julialang.org/bin/linux/x64"
  URL="$BASE_URL/$JULIA_VER/julia-$JULIA_VERSION-linux-x86_64.tar.gz"
  wget -nv $URL -O /tmp/julia.tar.gz # -nv means "not verbose"
  tar -x -f /tmp/julia.tar.gz -C /usr/local --strip-components 1
  rm /tmp/julia.tar.gz

  # Install Packages
  nvidia-smi -L &> /dev/null && export GPU=1 || export GPU=0
  if [ $GPU -eq 1 ]; then
    JULIA_PACKAGES="$JULIA_PACKAGES $JULIA_PACKAGES_IF_GPU"
  fi
  for PKG in `echo $JULIA_PACKAGES`; do
    echo "Installing Julia package $PKG..."
    julia -e 'using Pkg; pkg"add '$PKG'; precompile;"' &> /dev/null
  done

  # Install kernel and rename it to "julia"
  echo "Installing IJulia kernel..."
  julia -e 'using IJulia; IJulia.installkernel("julia", env=Dict(
      "JULIA_NUM_THREADS"=>"'"$JULIA_NUM_THREADS"'"))'
  KERNEL_DIR=`julia -e "using IJulia; print(IJulia.kerneldir())"`
  KERNEL_NAME=`ls -d "$KERNEL_DIR"/julia*`
  mv -f $KERNEL_NAME "$KERNEL_DIR"/julia

  echo ''
  echo "Successfully installed `julia -v`!"
  echo "Please reload this page (press Ctrl+R, ⌘+R, or the F5 key) then"
  echo "jump to the 'Checking the Installation' section."
fi

In [None]:
# Paquetes necesarios
import Pkg;
Pkg.add("Lathe")
using Lathe

Pkg.add("DataFrames")
using DataFrames

Pkg.add("GLM")
using GLM

Pkg.add("StatsBase")
using StatsBase

Pkg.add("MLBase")
using MLBase

Pkg.add("ROCAnalysis")
using ROCAnalysis

Pkg.add("CSV")
using CSV

Pkg.add("Printf")
using Printf

In [None]:
# Ruta al archivo Excel
ruta_archivo = "/content/Base_de_datos_.xlsx - Hoja 1.csv"

# Convertir el archivo en un DataFrame
df = CSV.File(ruta_archivo) |> DataFrame

In [30]:
# Conjunto train y test
using Lathe.preprocess: TrainTestSplit
train, test = TrainTestSplit(df, .80);

# Entrenamiento del modelo
fm = @formula(clase ~ duracion + paginas + acciones + valor)
logit = glm(fm, train, Binomial(), ProbitLink())

StatsModels.TableRegressionModel{GeneralizedLinearModel{GLM.GlmResp{Vector{Float64}, Binomial{Float64}, ProbitLink}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}}}, Matrix{Float64}}

clase ~ 1 + duracion + paginas + acciones + valor

Coefficients:
────────────────────────────────────────────────────────────────────────────────
                   Coef.   Std. Error      z  Pr(>|z|)    Lower 95%    Upper 95%
────────────────────────────────────────────────────────────────────────────────
(Intercept)  -0.00716708  0.205073     -0.03    0.9721  -0.409102     0.394768
duracion      0.00115043  0.000669493   1.72    0.0857  -0.00016175   0.00246261
paginas       0.313668    0.122935      2.55    0.0107   0.0727199    0.554617
acciones     -0.168384    0.0521831    -3.23    0.0013  -0.270661    -0.0661072
valor         0.0148742   0.00847748    1.75    0.0793  -0.00174136   0.0314898
─────────────────────────────────────────────────────────

In [17]:
# Coeficientes (B0, B1, B2, B3, B4)
coeficientes = coef(logit)

# Reordenar el número con dos decimales
B0 = @sprintf("%.4f", coeficientes[1])
B1 = @sprintf("%.4f", coeficientes[2])
B2 = @sprintf("%.4f", coeficientes[3])
B3 = @sprintf("%.4f", coeficientes[4])
B4 = @sprintf("%.4f", coeficientes[5])

println("B0 = ", B0, "\n", "B1 = ", B1, "\n", "B2 = ", B2, "\n", "B3 = ", B3, "\n", "B4 = ", B4)

B0 = 0.0110
B1 = 0.0010
B2 = 0.2782
B3 = -0.1360
B4 = 0.0075


In [19]:
# Construir la fórmula
formula_ = "P = 1 / (1 + exp(-(B0 + B1x1 + B2x2 + B3x3 + B4x4)))"

# Reemplazar los valores de los coeficientes en la fórmula
formula_log = replace(formula_, "B0" => B0, "B1" => B1, "B2" => B2, "B3" => B3, "B4" => B4)

println("Fórmula:")
println(formula_log)

Fórmula:
P = 1 / (1 + exp(-(0.0110 + 0.0010x1 + 0.2782x2 + -0.1360x3 + 0.0075x4)))


In [None]:
# Calcular predicciones
prediccion = predict(logit, test)

In [None]:
# Clasificacion de los datos
evaluacion = [if x < 0.5 0 else 1 end for x in prediccion];

prediccion_df = DataFrame(y_actual = test.clase,
						y_predicted = evaluacion,
						prob_predicted = prediccion);
prediccion_df.correctly_classified = prediccion_df.y_actual .== prediccion_df.y_predicted

In [26]:
# Matriz de confusion
matriz_de_confusion = MLBase.roc(prediccion_df.y_actual, prediccion_df.y_predicted)

# Guardado de los valores de la matriz de confusion
tp = matriz_de_confusion.tp
tn = matriz_de_confusion.tn
fp = matriz_de_confusion.fp
fn = matriz_de_confusion.fn

print(matriz_de_confusion)

ROCNums{Int64}
  p = 15
  n = 13
  tp = 7
  tn = 12
  fp = 1
  fn = 8


In [None]:
# Exactitud, Precision y Sensibilidad del modelo
Exactitud = (tp + tn)/(tp + tn + fp + fn)
Precision = tp/(tp + fn)
Sensibilidad = tp/(tp + fn)
println("Exactitud = ", Exactitud, "\n", "Precision = ", Precision, "\n", "Sensibilidad = ", Sensibilidad)

Exactitud = 0.7555555555555555
Precision = 0.8
Sensibilidad = 0.8
