In [183]:
import sqlite3
import pandas as pd


#Importar la base de datos
conexion = sqlite3.connect('datos_sensores.db')

#Codigo SQL para hacer la consulta
query = """
SELECT
datos_basicos.tamaño,
datos_basicos.categoria,
orion.sensor_orion,
vega.sensor_vega,
polaris.sensor_polaris,
antares.sensor_antares,
CASE
	WHEN clasificacion.etiqueta = "Positivo" THEN 1
  WHEN clasificacion.etiqueta = "Negativo" THEN 0
	ELSE NULL
END as Etiqueta_Numerica
FROM datos_basicos
LEFT JOIN orion ON datos_basicos.id = orion.id
LEFT JOIN vega ON datos_basicos.id = vega.id
LEFT JOIN polaris ON datos_basicos.id = polaris.id
LEFT JOIN antares ON datos_basicos.id = antares.id
LEFT JOIN clasificacion ON datos_basicos.id = clasificacion.id
LIMIT 300000
"""

dataset = pd.read_sql_query(query, conexion)
dataset.head(3000000)

Unnamed: 0,tamaño,categoria,sensor_orion,sensor_vega,sensor_polaris,sensor_antares,Etiqueta_Numerica
0,0.02,3,19.77,42.15,35.36,-62.73,1
1,0.11,3,-23.51,-20.64,69.78,-38.02,1
2,0.85,1,86.63,75.26,30.82,-71.62,0
3,0.53,1,-44.91,4.11,71.92,27.17,0
4,0.14,2,-66.34,-48.04,-2.91,-74.93,0
...,...,...,...,...,...,...,...
299995,0.30,2,-81.98,20.64,8.07,40.18,1
299996,0.21,3,-2.74,-81.87,-15.87,40.64,1
299997,0.12,2,-13.86,-27.85,63.55,19.37,0
299998,0.09,1,-3.87,-69.29,86.51,-15.73,1


In [184]:
query2 = """
SELECT
datos_basicos.tamaño,
datos_basicos.categoria,
orion.sensor_orion,
vega.sensor_vega,
polaris.sensor_polaris,
antares.sensor_antares,
CASE
	WHEN clasificacion.etiqueta = "Positivo" THEN 1
	WHEN clasificacion.etiqueta = "Negativo" THEN 0
	ELSE NULL
END as Etiqueta_Numerica
FROM datos_basicos
LEFT JOIN orion ON datos_basicos.id = orion.id
LEFT JOIN vega ON datos_basicos.id = vega.id
LEFT JOIN polaris ON datos_basicos.id = polaris.id
LEFT JOIN antares ON datos_basicos.id = antares.id
LEFT JOIN clasificacion ON datos_basicos.id = clasificacion.id
ORDER BY datos_basicos.id DESC
LIMIT 100
"""
dataset2 = pd.read_sql_query(query2, conexion)
dataset2.head(100)

Unnamed: 0,tamaño,categoria,sensor_orion,sensor_vega,sensor_polaris,sensor_antares,Etiqueta_Numerica
0,0.40,2,36.78,-59.10,-65.14,5.63,
1,0.71,1,15.46,-39.97,45.71,64.84,
2,0.41,3,-48.04,-23.84,32.14,-46.10,
3,0.08,1,-3.33,-28.75,-97.28,30.82,
4,0.61,3,75.72,-65.18,75.00,-33.67,
...,...,...,...,...,...,...,...
95,0.92,3,16.14,98.17,-14.76,10.59,
96,0.86,1,24.14,-31.35,-67.50,29.67,
97,0.86,3,-22.53,10.25,74.24,1.93,
98,0.93,2,2.98,14.86,-22.14,45.21,


### **Entrenamiento del modelo**


In [203]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

X = dataset.drop('Etiqueta_Numerica', axis=1)
y = dataset['Etiqueta_Numerica']

X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=8)

#Creacion del modelo

modelo = LogisticRegression(max_iter=1000)
modelo.fit(X_train, y_train)

In [204]:
predicciones = modelo.predict(X_test)
predicciones

array([0, 1, 0, ..., 0, 1, 0])

In [205]:
accuracy = accuracy_score(y_test, predicciones)
print(f"Precision del modelo: {accuracy:.2f}")

Precision del modelo: 0.51


In [206]:
reporte = classification_report(y_test, predicciones)
print(reporte)

              precision    recall  f1-score   support

           0       0.51      0.87      0.64     30351
           1       0.53      0.14      0.23     29649

    accuracy                           0.51     60000
   macro avg       0.52      0.51      0.44     60000
weighted avg       0.52      0.51      0.44     60000



In [207]:
matriz_confunsion = confusion_matrix(y_test, predicciones)
print(matriz_confunsion)

[[26509  3842]
 [25364  4285]]


In [208]:
datos = dataset2.drop('Etiqueta_Numerica', axis=1)

datos

Unnamed: 0,tamaño,categoria,sensor_orion,sensor_vega,sensor_polaris,sensor_antares
0,0.40,2,36.78,-59.10,-65.14,5.63
1,0.71,1,15.46,-39.97,45.71,64.84
2,0.41,3,-48.04,-23.84,32.14,-46.10
3,0.08,1,-3.33,-28.75,-97.28,30.82
4,0.61,3,75.72,-65.18,75.00,-33.67
...,...,...,...,...,...,...
95,0.92,3,16.14,98.17,-14.76,10.59
96,0.86,1,24.14,-31.35,-67.50,29.67
97,0.86,3,-22.53,10.25,74.24,1.93
98,0.93,2,2.98,14.86,-22.14,45.21


In [182]:
predicciones2 = modelo.predict(datos)
predicciones2


array([0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0])

In [209]:
a = []

for i in predicciones2:
  if i == 0:
    a.append("Falso")
  else:
    a.append("Verdadero")

In [212]:
print(a)

['Falso', 'Falso', 'Falso', 'Verdadero', 'Falso', 'Falso', 'Falso', 'Verdadero', 'Falso', 'Falso', 'Falso', 'Falso', 'Falso', 'Verdadero', 'Falso', 'Falso', 'Falso', 'Falso', 'Falso', 'Falso', 'Falso', 'Falso', 'Verdadero', 'Falso', 'Falso', 'Falso', 'Falso', 'Verdadero', 'Falso', 'Falso', 'Falso', 'Falso', 'Falso', 'Falso', 'Falso', 'Falso', 'Falso', 'Verdadero', 'Falso', 'Falso', 'Falso', 'Falso', 'Falso', 'Falso', 'Verdadero', 'Falso', 'Verdadero', 'Falso', 'Verdadero', 'Falso', 'Falso', 'Falso', 'Falso', 'Falso', 'Falso', 'Falso', 'Falso', 'Falso', 'Falso', 'Falso', 'Falso', 'Falso', 'Falso', 'Falso', 'Falso', 'Falso', 'Falso', 'Verdadero', 'Verdadero', 'Falso', 'Falso', 'Falso', 'Falso', 'Falso', 'Verdadero', 'Falso', 'Falso', 'Falso', 'Falso', 'Falso', 'Falso', 'Falso', 'Falso', 'Falso', 'Falso', 'Falso', 'Falso', 'Falso', 'Falso', 'Falso', 'Falso', 'Falso', 'Falso', 'Falso', 'Falso', 'Falso', 'Verdadero', 'Falso', 'Falso', 'Falso']


In [214]:
import csv
id = 300001
archivocsv = "predicciones.csv"
with open(archivocsv, 'w', newline='') as csvfile:

    # Crear el objeto escritor CSV
    writer = csv.writer(csvfile)

    writer.writerow(['ID', 'Etiqueta'])

    # Escribir cada elemento de la lista como una fila en el CSV
    for elemento in a:
        writer.writerow([id, elemento])
        id += 1

print(f'Se ha creado el archivo CSV en: {archivocsv}')

Se ha creado el archivo CSV en: predicciones.csv
