In [1]:
import tkinter
from tkinter import filedialog
from tkinter import ttk
from sklearn.model_selection import train_test_split
from sklearn import linear_model
import pandas as pd

def open_csv_file():
    filetypes = [("Archivos CSV", "*.csv")]
    filename = filedialog.askopenfilename(filetypes=filetypes)
    if filename:
        try:
            global df
            df = pd.read_csv(filename)
            header = df.columns.tolist()
            show_columns(header)
        except pd.errors.EmptyDataError:
            show_error("El archivo está vacío.")
        except pd.errors.ParserError:
            show_error("No se pudo leer el archivo CSV.")
    else:
        show_error("No se seleccionó ningún archivo.")

def show_columns(columns):
    columns_frame = tkinter.Frame(window)
    columns_frame.pack(pady=10)

    global columns_lb
    columns_lb = tkinter.Listbox(columns_frame, selectmode=tkinter.MULTIPLE)
    columns_lb.pack(side="left", padx=5)

    for column in columns:
        columns_lb.insert(tkinter.END, column)

    global train_button
    train_button = tkinter.Button(columns_frame, text="Entrenar modelo", command=train_model, fg="#FFFFFF", bg="#007BFF", relief=tkinter.FLAT)
    train_button.pack(side="left", padx=5)

    global score_button
    score_button = tkinter.Button(columns_frame, text="Calcular score", command=calculate_score, fg="#FFFFFF", bg="#007BFF", relief=tkinter.FLAT, state=tkinter.DISABLED)
    score_button.pack(side="left", padx=5)

def train_model():
    selected_columns = [columns_lb.get(i) for i in columns_lb.curselection()]
    if selected_columns:
        global linear
        linear = linear_model.LinearRegression()

        data_x = df[selected_columns]
        data_y = df["charges"]

        global train_x, test_x, train_y, test_y
        train_x, test_x, train_y, test_y = train_test_split(data_x, data_y, test_size=0.1)

        linear.fit(train_x, train_y)
        score_button["state"] = tkinter.NORMAL

        print(f"Modelo entrenado con las columnas: {', '.join(selected_columns)}")

def calculate_score():
    try:
        score = round(linear.score(test_x, test_y) * 100, 2)
        score_label.config(text=f"Puntaje: {score}%")
        print(f"Score: {score}%")

        # Predecir con los datos de prueba
        predicted = linear.predict(test_x)
        print("Primeras 5 predicciones:")
        print(predicted[:5])
    except Exception as e:
        show_error(str(e))

def show_error(message):
    error_label.config(text=message)
    error_label.pack(pady=10)

# Crear ventana principal
window = tkinter.Tk()
window.title("Lector de CSV")
window.config(bg="#F0F0F0")

# Obtener el tamaño de la pantalla del usuario
screen_width = window.winfo_screenwidth()
screen_height = window.winfo_screenheight()

# Calcular el tamaño de la ventana
window_width = int(screen_width * 0.8)
window_height = int(screen_height * 0.8)

# Establecer el tamaño de la ventana
window.geometry(f"{window_width}x{window_height}")

title_lb = tkinter.Label(window, text="CSV Lector", font=("Arial", 16, "bold"), fg="#FFFFFF", bg="#333333", padx=10, pady=10)
title_lb.pack(fill=tkinter.X)

button_frame = tkinter.Frame(window, bg="#F0F0F0")
button_frame.pack(pady=10)

button = tkinter.Button(button_frame, text="Abrir archivo CSV", command=open_csv_file, fg="#FFFFFF", bg="#007BFF", relief=tkinter.FLAT)
button.pack()

score_label = tkinter.Label(window, text="Puntaje: ", font=("Arial", 12), fg="#000000")
score_label.pack(pady=10)

error_label = tkinter.Label(window, text="", font=("Arial", 12), fg="#FF0000")
error_label.pack(pady=10)

window.mainloop()

Modelo entrenado con las columnas: bmi
Score: 5.39%
Primeras 5 predicciones:
[11855.46536169 13585.82675251 11782.64838179 11454.97197223
 14222.0172085 ]
Score: 5.39%
Primeras 5 predicciones:
[11855.46536169 13585.82675251 11782.64838179 11454.97197223
 14222.0172085 ]
Score: 5.39%
Primeras 5 predicciones:
[11855.46536169 13585.82675251 11782.64838179 11454.97197223
 14222.0172085 ]


Exception in Tkinter callback
Traceback (most recent call last):
  File "C:\Program Files\Python311\Lib\tkinter\__init__.py", line 1948, in __call__
    return self.func(*args)
           ^^^^^^^^^^^^^^^^
  File "C:\Users\Amount\AppData\Local\Temp\ipykernel_1716\1826570764.py", line 55, in train_model
    linear.fit(train_x, train_y)
  File "c:\Users\Amount\Desktop\final_project_aa\finalproject\Lib\site-packages\sklearn\linear_model\_base.py", line 648, in fit
    X, y = self._validate_data(
           ^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Amount\Desktop\final_project_aa\finalproject\Lib\site-packages\sklearn\base.py", line 584, in _validate_data
    X, y = check_X_y(X, y, **check_params)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Amount\Desktop\final_project_aa\finalproject\Lib\site-packages\sklearn\utils\validation.py", line 1106, in check_X_y
    X = check_array(
        ^^^^^^^^^^^^
  File "c:\Users\Amount\Desktop\final_project_aa\finalproject\Lib\site-packages\skl

Modelo entrenado con las columnas: age
Score: 8.65%
Primeras 5 predicciones:
[15349.0971425  18710.82227536 12763.15473261  8108.45839481
  7849.86415382]
Score: 8.65%
Primeras 5 predicciones:
[15349.0971425  18710.82227536 12763.15473261  8108.45839481
  7849.86415382]
