In [62]:
import pandas as pd
import joblib

We import the loaded model:

In [63]:
model_carregat = joblib.load('model_entrenat.pkl')

We import the data we want to input. Remember to change the filename in the code `df_nou` from `'provesentrevista.csv'` to `'yourfilename.csv'`.

In [None]:
df_nou = pd.read_csv('provesentrevista.csv', sep=';')

Since we've changed certain data, for the model to work correctly, we first need to process the new dataframe so it has the same columns. Let's start by changing the office to office 164 if the client's office is one of the small offices:

In [65]:
# Carrega les oficines agrupades
oficines_agrupades = pd.read_csv("oficines_agrupades.csv")["oficina_agrupada"].tolist()

# Substitueix qualsevol oficina que estigui a la llista pel codi 164
df_nou["oficina"] = df_nou["oficina"].apply(lambda x: 164 if x in oficines_agrupades else x)

If there are missing entries in the new data, the model won't work properly. Let's fix them the same way as during training:

In [66]:
df_nou['edad'] = df_nou['edad'].fillna(38.727759)
df_nou['salario'] = df_nou['salario'].fillna(3871.138544)
df_nou['sexo'].fillna('Desconegut', inplace=True)
df_nou['antiguedad_vehiculo'].fillna('Desconegut', inplace=True)

Maybe the new data doesn’t have all the categorical variables, which would cause the model to fail. These lines of code fix that:

In [67]:
def prepara_dades_noves(df_nou, columnes_entrenament):
    """
    Afegeix les columnes que falten a df_nou perquè coincideixi amb les columnes d'entrenament.
    
    Paràmetres:
    - df_nou: DataFrame amb les noves dades, ja amb get_dummies aplicat
    - columnes_entrenament: Llista de columnes que s'esperen (les que té el model entrenat)

    Retorna:
    - df_nou: amb les columnes afegides i ordenades
    """
    # Afegim les columnes que falten amb valors 0
    for col in columnes_entrenament:
        if col not in df_nou.columns:
            df_nou[col] = 0

    # Eliminem les columnes que no estaven a l'entrenament
    df_nou = df_nou[columnes_entrenament]

    return df_nou


In [68]:
# Llegir les columnes del model
columnes_entrenament = pd.read_csv("columnes_model.csv")["columna"].tolist()

# Columnes correcte:
df_nou = pd.get_dummies(df_nou)  # transformar les categoríques com vas fer en l'entrenament
df_nou = prepara_dades_noves(df_nou, columnes_entrenament)

  df_nou[col] = 0
  df_nou[col] = 0
  df_nou[col] = 0
  df_nou[col] = 0
  df_nou[col] = 0
  df_nou[col] = 0
  df_nou[col] = 0
  df_nou[col] = 0
  df_nou[col] = 0
  df_nou[col] = 0
  df_nou[col] = 0
  df_nou[col] = 0
  df_nou[col] = 0
  df_nou[col] = 0
  df_nou[col] = 0
  df_nou[col] = 0
  df_nou[col] = 0
  df_nou[col] = 0
  df_nou[col] = 0
  df_nou[col] = 0
  df_nou[col] = 0
  df_nou[col] = 0
  df_nou[col] = 0
  df_nou[col] = 0
  df_nou[col] = 0
  df_nou[col] = 0
  df_nou[col] = 0
  df_nou[col] = 0
  df_nou[col] = 0
  df_nou[col] = 0
  df_nou[col] = 0
  df_nou[col] = 0
  df_nou[col] = 0
  df_nou[col] = 0
  df_nou[col] = 0
  df_nou[col] = 0
  df_nou[col] = 0
  df_nou[col] = 0
  df_nou[col] = 0
  df_nou[col] = 0
  df_nou[col] = 0
  df_nou[col] = 0
  df_nou[col] = 0
  df_nou[col] = 0
  df_nou[col] = 0
  df_nou[col] = 0
  df_nou[col] = 0
  df_nou[col] = 0


In [69]:
# Obtenir prediccions
prediccions = model_carregat.predict(df_nou)

# Afegir les prediccions al DataFrame original
df_nou["prediccio_model"] = prediccions

# Guardar a un nou fitxer CSV
df_nou.to_csv("noves_dades_amb_prediccions.csv", index=False)

print("Fitxer guardat com 'noves_dades_amb_prediccions.csv'")

Fitxer guardat com 'noves_dades_amb_prediccions.csv'


  df_nou["prediccio_model"] = prediccions


If what we want is to evaluate the model with already prepared data, we need to apply this code **after** defining the training columns:
(Note that if you try to run all the cells together, it will give an error because applying both at the same time isn’t compatible as currently designed)

In [70]:
X = df_nou.drop(columns='respuesta')             
y = df_nou['respuesta'] 

X = pd.get_dummies(X)  # transformar les categoríques com vas fer en l'entrenament
X = prepara_dades_noves(X, columnes_entrenament)
prediccions = model_carregat.predict(X)

from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

print("Accuracy:", accuracy_score(y, prediccions))
print("Matriz de confusió:")
print(confusion_matrix(y, prediccions))
print("Informe de classificació:")
print(classification_report(y, prediccions))

KeyError: "['respuesta'] not found in axis"