In [22]:
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Încărcarea datelor
df = pd.read_csv("expanded_ev_charging_patterns_data_encoded.csv")

# Selectăm caracteristicile
features = [
    "Battery Capacity (kWh)", "Energy Consumed (kWh)", "Charging Duration (hours)",
    "Charging Rate (kW)", "Time of Day", "Day of Week", "State of Charge (Start %)",
    "State of Charge (End %)", "Distance Driven (since last charge) (km)", "Temperature (°C)",
    "Vehicle Age (years)", "Vehicle Model_Chevy Bolt", "Vehicle Model_Hyundai Kona", 
    "Vehicle Model_Nissan Leaf", "Vehicle Model_Tesla Model 3",
    "Charging Station Location_Houston", "Charging Station Location_Los Angeles", 
    "Charging Station Location_New York", "Charging Station Location_San Francisco", 
    "Charger Type_Level 1", "Charger Type_Level 2", "User Type_Commuter", "User Type_Long-Distance Traveler"
]

X = df[features]
y = df["Charging Cost (USD)"]

# Împărțim datele
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalizare
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
# Antrenare model MLP cu regularizare L2 și mai multe iterații
mlp = MLPRegressor(hidden_layer_sizes=(128, 64, 32, 16), activation='relu', solver='adam', ## hiperparametri Grid Search
                   learning_rate_init=0.0005, max_iter=3000, random_state=42, alpha=0.0001)
              



# Model MLP
# mlp = MLPRegressor(
    # hidden_layer_sizes=(128,),
    # activation='logistic',
    # solver='adam',
    # learning_rate_init=0.0034387184317667416,
    # max_iter=3000,
    # random_state=42,
    # alpha=0.009493154312767503
# )
mlp.fit(X_train_scaled, y_train)

# Salvăm modelul și scalerul
joblib.dump(mlp, "charging_cost_model.h5")
joblib.dump(scaler, "scaler.pkl")

# Predicții
y_pred = mlp.predict(X_test_scaled)

# Evaluare
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Eroare medie absolută (MAE): {mae:.2f} USD")
print(f"Eroare pătratică medie (MSE): {mse:.2f}")
print(f"R-squared pe test set: {r2:.2f}")

# Cross-Validation pentru R²
cv_scores = cross_val_score(mlp, X_train_scaled, y_train, cv=5, scoring='r2')
print(f"Cross-Validation R² scores: {cv_scores}")
print(f"Cross-Validation R² media: {np.mean(cv_scores):.2f}")

# Funcție predicție utilizatori noi
def estimate_charging_cost(input_data):
    input_df = pd.DataFrame([input_data], columns=features)
    input_scaled = scaler.transform(input_df)
    predicted_cost = mlp.predict(input_scaled)[0]
    return round(predicted_cost, 2)

# Exemplu de utilizare
example_input = [108, 50, 1.5, 80, 2.0, 3, 50, 100, 200, 15, 2, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0]
print(f"Cost estimat: {estimate_charging_cost(example_input)} USD")


Eroare medie absolută (MAE): 0.46 USD
Eroare pătratică medie (MSE): 10.82
R-squared pe test set: 0.91
Cross-Validation R² scores: [0.75677491 0.81621954 0.73942668 0.76986984 0.74398763]
Cross-Validation R² media: 0.77
Cost estimat: 14.64 USD


In [19]:
# param_grid = {
#     'hidden_layer_sizes': [(128, 64, 32, 16), (64, 32, 16)],
#     'activation': ['relu'],
#     'solver': ['adam', 'lbfgs'],
#     'learning_rate_init': [0.001, 0.0005],
#     'alpha': [0.001, 0.0001]
# }

# grid_search = GridSearchCV(MLPRegressor(max_iter=3000, random_state=42), param_grid, cv=3)
# grid_search.fit(X_train_scaled, y_train)

# print(f"Best parameters: {grid_search.best_params_}")
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPRegressor

param_grid = {
    'hidden_layer_sizes': [(128, 64, 32, 16), (64, 32, 16)],
    'activation': ['relu'],
    'solver': ['adam', 'lbfgs'],
    'learning_rate_init': [0.001, 0.0005],
    'alpha': [0.001, 0.0001]
}

# GridSearch optimizat pentru MSE
grid_search = GridSearchCV(
    MLPRegressor(max_iter=3000, random_state=42),
    param_grid,
    cv=3,
    scoring='neg_mean_squared_error',
    n_jobs=-1,
    verbose=2
)

# Antrenăm
grid_search.fit(X_train_scaled, y_train)

# Rezultate
best_params = grid_search.best_params_
best_score_neg_mse = grid_search.best_score_

print(f"Best parameters: {best_params}")
print(f"Best CV MSE: {-best_score_neg_mse:.4f}")

# Testăm pe X_test
best_mlp = grid_search.best_estimator_
y_pred_best = best_mlp.predict(X_test_scaled)

# Evaluare pe test set
print(f"Test Set R²: {r2_score(y_test, y_pred_best):.4f}")
print(f"Test Set MAE: {mean_absolute_error(y_test, y_pred_best):.4f} USD")
print(f"Test Set MSE: {mean_squared_error(y_test, y_pred_best):.4f}")


Fitting 3 folds for each of 16 candidates, totalling 48 fits
Best parameters: {'activation': 'relu', 'alpha': 0.0001, 'hidden_layer_sizes': (128, 64, 32, 16), 'learning_rate_init': 0.0005, 'solver': 'adam'}
Best CV MSE: 48.6737
Test Set R²: 0.9088
Test Set MAE: 0.4589 USD
Test Set MSE: 10.8189


In [1]:
import joblib

In [19]:
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_absolute_error

# 1. Încărcarea dataset-ului scalat (NU aplicăm din nou scalarea!)
df = pd.read_csv("ev_charging_patterns_scaled.csv")

# 2. Selectăm caracteristicile și variabila țintă
features = ["Battery Capacity (kWh)", "Energy Consumed (kWh)", "Charging Duration (hours)",
            "Charging Rate (kW)", "Time of Day", "Day of Week", "State of Charge (Start %)",
            "State of Charge (End %)", "Distance Driven (since last charge) (km)", "Temperature (°C)",
            "Vehicle Age (years)", "Vehicle Model_Chevy Bolt", "Vehicle Model_Hyundai Kona", 
            "Vehicle Model_Nissan Leaf", "Vehicle Model_Tesla Model 3",
            "Charging Station Location_Houston", "Charging Station Location_Los Angeles", 
            "Charging Station Location_New York", "Charging Station Location_San Francisco", 
            "Charger Type_Level 1", "Charger Type_Level 2", "User Type_Commuter", "User Type_Long-Distance Traveler"]

X = df[features]
y = df["Charging Cost (USD)"].values

# 3. Evităm valori negative în eticheta țintă
y = np.maximum(y, 0)

# 4. Împărțirea în set de antrenare și test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 5. Încărcăm scaler-ul folosit inițial pentru scalarea datelor (dacă nu există, ar trebui să fie salvat înainte!)
scaler = joblib.load("scaler.pkl")

# 6. Antrenare model MLPRegressor
mlp = MLPRegressor(                    ###### best hyperparamets Algoritm genetic 
    hidden_layer_sizes=(128,),
    activation='logistic',
    solver='adam',
    learning_rate_init=0.0034387184317667416,
    max_iter=3000,
    random_state=42,
    alpha=0.009493154312767503
)
mlp.fit(X_train, y_train)

# 7. Salvăm modelul antrenat
joblib.dump(mlp, "charging_cost_model.h5")

# 8. Predicții pe setul de test
y_pred = mlp.predict(X_test)

# 9. Evităm valori negative la predicții
y_pred = np.maximum(y_pred, 0)

# 10. Evaluare model
mae = mean_absolute_error(y_test, y_pred)
print(f"Eroare medie absolută: {mae:.2f} USD")

# 11. Funcție de predicție pentru utilizatori noi (folosim scaler-ul salvat)
def estimate_charging_cost(input_data):
    input_df = pd.DataFrame([input_data], columns=features)  # Convertim într-un DataFrame
    input_scaled = scaler.transform(input_df)  # Aplicăm normalizarea folosind scaler-ul salvat
    predicted_cost = mlp.predict(input_scaled)[0]
    return max(0, round(predicted_cost, 2))  # Evităm valori negative

# 12. Exemplu de utilizare
example_input = [108, 50, 1.5, 80, 2.0, 3, 50, 100, 200, 15, 2, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0]
print(f"Cost estimat: {estimate_charging_cost(example_input)} USD")


Eroare medie absolută: 0.52 USD
Cost estimat: 0.43 USD




In [23]:
import numpy
import joblib

print("NumPy version:", numpy.__version__)
print("Joblib version:", joblib.__version__)


NumPy version: 1.26.4
Joblib version: 1.2.0


In [20]:

import pandas as pd
import numpy as np
import joblib
import tkinter as tk
from tkinter import messagebox
from sklearn.preprocessing import StandardScaler

# Încărcarea modelului și a scalerului
model = joblib.load("charging_cost_model.h5")
scaler = joblib.load("scaler.pkl")

# Caracteristici utilizate de model
features = ["Battery Capacity (kWh)", "Energy Consumed (kWh)", "Charging Duration (hours)",
            "Charging Rate (kW)", "Time of Day", "Day of Week", "State of Charge (Start %)",
            "State of Charge (End %)", "Distance Driven (since last charge) (km)", "Temperature (°C)",
            "Vehicle Age (years)", "Vehicle Model_Chevy Bolt", "Vehicle Model_Hyundai Kona",
            "Vehicle Model_Nissan Leaf", "Vehicle Model_Tesla Model 3",
            "Charging Station Location_Houston", "Charging Station Location_Los Angeles",
            "Charging Station Location_New York", "Charging Station Location_San Francisco",
            "Charger Type_Level 1", "Charger Type_Level 2", "User Type_Commuter", "User Type_Long-Distance Traveler"]

# Funcție pentru estimarea costului
def estimate_cost():
    try:
        input_values = [float(entries[feature].get()) for feature in features]
        input_df = pd.DataFrame([input_values], columns=features)
        input_scaled = scaler.transform(input_df)
        predicted_cost = model.predict(input_scaled)[0]
        result_label.config(text=f"Cost estimat: {round(predicted_cost, 2)} USD")
    except ValueError:
        messagebox.showerror("Eroare", "Introduceți valori numerice valide pentru toate câmpurile!")

# Creare interfață Tkinter
root = tk.Tk()
root.title("Estimare Cost Încărcare EV")
canvas = tk.Canvas(root)
canvas.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
scrollbar = tk.Scrollbar(root, orient=tk.VERTICAL, command=canvas.yview)
scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
canvas.configure(yscrollcommand=scrollbar.set)
frame = tk.Frame(canvas)
canvas.create_window((0, 0), window=frame, anchor="nw")

entries = {}
for i, feature in enumerate(features):
    tk.Label(frame, text=feature).grid(row=i, column=0, sticky="w")
    entry = tk.Entry(frame)
    entry.grid(row=i, column=1)
    entries[feature] = entry

submit_button = tk.Button(frame, text="Estimează Costul", command=estimate_cost)
submit_button.grid(row=len(features), column=0, columnspan=2, pady=10)

result_label = tk.Label(frame, text="", font=("Arial", 12, "bold"))
result_label.grid(row=len(features) + 1, column=0, columnspan=2)

frame.update_idletasks()
canvas.config(scrollregion=canvas.bbox("all"))
root.mainloop()




In [35]:
model = joblib.load("charging_cost_model.h5")
scaler = joblib.load("scaler.pkl")



In [7]:
import random
import numpy as np
import pandas as pd
from deap import base, creator, tools, algorithms
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.exceptions import ConvergenceWarning
import warnings

# === Optional: suppress convergence warnings from MLP ===
warnings.filterwarnings("ignore", category=ConvergenceWarning)

# === Load dataset ===
data = pd.read_csv("ev_charging_patterns_data_encoded.csv")
X = pd.get_dummies(data.drop('Charging Duration (hours)', axis=1))
y = data['Charging Duration (hours)']

# Preprocessing
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# === Define hyperparameter options ===
layer_options = [
    (64,), (128,), (64, 32), (128, 64), (128, 64, 32), (128, 64, 32, 16)
]
activation_options = ['relu', 'tanh', 'logistic']
alpha_range = (1e-5, 1e-2)
lr_range = (1e-4, 1e-2)
batch_range = (16, 128)

decode_layer = {i: val for i, val in enumerate(layer_options)}
decode_activation = {i: val for i, val in enumerate(activation_options)}

# === Handle DEAP redefinition (for notebook reruns) ===
if 'FitnessMin' in creator.__dict__:
    del creator.FitnessMin
if 'Individual' in creator.__dict__:
    del creator.Individual

creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
creator.create("Individual", list, fitness=creator.FitnessMin)

# === DEAP setup ===
toolbox = base.Toolbox()
toolbox.register("attr_layers", random.randint, 0, len(layer_options)-1)
toolbox.register("attr_activation", random.randint, 0, len(activation_options)-1)
toolbox.register("attr_alpha", random.uniform, *alpha_range)
toolbox.register("attr_lr", random.uniform, *lr_range)
toolbox.register("attr_batch", random.randint, *batch_range)

toolbox.register("individual", tools.initCycle, creator.Individual,
                 (toolbox.attr_layers,
                  toolbox.attr_activation,
                  toolbox.attr_alpha,
                  toolbox.attr_lr,
                  toolbox.attr_batch), n=1)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

# === Evaluation function ===
def evaluate(ind):
    # Fix categorical values (clip to valid indices)
    ind[0] = int(min(max(ind[0], 0), len(decode_layer)-1))
    ind[1] = int(min(max(ind[1], 0), len(decode_activation)-1))

    # Clip continuous values to valid ranges
    ind[2] = max(1e-6, float(ind[2]))  # alpha
    ind[3] = max(1e-6, float(ind[3]))  # learning_rate_init
    batch_size = max(16, min(int(ind[4]), 256))  # valid batch size

    try:
        model = MLPRegressor(
            hidden_layer_sizes=decode_layer[ind[0]],
            activation=decode_activation[ind[1]],
            alpha=ind[2],
            learning_rate_init=ind[3],
            batch_size=batch_size,
            solver='adam',
            max_iter=500,
            early_stopping=True,
            random_state=42
        )
        model.fit(X_train, y_train)
        preds = model.predict(X_test)
        mse = mean_squared_error(y_test, preds)
        return (mse,)
    except Exception as e:
        print(f"Error with individual {ind}: {e}")
        return (float('inf'),)

# === Genetic operators ===
toolbox.register("evaluate", evaluate)
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutGaussian, mu=0, sigma=1, indpb=0.2)
toolbox.register("select", tools.selTournament, tournsize=3)

# === Run Genetic Algorithm ===
population = toolbox.population(n=12)
NGEN = 5
hof = tools.HallOfFame(1)

algorithms.eaSimple(population, toolbox, cxpb=0.5, mutpb=0.3,
                    ngen=NGEN, halloffame=hof, verbose=True)

# === Display best hyperparameters ===
best = hof[0]
best_params = {
    "hidden_layer_sizes": decode_layer[int(best[0])],
    "activation": decode_activation[int(best[1])],
    "alpha": max(1e-6, float(best[2])),
    "learning_rate_init": max(1e-6, float(best[3])),
    "batch_size": max(16, min(int(best[4]), 256))
}

print("\n✅ Best Hyperparameters Found:")
for k, v in best_params.items():
    print(f"{k}: {v}")


gen	nevals
0  	12    
1  	8     
2  	9     
3  	7     
4  	9     
5  	3     

✅ Best Hyperparameters Found:
hidden_layer_sizes: (128, 64)
activation: logistic
alpha: 0.9686070743997927
learning_rate_init: 0.0055383326465373115
batch_size: 116


In [10]:
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# --- Încarcă datele ---
df = pd.read_csv("ev_charging_synthetic_data.csv")

# --- Adaugă feature engineering ---
def add_features(df):
    soc_diff = df['State of Charge (End %)'] - df['State of Charge (Start %)']
    df['Charging Efficiency (kWh/h)'] = df['Energy Consumed (kWh)'] / df['Charging Rate (kW)'].replace(0, np.nan)
    df['Energy per Charge %'] = df['Energy Consumed (kWh)'] / soc_diff.replace(0, np.nan)
    df['Distance per kWh'] = df['Distance Driven (since last charge) (km)'] / df['Energy Consumed (kWh)'].replace(0, np.nan)
    df['Total Charge Gained'] = soc_diff
    df['Charger Efficiency'] = df['Charging Rate (kW)'] / soc_diff.replace(0, np.nan)
    avg_temp_ref = 20
    df['Temperature Adjusted Consumption'] = df['Energy Consumed (kWh)'] * (1 + abs(df['Temperature (°C)'] - avg_temp_ref) / avg_temp_ref)
    df.fillna(0, inplace=True)
    return df

df = add_features(df)

# --- Features și target (schimbat target la Charging Cost) ---
features = [
    "Battery Capacity (kWh)", "Energy Consumed (kWh)", "Charging Rate (kW)", "Time of Day", "Day of Week",
    "State of Charge (Start %)", "State of Charge (End %)", "Distance Driven (since last charge) (km)",
    "Temperature (°C)", "Vehicle Age (years)",

    "Vehicle Model_BMW i3", "Vehicle Model_Chevy Bolt", "Vehicle Model_Hyundai Kona",
    "Vehicle Model_Nissan Leaf", "Vehicle Model_Tesla Model 3",

    "Charging Station Location_Chicago", "Charging Station Location_Houston",
    "Charging Station Location_Los Angeles", "Charging Station Location_New York",
    "Charging Station Location_San Francisco",

    "User Type_Casual Driver", "User Type_Commuter", "User Type_Long-Distance Traveler",

    "Charger Type_DC Fast Charger", "Charger Type_Level 1", "Charger Type_Level 2",

    "Charging Efficiency (kWh/h)", "Energy per Charge %", "Distance per kWh",
    "Total Charge Gained", "Charger Efficiency", "Temperature Adjusted Consumption"
]

target = "Charging Cost (USD)"

X = df[features]
y = df[target]

# --- Split și scalare ---
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# --- Model ---
mlp = MLPRegressor(
    hidden_layer_sizes=(128, 64, 32, 16),
    activation='relu',
    solver='adam',
    learning_rate_init=0.0005,
    max_iter=3000,
    random_state=42,
    alpha=0.0001
)

mlp.fit(X_train_scaled, y_train)

# --- Evaluare ---
y_pred = mlp.predict(X_test_scaled)
print(f"MAE: {mean_absolute_error(y_test, y_pred):.2f} USD")
print(f"MSE: {mean_squared_error(y_test, y_pred):.2f}")
print(f"R2: {r2_score(y_test, y_pred):.2f}")

cv_scores = cross_val_score(mlp, X_train_scaled, y_train, cv=5, scoring='r2')
print(f"Cross-validation R² scores: {cv_scores}")
print(f"Cross-validation R² mean: {np.mean(cv_scores):.2f}")

# --- Funcție predicție cost ---
def estimate_charging_cost(input_data):
    input_df = pd.DataFrame([input_data], columns=features)
    input_scaled = scaler.transform(input_df)
    pred = mlp.predict(input_scaled)[0]
    return round(pred, 2)
def compute_derived_features(input_data):
    energy = input_data["Energy Consumed (kWh)"]
    rate = input_data["Charging Rate (kW)"]
    soc_start = input_data["State of Charge (Start %)"]
    soc_end = input_data["State of Charge (End %)"]
    dist = input_data["Distance Driven (since last charge) (km)"]
    temp = input_data["Temperature (°C)"]

    charge_diff = soc_end - soc_start if (soc_end - soc_start) != 0 else 1e-6  # evită divizare la zero

    derived = {
        "Charging Efficiency (kWh/h)": energy / rate if rate != 0 else 0,
        "Energy per Charge %": energy / charge_diff,
        "Distance per kWh": dist / energy if energy != 0 else 0,
        "Total Charge Gained": charge_diff,
        "Charger Efficiency": rate / charge_diff,
        "Temperature Adjusted Consumption": energy * (1 + abs(temp - 20)/20)
    }
    input_data_updated = input_data.copy()
    input_data_updated.update(derived)
    return input_data_updated

# --- Exemplu input ---
test_input_basic = {
    "Battery Capacity (kWh)": 50,
    "Energy Consumed (kWh)": 23.8,
    "Charging Rate (kW)": 34,
    "Time of Day": 14,
    "Day of Week": 3,
    "State of Charge (Start %)": 7.5,
    "State of Charge (End %)": 75.0,
    "Distance Driven (since last charge) (km)": 130,
    "Temperature (°C)": 18,
    "Vehicle Age (years)": 5,

    "Vehicle Model_BMW i3": 1,
    "Vehicle Model_Chevy Bolt": 0,
    "Vehicle Model_Hyundai Kona": 0,
    "Vehicle Model_Nissan Leaf": 0,
    "Vehicle Model_Tesla Model 3": 0,

    "Charging Station Location_Chicago": 0,
    "Charging Station Location_Houston": 1,
    "Charging Station Location_Los Angeles": 0,
    "Charging Station Location_New York": 0,
    "Charging Station Location_San Francisco": 0,

    "User Type_Casual Driver": 1,
    "User Type_Commuter": 0,
    "User Type_Long-Distance Traveler": 0,

    "Charger Type_DC Fast Charger": 0,
    "Charger Type_Level 1": 0,
    "Charger Type_Level 2": 1,


}
test_input_full = compute_derived_features(test_input_basic)

# --- 7. Transformă inputul în DataFrame în ordinea corectă ---
input_df = pd.DataFrame([test_input_full])
input_df = input_df.reindex(columns=X.columns, fill_value=0)

predicted_cost = estimate_charging_cost(test_input_full)
print(f"Predicted Charging Cost: {predicted_cost} USD")


MAE: 0.23 USD
MSE: 0.08
R2: 0.98
Cross-validation R² scores: [0.97458157 0.98914381 0.98075076 0.98452031 0.98568969]
Cross-validation R² mean: 0.98
Predicted Charging Cost: 3.69 USD


In [7]:
print(df.groupby(df.filter(like='Charger Type').columns.tolist())['Charging Cost (USD)'].max())

Charger Type_DC Fast Charger  Charger Type_Level 1  Charger Type_Level 2
0                             0                     1                       11.959452
                              1                     0                       12.293938
1                             0                     0                       12.227615
Name: Charging Cost (USD), dtype: float64
