In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import matplotlib.pyplot as plt

# CSV files whose rows are pooled to form the training set.
train_files = [
    'experiment_1.csv',
    'experiment_9.csv',
    'experiment_20.csv',
    'experiment_21.csv',
    'experiment_23.csv',
    'experiment_24.csv',
]

# CSV files held back and pooled to form the testing set.
test_files = [
    'experiment_4.csv',
    'experiment_22.csv',
]

def load_and_concatenate(files):
    """Read every CSV path in ``files`` and stack the rows into one DataFrame.

    The frames are concatenated in the order given and the result receives a
    fresh 0..n-1 index, so row labels from the individual files are discarded.
    """
    frames = list(map(pd.read_csv, files))
    combined = pd.concat(frames, ignore_index=True)
    return combined

# Pool the per-experiment CSVs into one training frame and one testing frame.
train_data = load_and_concatenate(train_files)
test_data = load_and_concatenate(test_files)

# Print a quick summary of each frame: heading, first rows, then column info.
# DataFrame.info() writes its own report and returns None, so wrapping it in
# print() also emits a trailing "None" line.
for heading, frame in (("Training data:", train_data), ("Testing data:", test_data)):
    print(heading)
    print(frame.head())
    print(frame.info())

def preprocess_data(df, scaler=None, fit=True):
    """Split ``df`` into a standardised feature matrix and a target vector.

    Args:
        df: DataFrame containing the 'input_voltage' feature column and the
            'el_power' target column.
        scaler: Optional StandardScaler to use. When omitted, a new scaler is
            created and fitted on ``df``.
        fit: When True (default) the scaler is fitted on ``df`` before
            transforming. Pass False together with an already fitted
            ``scaler`` to reuse its statistics unchanged.

    Returns:
        Tuple ``(X_scaled, y)``: the scaled 2-D feature array and the 1-D
        target array.

    Raises:
        ValueError: If ``fit`` is False but no fitted ``scaler`` is supplied.
    """
    # Feature and target
    X = df[['input_voltage']].values
    y = df['el_power'].values

    if scaler is None:
        if not fit:
            raise ValueError("fit=False requires an already fitted scaler")
        scaler = StandardScaler()

    # Feature scaling: learn mean/std only when asked to, otherwise reuse them.
    X_scaled = scaler.fit_transform(X) if fit else scaler.transform(X)

    return X_scaled, y


# Fit the scaler on the training data only and reuse it for the test data.
# Fitting a second scaler on the test set would standardise the two sets with
# different mean/std, so the models would see inconsistently scaled inputs.
feature_scaler = StandardScaler()
X_train, y_train = preprocess_data(train_data, scaler=feature_scaler)
X_test, y_test = preprocess_data(test_data, scaler=feature_scaler, fit=False)

# Candidate regressors, keyed by the label used when reporting their scores.
# Insertion order is the order in which they are later fitted and printed.
models = {}
models['Linear Regression'] = LinearRegression()
# Both tree ensembles use 100 estimators and a fixed random_state.
models['Random Forest'] = RandomForestRegressor(n_estimators=100, random_state=42)
models['Gradient Boosting'] = GradientBoostingRegressor(n_estimators=100, random_state=42)
models['Support Vector Regression'] = SVR()
models['K-Nearest Neighbors'] = KNeighborsRegressor()

def evaluate_model(model, X_train, y_train, X_test, y_test):
    """Fit ``model`` on the training split and score it on the test split.

    The model is fitted in place. The return value is the root of the mean
    squared error between ``y_test`` and the model's predictions for
    ``X_test``.
    """
    model.fit(X_train, y_train)
    test_mse = mean_squared_error(y_test, model.predict(X_test))
    return np.sqrt(test_mse)

# Fit each candidate in turn, record its test RMSE under its label, and
# report the score as soon as that model has been evaluated.
results = {}
for name, model in models.items():
    rmse = results[name] = evaluate_model(
        model, X_train, y_train, X_test, y_test
    )
    print(f'{name} - Test RMSE: {rmse}')

# Neural Network (TensorFlow/Keras)
def build_nn_model(input_dim=None):
    """Build and compile a small fully connected regression network.

    The network has two ReLU hidden layers (64 and 32 units) followed by a
    single-unit output layer. It is compiled with the Adam optimizer and a
    mean-squared-error loss.

    Args:
        input_dim: Number of input features. When omitted, it is taken from
            the second dimension of the module-level ``X_train`` array, which
            must then already be defined.

    Returns:
        The compiled Sequential model, not yet trained.
    """
    if input_dim is None:
        # Fall back to the width of the global training matrix.
        input_dim = X_train.shape[1]

    model = Sequential([
        Dense(64, activation='relu', input_shape=(input_dim,)),
        Dense(32, activation='relu'),
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

# Train the network for 50 epochs in batches of 32, with 10% of the training
# data set aside by Keras for per-epoch validation.
nn_model = build_nn_model()
history = nn_model.fit(X_train, y_train, validation_split=0.1, epochs=50, batch_size=32, verbose=1)

# Evaluate Neural Network
y_nn_pred = nn_model.predict(X_test)
nn_rmse = np.sqrt(mean_squared_error(y_test, y_nn_pred))
print(f'Neural Network - Test RMSE: {nn_rmse}')

# Record the score next to the other candidates. Without this entry the
# best-model selection over `results` would never consider the network.
results['Neural Network'] = nn_rmse

# Draw the per-epoch loss curves recorded during training: one line for the
# training loss and one for the validation loss.
for history_key, curve_label in (('loss', 'Train Loss'), ('val_loss', 'Validation Loss')):
    plt.plot(history.history[history_key], label=curve_label)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

# Final selection: take the entry in `results` with the lowest RMSE.
# If several entries tie, the one inserted first is chosen.
best_model_name, best_rmse = min(results.items(), key=lambda entry: entry[1])
print(f'Best Model: {best_model_name} with RMSE: {best_rmse}')
