# Section 1: Imports

In [None]:
import pandas as pd
import numpy as np
import glob
import os
# Silence TensorFlow's C++ logging; set here, ahead of the `import tensorflow` further down.
# Levels: 0 = show everything, 1 = hide INFO, 2 = hide INFO+WARNING, 3 = hide INFO+WARNING+ERROR
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input
import matplotlib.pyplot as plt

# Section 2: Load and Label Data

In [None]:
# Define m values for each network
m_values = {'Ex_1': 10, 'Ex_2': 21, 'Ex_3': 30, 'Ex_4': 20}


def _load_runs(path, m, time_col, nc, nc_col=None, prefer_dmps=True):
    """Read one timing CSV and reduce it to the columns m, p, n_LU, NC, R.

    Parameters
    ----------
    path : str
        CSV file to read.
    m : int
        Value written to the 'm' column for every row of this file.
    time_col : str
        Column that is divided by 'tim_Lp' to form the ratio 'R'.
    nc : int
        Value written to 'NC'. When `nc_col` is given, this is only the
        fallback used if the file has no such column.
    nc_col : str, optional
        Per-row column copied into 'NC' when the file contains it.
    prefer_dmps : bool
        True: '#d-MPs' (if present) overrides any existing 'p' column.
        False: '#d-MPs' is used only when the file has no 'p' column.

    Returns
    -------
    pandas.DataFrame
        The selected columns ['m', 'p', 'n_LU', 'NC', 'R'].

    Raises
    ------
    KeyError
        If a required column ('tim_Lp', `time_col`, 'n_LU', or both 'p'
        and '#d-MPs') is missing from the file.
    """
    df = pd.read_csv(path)
    df['m'] = m
    df['NC'] = df[nc_col] if nc_col is not None and nc_col in df else nc
    df['R'] = df[time_col] / df['tim_Lp']
    if '#d-MPs' in df and (prefer_dmps or 'p' not in df):
        df['p'] = df['#d-MPs']
    return df[['m', 'p', 'n_LU', 'NC', 'R']]


dfs = []

# Load all CSVs from local (NC=32); adjust path if needed
for i in range(1, 5):
    dfs.append(_load_runs(f'Ex_{i}.csv', m_values[f'Ex_{i}'], 'tim_K2', nc=32))

# Load parallel execution files (NC in {4, ..., 28}); NC comes from '#th',
# falling back to 28 when that column is absent. 'p' is taken as-is when the
# file has it; '#d-MPs' is used only as a fallback so a file without 'p' no
# longer fails with a KeyError.
for i in range(1, 5):
    dfs.append(
        _load_runs(f'par_Ex_{i}.csv', m_values[f'Ex_{i}'], 'tim_K',
                   nc=28, nc_col='#th', prefer_dmps=False)
    )

# Load cloud execution files (NC = 96, 192)
# NOTE(review): the two prefixes differ only by case ('ext' vs 'Ext'); on a
# case-insensitive filesystem both would resolve to the same file - confirm.
for tag, nc in (('ext', 96), ('Ext', 192)):
    for i in range(1, 5):
        dfs.append(_load_runs(f'{tag}_Ex_{i}.csv', m_values[f'Ex_{i}'], 'tim_K2', nc=nc))

# Combine all
data = pd.concat(dfs, ignore_index=True)
# A zero 'tim_Lp' yields +/-inf in R, which dropna() alone would keep; turn
# those into NaN first so they are removed together with missing values.
data = data.replace([np.inf, -np.inf], np.nan).dropna()
data = data[data['p'] > 5]  # Filter based on p threshold

# Section 3: Normalize Features

In [None]:
# Feature matrix (kept as a DataFrame so the scaler sees column names) and target vector.
feature_columns = ['m', 'p', 'n_LU', 'NC']
X = data[feature_columns]
y = data['R'].to_numpy()

# Standardise every feature to zero mean / unit variance; `scaler` is fitted on all rows
# and is reused later to transform new samples.
scaler = StandardScaler()
X_scaled = scaler.fit(X).transform(X)

# Section 4: Define Keras Model

In [None]:
def build_model(input_dim):
    """Build and compile the feed-forward regression network.

    Layers, in order: Input(input_dim) -> Dense(64, relu) -> Dropout(0.2)
    -> Dense(32, relu) -> Dense(1). Compiled with the 'adam' optimizer and
    'mse' loss.

    Parameters
    ----------
    input_dim : int
        Number of input features per sample.

    Returns
    -------
    A compiled, untrained Keras `Sequential` model.
    """
    net = Sequential()
    net.add(Input(shape=(input_dim,)))
    net.add(Dense(64, activation='relu'))
    net.add(Dropout(0.2))
    net.add(Dense(32, activation='relu'))
    net.add(Dense(1))  # single linear output for the regression target
    net.compile(optimizer='adam', loss='mse')
    return net

# Section 5: K-Fold Training

In [None]:

kf = KFold(n_splits=5, shuffle=True, random_state=42)
rmse_scores = []

# Seed TensorFlow as well (weight init, dropout, batch shuffling) so the fold
# scores are repeatable, matching the seeded KFold split above.
tf.random.set_seed(42)

# Cross-validate on the unscaled features: each fold fits its own scaler on the
# training rows only, so validation rows never influence the scaling statistics.
X_raw = np.asarray(X, dtype=float)

for fold, (train_idx, val_idx) in enumerate(kf.split(X_raw), start=1):
    fold_scaler = StandardScaler().fit(X_raw[train_idx])
    X_train = fold_scaler.transform(X_raw[train_idx])
    X_val = fold_scaler.transform(X_raw[val_idx])
    y_train, y_val = y[train_idx], y[val_idx]

    model = build_model(X_train.shape[1])
    model.fit(X_train, y_train, epochs=50, batch_size=16, verbose=0)

    preds = model.predict(X_val, verbose=0).flatten()
    rmse = np.sqrt(mean_squared_error(y_val, preds))
    rmse_scores.append(rmse)
    print(f"Fold {fold} RMSE: {rmse:.4f}")

print(f"Average RMSE: {np.mean(rmse_scores):.4f}")

# The fold models only estimate generalisation error. The model kept in `model`
# for later prediction is refitted on every row, using the globally fitted
# `scaler` output (`X_scaled`) so it matches how new samples are transformed.
model = build_model(X_scaled.shape[1])
model.fit(X_scaled, y, epochs=50, batch_size=16, verbose=0)

# Section 6: Predict for new NC values

In [None]:
new_NC_values = [64, 128, 256, 512, 1024]
unique_rows = data[['m', 'p', 'n_LU']].drop_duplicates()

# Build the full (m, p, n_LU) x NC grid up front: every unique row is repeated
# once per NC value, in the same order the nested loops would visit them.
n_nc = len(new_NC_values)
grid = unique_rows.iloc[np.repeat(np.arange(len(unique_rows)), n_nc)].reset_index(drop=True)
grid['NC'] = np.tile(new_NC_values, len(unique_rows))

# One transform + one batched predict instead of a predict() call per sample,
# which is dominated by per-call overhead.
grid_scaled = scaler.transform(grid[['m', 'p', 'n_LU', 'NC']])

# The output keeps the column name 'nLU' (not 'n_LU') as before.
pred_df = grid.rename(columns={'n_LU': 'nLU'})
pred_df['Predicted_R'] = model.predict(grid_scaled, verbose=0).ravel()

# Kept for backward compatibility: the same results as a list of dicts.
predictions = pred_df.to_dict('records')

pred_df.head()