# **1. Import Library**

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
from openpyxl import load_workbook
from openpyxl.utils import get_column_letter
import itertools
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
from sklearn.pipeline import make_pipeline
from sklearn.pipeline import Pipeline


import warnings
import sys
# Silence every warning for the rest of the session, but only when the
# interpreter was started without explicit -W options (sys.warnoptions is
# empty), so a user-supplied warning configuration is left untouched.
if not sys.warnoptions:
    warnings.simplefilter("ignore")

# **2. Input Dataset**

In [2]:
# Directory that holds the input workbook; "/Dataset.xlsx" is appended to it
# when the model runner is configured further down.
path = "C:/Users/User/Videos/Project Management and Machine Learning/Dataset/COST/KONS"

# **3. Clear Excel Output**

In [3]:
def clear_all_data(file):
    """Reduce the workbook at ``file`` to its first worksheet and save it in place.

    Every worksheet after the first one is removed; the first worksheet and
    its contents are kept as they are. If the loaded workbook reports no
    worksheets at all, a worksheet named "Sheet1" is created instead.

    Args:
        file: Path of an existing Excel workbook readable by openpyxl.
    """
    wb = load_workbook(file)
    titles = wb.sheetnames

    if titles:
        # Drop everything that follows the first worksheet.
        for extra_title in titles[1:]:
            wb.remove(wb[extra_title])
    else:
        # Nothing to keep: make sure the workbook has one worksheet.
        wb.create_sheet("Sheet1")

    wb.save(file)
    wb.close()

In [4]:
# Directory containing the two output workbooks that are reset before modelling.
clear_path = "C:/Users/User/Videos/Project Management and Machine Learning/Konvesional/KONS/COST"

In [5]:
# Paths of the prediction workbook and the tuning-log workbook.
excel_file1 = f"{clear_path}/RESULT-KONVE-KONS-COST.xlsx"
excel_file2 = f"{clear_path}/PARAM-KONVE-KONS-COST.xlsx"

# Reduce each output workbook to its first worksheet before a new run.
for _workbook_path in (excel_file1, excel_file2):
    clear_all_data(_workbook_path)

# **4. Modeling**

In [6]:
class KonveRegressor:
    """Tune, fit and export one regressor per worksheet of an Excel data source.

    For a given worksheet the class splits the rows 80/20, grid-searches the
    requested model on the training part, refits the best configuration,
    predicts the held-out part, and appends both the predictions and the
    tuning log to two Excel workbooks.
    """

    def __init__(self, data_source_file, result_file, params_file):
        """Store the workbook locations and register the supported models.

        Args:
            data_source_file: Excel workbook read with ``pd.read_excel``. Every
                sheet handed to ``train_and_predict`` must contain an ``ACWP``
                column plus at least two feature columns.
            result_file: Workbook that receives the test-set predictions.
            params_file: Workbook that receives the grid-search log.
        """
        self.data_source_file = data_source_file
        self.result_file = result_file
        self.params_file = params_file
        # Each entry is either an estimator class (instantiated per candidate)
        # or a Pipeline instance that marks the polynomial-regression model.
        self.models = {
            'ANN': MLPRegressor,
            'SVM': SVR,
            'Poly': make_pipeline(PolynomialFeatures(), LinearRegression())
        }

    def train_and_predict(self, sheet, model_name='ANN', param_grid=None):
        """Tune and fit ``model_name`` on one worksheet and export the outcome.

        Args:
            sheet: Name of the worksheet in ``data_source_file``; also used as
                the target sheet name in both output workbooks.
            model_name: Key of ``self.models`` ('ANN', 'SVM' or 'Poly').
            param_grid: Mapping of parameter name to the list of values to
                try. ``None`` or an empty mapping evaluates the model once
                with its default parameters.

        Raises:
            ValueError: If ``model_name`` is unknown, or if the grid produced
                no candidate (e.g. one of its value lists is empty).
        """
        # Split the sheet into features and the 'ACWP' target column.
        data = pd.read_excel(self.data_source_file, sheet_name=sheet)
        X = data.drop(columns='ACWP')
        y = data['ACWP']

        # Fixed seed keeps the split reproducible; each part is then put back
        # into its original row order.
        x_train, x_test, y_train, y_test = train_test_split(
            X, y, train_size=0.8, random_state=58)
        x_train = x_train.sort_index(ascending=True)
        y_train = y_train.sort_index(ascending=True)
        x_test = x_test.sort_index(ascending=True)
        y_test = y_test.sort_index(ascending=True)

        model_class = self.models.get(model_name)
        if model_class is None:
            raise ValueError(f"Invalid model name: {model_name}")

        best_params, results = self.tune_parameters(model_class, x_train, y_train, param_grid)
        if best_params is None:
            raise ValueError(
                f"Parameter grid for {model_name} produced no candidate to evaluate")

        # Refit with the winning parameters. Going through the same builder as
        # the tuning loop guarantees every tuned parameter is applied.
        model = self._build_model(model_class, best_params)
        model.fit(x_train, y_train)

        x_test = x_test.reset_index(drop=True)
        acwp_pred = model.predict(x_test)

        # The first two feature columns are exported under the names AT and BCWP.
        perform = pd.DataFrame({'AT': x_test.iloc[:, 0].values,
                                'BCWP': x_test.iloc[:, 1].values,
                                'ACWP': y_test.values,
                                'ACWP_Pred': acwp_pred,
                                'Model': model_name})

        # Constant label columns identifying this run in the output workbooks.
        results['Model'] = model_name
        results['Method'] = "konvensional"
        results['Subwork'] = sheet
        results['Work'] = "KONS"
        results['Process'] = "Cost"

        perform['Method'] = "konvensional"
        perform['Subwork'] = sheet
        perform['Work'] = "KONS"
        perform['Process'] = "Cost"

        self.to_excel(perform, self.result_file, sheet)
        self.to_excel(results, self.params_file, sheet)

    def tune_parameters(self, model_class, X, y, param_grid=None):
        """Exhaustively evaluate ``param_grid`` and return the best combination.

        Every candidate is fitted on ``X``/``y`` and scored on that same data;
        the combination with the lowest scaled RMSE wins, the first one on ties.

        Args:
            model_class: An estimator class, or a ``Pipeline`` instance for the
                polynomial-regression model.
            X: Training features.
            y: Training target.
            param_grid: Mapping of parameter name to list of candidate values;
                ``None`` is treated as an empty grid (one default-parameter run).

        Returns:
            Tuple ``(best_params, results_df)``. ``best_params`` is the winning
            parameter dict, or ``None`` when no candidate was evaluated.
            ``results_df`` has one row per candidate with its parameters plus
            the columns ``R2`` and ``RMSE``.
        """
        if param_grid is None:
            param_grid = {}

        best_params = None
        best_score = float('inf')
        results = []

        for params in self.grid_search(param_grid):
            model = self._build_model(model_class, params)
            model.fit(X, y)
            y_pred = model.predict(X)
            r2 = self.evaluate_r2(y, y_pred)
            rmse = self.evaluate_rmse(y, y_pred)

            results.append({**params, 'R2': r2, 'RMSE': rmse})

            if rmse < best_score:
                best_score = rmse
                best_params = params

        return best_params, pd.DataFrame(results)

    @staticmethod
    def _build_model(model_spec, params):
        """Return a fresh, unfitted estimator for ``model_spec`` configured with ``params``."""
        if isinstance(model_spec, Pipeline):
            # A new pipeline per candidate so no fitted state is shared.
            model = make_pipeline(PolynomialFeatures(), LinearRegression())
        else:
            model = model_spec()
        model.set_params(**params)
        return model

    @staticmethod
    def grid_search(param_grid):
        """Yield one ``{name: value}`` dict per combination in ``param_grid``.

        Combinations follow ``itertools.product`` order (last key varies
        fastest). An empty grid yields a single empty dict, so callers still
        evaluate the model once with its defaults.
        """
        keys = list(param_grid)
        for combination in itertools.product(*(param_grid[key] for key in keys)):
            yield dict(zip(keys, combination))

    @staticmethod
    def evaluate_r2(y_true, y_pred):
        """Return the coefficient of determination of the predictions."""
        return r2_score(y_true, y_pred)

    @staticmethod
    def evaluate_rmse(y_true, y_pred, scale=1_000_000_000):
        """Return the root mean squared error divided by ``scale``.

        NOTE(review): the default divisor of 1e9 presumably expresses the
        error in billions of the target's unit -- confirm.
        """
        return np.sqrt(mean_squared_error(y_true, y_pred)) / scale

    @staticmethod
    def to_excel(df, file, sheet_name):
        """Append ``df`` (with its header row) to ``sheet_name`` in workbook ``file``.

        If the sheet already exists the frame is written below its last used
        row; otherwise the sheet is created. If ``file`` does not exist, a new
        workbook containing only this frame is written.

        NOTE(review): attaching an already loaded workbook through
        ``writer.book`` / ``writer.sheets`` depends on the installed pandas
        version still allowing those assignments -- confirm before upgrading.
        """
        try:
            book = load_workbook(file)
        except FileNotFoundError:
            df.to_excel(file, sheet_name=sheet_name, index=False)
            return

        writer = pd.ExcelWriter(file, engine='openpyxl')
        writer.book = book
        writer.sheets = {ws.title: ws for ws in book.worksheets}

        # Row 0 for a new sheet, otherwise directly below the existing content.
        if sheet_name in writer.sheets:
            start_row = writer.sheets[sheet_name].max_row
        else:
            start_row = 0

        df.to_excel(writer, sheet_name=sheet_name, index=False, header=True,
                    startrow=start_row)

        # close() saves the workbook and also releases the underlying file
        # handle, which save() alone leaves open.
        writer.close()

# **Running K1B1**

In [7]:
# Input workbook; train_and_predict reads one worksheet of it per call.
data_source_file = path+"/Dataset.xlsx"
# NOTE(review): these two names are relative paths, whereas the workbooks
# cleared earlier live under `clear_path`. They only refer to the same files
# when the working directory is `clear_path` -- confirm.
result_file = "RESULT-KONVE-KONS-COST.xlsx"
params_file = "PARAM-KONVE-KONS-COST.xlsx"

ensemble = KonveRegressor(data_source_file, result_file,params_file )


# Candidate values tried for each model; every combination is evaluated.
# NOTE(review): 0.01 is listed twice for learning_rate_init, so those
# combinations are fitted and logged twice -- possibly a typo for another value.
param_grid_ann = { 'hidden_layer_sizes': [10,50, 100],'activation': ['logistic', 'tanh', 'relu'],'learning_rate_init': [0.01, 0.05, 0.01] }
param_grid_svm = { 'kernel': [ 'rbf'] ,'C':[1,2,4] ,'epsilon':[0.1,0.2,0.3]}
# Keys use the pipeline's "<step>__<parameter>" naming.
# NOTE(review): 'linearregression__normalize' is only valid on scikit-learn
# releases whose LinearRegression still accepts `normalize` -- confirm the
# installed version.
param_grid_poly = {
    'polynomialfeatures__degree': [2, 3, 4],  
    'linearregression__normalize': [True, False],
    'polynomialfeatures__include_bias' : [True, False]
}

In [8]:
# Run every model on every worksheet. The order matches the former
# one-call-per-cell layout (K1B1, K1B2, K2B1, K2B2, K3B1, K3B2, Kum; for each
# sheet ANN, then SVM, then Poly), so rows are appended to the output
# workbooks in the same sequence as before.
sheet_names = ('K1B1', 'K1B2', 'K2B1', 'K2B2', 'K3B1', 'K3B2', 'Kum')
model_grids = (
    ('ANN', param_grid_ann),
    ('SVM', param_grid_svm),
    ('Poly', param_grid_poly),
)

for sheet_name in sheet_names:
    for model_name, grid in model_grids:
        ensemble.train_and_predict(sheet_name, model_name=model_name, param_grid=grid)