In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.backends.backend_pdf import PdfPages
from scipy.optimize import curve_fit
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import RANSACRegressor
from sklearn.metrics import mean_squared_error

In [None]:
# Read the CSV, parse FISCAL_DATE as datetimes, and index the frame by that column.
file_path = r'/Users/behnam/Library/CloudStorage/OneDrive-McMasterUniversity/Post doc/Works/expedia_data_for_macc_without_index_names_2024- revised.csv'
base = pd.read_csv(file_path).assign(
    FISCAL_DATE=lambda frame: pd.to_datetime(frame['FISCAL_DATE'])
)
basic = base.set_index('FISCAL_DATE')
# Bare last expression so the notebook renders the indexed frame.
basic

In [None]:
def linear_regression(X, y):
    """Fit a scikit-learn ``LinearRegression`` on a single feature.

    Parameters
    ----------
    X : numpy.ndarray
        Predictor values; reshaped to one column (``(-1, 1)``) before fitting.
    y : array-like
        Target values passed straight to ``fit``.

    Returns
    -------
    LinearRegression
        The fitted estimator.
    """
    single_column = X.reshape(-1, 1)
    # ``fit`` returns the estimator itself, so the fitted model can be returned directly.
    return LinearRegression().fit(single_column, y)

def ransac_regression(X, y, random_state=None):
    """Fit a RANSAC-wrapped ``LinearRegression`` on a single feature.

    Parameters
    ----------
    X : numpy.ndarray
        Predictor values; reshaped to one column (``(-1, 1)``) before fitting.
    y : array-like
        Target values passed straight to ``fit``.
    random_state : int, numpy.random.RandomState or None, optional
        Forwarded to ``RANSACRegressor``. RANSAC draws random subsets, so pass a
        fixed integer to make the fit reproducible between runs. The default
        ``None`` keeps the previous unseeded behaviour.

    Returns
    -------
    RANSACRegressor
        The fitted estimator.
    """
    model = RANSACRegressor(LinearRegression(), random_state=random_state)
    model.fit(X.reshape(-1, 1), y)
    return model

def generalized_logistic(X, a, b, c, d, v):
    """Generalized logistic (Richards) curve.

    Evaluates ``a + (c - a) / (1 + exp(b * (d - X))) ** (1 / v)``.

    Parameters
    ----------
    X : float or numpy.ndarray
        Points at which to evaluate the curve.
    a : float
        Value approached as ``X -> -inf`` (for ``b > 0``, ``v > 0``).
    b : float
        Growth rate.
    c : float
        Value approached as ``X -> +inf`` (for ``b > 0``).
    d : float
        Horizontal shift of the transition.
    v : float
        Shape exponent; must be non-zero. ``v == 1`` gives the ordinary logistic.

    Returns
    -------
    float or numpy.ndarray
        Curve values, same shape as ``X``.
    """
    # The 1/v power must apply to the whole denominator. Written as
    # ``1 + exp(...) ** (1 / v)`` the power binds only to the exponential, which
    # collapses the model to a plain logistic in b/v and leaves v unidentifiable.
    denominator = (1 + np.exp(b * (d - X))) ** (1 / v)
    return a + (c - a) / denominator

def gompertz_curve(X, a, b, c, d):
    """Gompertz growth curve with a vertical offset.

    Evaluates ``a + b * exp(-c * exp(-d * X))``.

    Parameters
    ----------
    X : float or numpy.ndarray
        Points at which to evaluate the curve.
    a : float
        Vertical offset.
    b : float
        Amplitude of the Gompertz term.
    c : float
        Displacement along X.
    d : float
        Growth rate.

    Returns
    -------
    float or numpy.ndarray
        Curve values, same shape as ``X``.
    """
    # b scales the Gompertz term. Adding it instead made a and b two
    # interchangeable offsets and fixed the curve's amplitude at 1.
    return a + b * np.exp(-c * np.exp(-d * X))

def exponential_rise(X, a, b, c):
    """Exponential rise towards a plateau.

    Evaluates ``a - b * exp(-c * X)``.

    Parameters
    ----------
    X : float or numpy.ndarray
        Points at which to evaluate the curve.
    a : float
        Plateau approached as ``X -> +inf`` (for ``c > 0``).
    b : float
        Gap below the plateau at ``X == 0``.
    c : float
        Rate constant.

    Returns
    -------
    float or numpy.ndarray
        Curve values, same shape as ``X``.
    """
    # b scales the decaying term. Adding it instead turned b into a second
    # offset that is indistinguishable from a.
    return a - b * np.exp(-c * X)

def michaelis_menten(X, a, b):
    """Michaelis-Menten saturation curve ``a * X / (b + X)``.

    Parameters
    ----------
    X : float or numpy.ndarray
        Points at which to evaluate the curve.
    a : float
        Numerator scale (the value approached for large ``X``).
    b : float
        Offset added to ``X`` in the denominator.

    Returns
    -------
    float or numpy.ndarray
        Curve values, same shape as ``X``.
    """
    scaled = a * X
    shifted = b + X
    return scaled / shifted

def logarithmic_function(X, a, b, c):
    """Shifted logarithm ``a + b * log(X - c)``.

    Parameters
    ----------
    X : float or numpy.ndarray
        Points at which to evaluate the curve.
    a : float
        Vertical offset.
    b : float
        Scale applied to the logarithm.
    c : float
        Horizontal shift subtracted from ``X`` before taking the natural log.

    Returns
    -------
    float or numpy.ndarray
        Curve values, same shape as ``X``.
    """
    shifted = X - c
    log_term = np.log(shifted)
    return a + b * log_term

def fit_and_predict(X_sorted, y_sorted, method):
    """Fit the model named by ``method`` and return its predictions at ``X_sorted``.

    Parameters
    ----------
    X_sorted : numpy.ndarray
        1-D predictor values. Reshaped to a single column for the sklearn models
        and passed as-is to ``curve_fit`` for the parametric curves.
    y_sorted : numpy.ndarray
        Response values aligned with ``X_sorted``.
    method : str
        One of ``'linear_regression'``, ``'ransac_regression'``,
        ``'generalized_logistic'``, ``'gompertz_curve'``, ``'exponential_rise'``,
        ``'michaelis_menten'`` or ``'logarithmic_function'``.

    Returns
    -------
    numpy.ndarray
        Fitted values evaluated at ``X_sorted``.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names.

    Notes
    -----
    Exceptions raised by ``curve_fit`` itself (for example when the optimiser
    does not converge) are not caught here and propagate to the caller.
    """
    if method == 'linear_regression':
        return linear_regression(X_sorted, y_sorted).predict(X_sorted.reshape(-1, 1))
    if method == 'ransac_regression':
        return ransac_regression(X_sorted, y_sorted).predict(X_sorted.reshape(-1, 1))

    curves = {
        'generalized_logistic': generalized_logistic,
        'gompertz_curve': gompertz_curve,
        'exponential_rise': exponential_rise,
        'michaelis_menten': michaelis_menten,
        'logarithmic_function': logarithmic_function,
    }
    if method not in curves:
        # Falling off the end used to return None, which only failed later and
        # far away from the typo that caused it.
        supported = ['linear_regression', 'ransac_regression', *curves]
        raise ValueError(f"Unknown method {method!r}; expected one of {supported}")

    y_low, y_high = np.min(y_sorted), np.max(y_sorted)
    x_mid = np.median(X_sorted)

    # Data-driven starting points, one entry per curve parameter.
    initial_guesses = {
        'generalized_logistic': [y_low, 1, y_high, x_mid, 1],
        'gompertz_curve': [y_low, 0.1, 0.1, 0.1],
        'exponential_rise': [y_high, 1, 1],
        'michaelis_menten': [y_high, x_mid],
        'logarithmic_function': [y_low, 1, x_mid],
    }
    p0 = np.asarray(initial_guesses[method], dtype=float)

    # Every parameter is constrained to [0, inf) unless adjusted below.
    lower = np.zeros_like(p0)
    upper = np.full_like(p0, np.inf)

    if method == 'logarithmic_function':
        # log(X - c) is finite only while c < min(X). Starting at c = median(X)
        # made the residuals non-finite at the initial point, so the fit could
        # never start. Keep c strictly below the smallest X instead.
        margin = 1e-6 * max(np.ptp(X_sorted), 1.0)
        c_max = np.min(X_sorted) - margin
        if c_max > 0:
            lower[2], upper[2], p0[2] = 0.0, c_max, c_max / 2
        else:
            # No non-negative shift is feasible (min(X) <= 0): allow c < 0.
            lower[2], upper[2], p0[2] = -np.inf, c_max, c_max - 1.0

    # A bounded curve_fit rejects a start outside the bounds; data-derived
    # guesses (e.g. a negative minimum of y) are pulled onto the nearest bound.
    p0 = np.clip(p0, lower, upper)

    curve = curves[method]
    popt, _ = curve_fit(curve, X_sorted, y_sorted, p0=p0, bounds=(lower, upper), maxfev=10000)
    return curve(X_sorted, *popt)

# CSV read into best_methods_df; the loop below uses only its 'base_filename' column.
aggregated_mse_path = r'C:\Users\Alavis1\Documents\Works\Budgetsets2\regression_results\final_aggregated_mse_results.csv'
best_methods_df = pd.read_csv(aggregated_mse_path)

# Directory that is searched for the per-month CSV files.
subset_directory = r'C:\Users\Alavis1\Documents\Works\Budgetsets2'

# Running maxima over every monthly file; start at -inf so any real value replaces them.
max_x, max_y = -np.inf, -np.inf

# Scan every (base file, month) combination and track the largest advertising
# cost and profit seen across all monthly files that exist.
for index, row in best_methods_df.iterrows():
    file_base = row['base_filename']
    for month in range(1, 13):
        # strftime("%B") yields the full month name for months 1..12.
        # NOTE(review): %B is locale-dependent — confirm the files were named under the same locale.
        monthly_file = f'{file_base}_monthly_{pd.Timestamp(f"2024-{month:02d}-01").strftime("%B")}.csv'
        # NOTE(review): this reassigns the notebook-level name `file_path` set in the data-loading cell.
        file_path = os.path.join(subset_directory, monthly_file)
        # Months without a file are skipped silently.
        if not os.path.exists(file_path):
            continue

        monthly_data = pd.read_csv(file_path)
        # Files with no rows contribute nothing to the maxima.
        if monthly_data.empty:
            continue

        X = monthly_data['$ Advertising Cost'].values
        y = monthly_data['$ Profit'].values

        # Update the running maxima with this file's largest cost and profit.
        max_x = max(max_x, X.max())
        max_y = max(max_y, y.max())