In [1]:
import time
from scipy.optimize import minimize
from scipy.optimize import curve_fit
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
import numpy as np
import warnings

In [2]:
def mse(predicted,real):
    """Return the mean squared error between ``predicted`` and ``real``.

    Both arguments are expected to support element-wise subtraction
    (e.g. NumPy arrays); the sum of squared differences is divided by
    ``len(real)``.
    """
    squared_error = (real - predicted) ** 2
    return squared_error.sum() / len(real)

def first_method(x,y):
    """Fit a log-log linear regression and report its error and fit time.

    The model is ``log(y) ~ log(x)`` fitted with scikit-learn's
    ``LinearRegression`` on standardized logs. Predictions are mapped back
    to the original ``y`` scale before the error is computed.

    Parameters
    ----------
    x, y : array-like of float
        Paired observations. Pairs where either value is non-finite or not
        strictly positive are dropped, because their logarithm is undefined.

    Returns
    -------
    tuple
        ``(mse, seconds)``: mean squared error between the back-transformed
        predictions and the retained ``y`` values, and the wall-clock time
        spent on preprocessing, fitting and predicting. ``(0, 0)`` is
        returned when no usable pair remains.
    """
    start=time.time()
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    # Filter BEFORE taking logs/statistics: a single NaN would otherwise
    # poison the mean and std and turn every standardized value into NaN.
    usable = np.isfinite(x) & np.isfinite(y) & (x > 0) & (y > 0)
    x_used = x[usable]
    y_used = y[usable]
    if x_used.size == 0:
        return 0,0

    log_x = np.log(x_used)
    log_y = np.log(y_used)

    # A zero spread (constant column or a single point) would divide by zero;
    # fall back to a unit scale, which leaves the centred values at 0.
    x_scale = log_x.std() or 1.0
    y_center = log_y.mean()
    y_scale = log_y.std() or 1.0

    X = ((log_x - log_x.mean()) / x_scale).reshape(-1, 1)
    Y = ((log_y - y_center) / y_scale).reshape(-1, 1)

    reg = LinearRegression().fit(X, Y)
    predictions=reg.predict(X)
    end=time.time()

    # Undo the standardization, then the log, so the error is measured on the
    # same scale as y and is comparable with the other fitting methods.
    fitted = np.exp(predictions.ravel() * y_scale + y_center)
    mse1=mse(fitted,y_used)
    return mse1,(end-start)
#********************************************************************************************************
def cost_mse(list):
    """Objective for the Powell fit: MSE of ``c + a*exp(b*x)`` against ``y``.

    ``list`` holds the parameters as ``[a, b, c]``. The data is NOT passed
    in: ``x`` and ``y`` are read from the enclosing (module-level) scope, so
    they must be defined there before this function is called.
    """
    scale, rate, offset = list[0], list[1], list[2]
    residual = offset + scale * np.exp(rate * x) - y
    error = (residual ** 2).sum() / len(y)
    # Uncomment to trace every optimizer iteration:
    # print('mse value: {:0.4f} prodConst: {:.4f} expConst: {:.4f} const: {:.4f}'.format(error, scale, rate, offset))
    return error

def second_method(x,y):
    """Fit ``y ~ a*exp(b*x) + c`` with Powell's method; report error and time.

    Parameters
    ----------
    x, y : array-like of float
        Paired observations of equal length.

    Returns
    -------
    tuple
        ``(mse, seconds)``: mean squared error of the fitted model on the
        given data, and the wall-clock time spent inside the optimizer.

    Notes
    -----
    The objective is built from the ``x`` and ``y`` arguments themselves
    rather than from same-named module-level variables, and the reported
    error is evaluated with the very same model that was optimized, so the
    returned value is the objective at the optimizer's solution.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    def model(params):
        # params = [a, b, c]  ->  c + a * exp(b * x)
        return params[2] + params[0] * np.exp(params[1] * x)

    def objective(params):
        return ((model(params) - y) ** 2).sum() / len(y)

    start=time.time()
    pkonst=minimize(objective,x0=np.array([1,np.exp(1),0]),method='Powell')
    end=time.time()
    # Score with the optimized model itself, not a differently shaped formula.
    mse2=objective(pkonst.x)
    return mse2,(end-start)
#********************************************************************************************************    
def func(x, a, b, c):
    """Exponential-decay model ``a * exp(-b * x) + c`` used by ``curve_fit``."""
    decay = np.exp(-b * x)
    return a * decay + c
    
def third_method(x,y):
    """Fit ``func`` to the data with ``scipy.optimize.curve_fit``.

    Returns ``(mse, seconds)``: the mean squared error of the fitted curve
    on ``(x, y)`` and the wall-clock time spent inside ``curve_fit``.
    """
    fit_started = time.time()
    best_params, _covariance = curve_fit(func, x, y)
    fit_finished = time.time()

    fitted = func(x, *best_params)
    return mse(fitted, y), (fit_finished - fit_started)

In [3]:
import pandas as pd
# Load the semicolon-separated forecast export and show it as the cell output.
df = pd.read_csv(
    "New_volume_forecast_with_sectorial_variational_appending_by_Ulas.csv",
    sep=";",
)
df

Unnamed: 0,FCOPERID,datetime,CELL,forecasteddlprbutilbh,actualdlprbutilbh,forecastedtputbh,actualdltputbh
0,E302BBF16BA481458B54787E601EFC25,2019-10-02,KRKTP17L,,,,
1,E302BBF16BA481458B54787E601EFC25,2019-10-03,KRKTP17L,,,,
2,E302BBF16BA481458B54787E601EFC25,2019-10-04,KRKTP17L,,,,
3,E302BBF16BA481458B54787E601EFC25,2019-10-05,KRKTP17L,,,,
4,E302BBF16BA481458B54787E601EFC25,2019-10-06,KRKTP17L,,,,
...,...,...,...,...,...,...,...
4918843,E302BBF16BA481458B54787E601EFC25,2021-12-27,ZAKUM37L,29.222239,71.52,24.899002,13.166
4918844,E302BBF16BA481458B54787E601EFC25,2021-12-28,ZAKUM37L,29.222239,69.51,24.899002,15.281
4918845,E302BBF16BA481458B54787E601EFC25,2021-12-29,ZAKUM37L,29.222239,70.03,24.899002,10.509
4918846,E302BBF16BA481458B54787E601EFC25,2021-12-30,ZAKUM37L,29.222239,75.93,24.899002,13.901


In [4]:
# Keep only the columns needed for fitting: treat +/-inf as missing, drop rows
# with any missing value in those columns, then drop exact duplicate rows.
fit_columns = ["CELL", "actualdlprbutilbh", "actualdltputbh"]
df = (
    df.replace([np.inf, -np.inf], np.nan)
      .dropna(subset=["actualdlprbutilbh", "CELL", "actualdltputbh"], how="any")
      [fit_columns]
      .drop_duplicates()
)
df.describe()

Unnamed: 0,actualdlprbutilbh,actualdltputbh
count,2090488.0,2090488.0
mean,39.56716,19.99538
std,25.38467,15.14766
min,0.34,-1.934
25%,18.31,9.754
50%,35.82,16.604
75%,58.1,26.069
max,99.98,1293.0


In [7]:
# The optimizers emit many numeric warnings (overflow, poor fit); silence them.
warnings.filterwarnings("ignore")

# Debugging cap on how many cells are fitted; set to None to process every cell.
MAX_CELLS = 3

# (summary label, fitting function) pairs, in the order they are reported.
# Every fitting function takes (x, y) and returns (mse, seconds).
methods = [
    ("Linear Regression", first_method),
    ("Powell", second_method),
    ("Curve Fit", third_method),
]

total_mse = {label: 0 for label, _ in methods}
total_time_passed = {label: 0 for label, _ in methods}
# Per-cell rows are collected in plain lists and turned into DataFrames once
# at the end: DataFrame.append no longer exists in pandas >= 2.0 and growing a
# frame row by row is quadratic anyway.
cell_records = {label: [] for label, _ in methods}
failed_fits = []

counter=0

# groupby visits the cells in sorted order, so the subset selected by
# MAX_CELLS is the same on every run (iterating a set of strings is not),
# and each cell's rows are extracted once instead of via repeated masks.
for counter, (cell, cell_df) in enumerate(df.groupby("CELL"), start=1):
    # cost_mse reads the module-level names `x` and `y`; keep these exact names.
    y=cell_df["actualdltputbh"].to_numpy()
    x=cell_df["actualdlprbutilbh"].to_numpy()

    for label, method in methods:
        try:
            cell_mse, time_passed = method(x, y)
        except Exception as exc:
            # A failed fit must not abort the benchmark, but it must not
            # vanish silently either: remember it and report it afterwards.
            failed_fits.append((cell, label, repr(exc)))
            continue

        if np.isnan(cell_mse):
            # -1 marks "no usable error" in the per-cell table; it is a
            # sentinel, not an error value, so it is kept out of the total.
            cell_mse = -1
        else:
            total_mse[label] += cell_mse
        total_time_passed[label] += time_passed
        cell_records[label].append({"Cell":cell,"MSE":"{:.2f}".format(cell_mse),
                                    "Passed":"{:.6f}".format(time_passed)})

    if MAX_CELLS is not None and counter >= MAX_CELLS:
        break

if failed_fits:
    print("{} fit(s) raised an exception and were skipped:".format(len(failed_fits)))
    for failed_cell, failed_label, failure in failed_fits:
        print("  cell={} method={} error={}".format(failed_cell, failed_label, failure))

# Expose the results under the original notebook-level names.
total_mse_1,total_mse_2,total_mse_3 = (total_mse[label] for label, _ in methods)
total_time_passed_1,total_time_passed_2,total_time_passed_3 = (
    total_time_passed[label] for label, _ in methods
)
results1_df,results2_df,results3_df = (
    pd.DataFrame(cell_records[label], columns=["Cell","MSE","Passed"])
    for label, _ in methods
)

results_df=pd.DataFrame(
    [{"Method Name":label,
      "Total MSE":"{:.2f}".format(total_mse[label]),
      "Total Time Passed":"{:.1f}".format(total_time_passed[label])}
     for label, _ in methods],
    columns=["Method Name","Total MSE","Total Time Passed"],
)

results_df.to_csv(r"results.csv")
results1_df.to_csv(r"results_linear.csv")
results2_df.to_csv(r"results_powel.csv")
results3_df.to_csv(r"results_curve_fit.csv")

In [8]:
# NOTE(review): prints a constant and uses nothing from earlier cells; looks
# like a leftover check that the kernel is responsive — confirm and delete.
print(1)

1
