In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
import pmdarima as pm
from statsmodels.tsa.arima.model import ARIMA
from sklearn.model_selection import TimeSeriesSplit
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from statsmodels.tsa.api import SimpleExpSmoothing
from prophet import Prophet

In [None]:
tiobe_df = pd.read_csv('new/tiobe.csv')
wiki_df = pd.read_csv('new/wiki.csv')
gtrend_df = pd.read_csv('new/gtrend.csv')
stack_df = pd.read_csv('new/stackoverflow.csv')


In [None]:
languages = list(wiki_df.columns)
languages.remove('Month')
df_list = [tiobe_df, wiki_df, gtrend_df, stack_df]
df_dict = {0:'tiobe', 1:'wikipedia', 2:'gtrend', 3:'stackOverflow'}

di = {}
di['language'] = languages

In [None]:
#ARIMA MODEL

for i, df_all in enumerate(df_list): 
    di['arima_mae_' + df_dict[i]] = []
    di['arima_mse_' + df_dict[i]] = []
    di['arima_rmse_' + df_dict[i]] = []

    for language in languages:
        if language in df_all.columns:
            df = df_all[language]

            splits = 5 if i==3 else 10
            tscv = TimeSeriesSplit(n_splits=splits)
            
            fold_mae, fold_mse, fold_rmse = [],[],[]

            for train_index, test_index in tscv.split(df):
                train, test = df.iloc[train_index], df.iloc[test_index]
                
                model = pm.auto_arima(train, seasonal=False, stepwise=True, trace=False)
                p, d, q = model.order
                
                arima_model = ARIMA(train, order=(p, d, q))
                model = arima_model.fit()

                forecast = model.forecast(steps=len(test))

                mae = mean_absolute_error(test, forecast)
                mse = mean_squared_error(test, forecast)
                rmse = np.sqrt(mse)

                fold_mae.append(mae)
                fold_mse.append(mse)
                fold_rmse.append(rmse)
        
        else:
            fold_mae, fold_mse, fold_rmse = [-1],[-1],[-1]

        avg_mae = np.mean(fold_mae)
        avg_mse = np.mean(fold_mse)
        avg_rmse = np.mean(fold_rmse)

        di['arima_mae_'+ df_dict[i]].append(avg_mae)
        di['arima_mse_' + df_dict[i]].append(avg_mse)
        di['arima_rmse_' + df_dict[i]].append(avg_rmse)



In [None]:
#SIMPLE EXPONENTIAL MODEL

for i, df_all in enumerate(df_list): 
    di['ses_mae_' + df_dict[i]] = []
    di['ses_mse_' + df_dict[i]] = []
    di['ses_rmse_' + df_dict[i]] = []

    for language in languages:
        if language in df_all.columns:
            df1 = df_all[language]
            mae_list, mse_list, rmse_list = [], [], []

            splits = 5 if i==3 else 10
            tscv = TimeSeriesSplit(n_splits=splits)
            
            for train_index, test_index in tscv.split(df1):
                train, test = df1[train_index], df1[test_index]
                
                ses = SimpleExpSmoothing(train)
                model = ses.fit(optimized=True)
                
                forecast = model.forecast(steps=len(test))
                
                mae = mean_absolute_error(test, forecast)
                mse = mean_squared_error(test, forecast)
                rmse = np.sqrt(mse)
                
                mae_list.append(mae)
                mse_list.append(mse)
                rmse_list.append(rmse)
        
        else:
            mae_list, mse_list, rmse_list = [-1],[-1],[-1]

        avg_mae = np.mean(mae_list)
        avg_mse = np.mean(mse_list)
        avg_rmse = np.mean(rmse_list)
        
        di['ses_mae_'+ df_dict[i]].append(avg_mae)
        di['ses_mse_' + df_dict[i]].append(avg_mse)
        di['ses_rmse_' + df_dict[i]].append(avg_rmse)


In [None]:
# HOLT-WINTERS MODEL

for i, df_all in enumerate(df_list): 
    di['holt_mae_' + df_dict[i]] = []
    di['holt_mse_' + df_dict[i]] = []
    di['holt_rmse_' + df_dict[i]] = []

    for language in languages:
        if language in df_all.columns:
            df1 = df_all[language]
            mae_list, mse_list, rmse_list = [], [], []

            splits = 5 if i==3 else 10
            tscv = TimeSeriesSplit(n_splits=splits)
            
            for train_index, test_index in tscv.split(df1):
                train, test = df1[train_index], df1[test_index]
                
                holt_model = ExponentialSmoothing(train, trend="additive", seasonal=None)
                model = holt_model.fit(optimized=True)
                
                forecast = model.forecast(steps=len(test))
                
                mae = mean_absolute_error(test, forecast)
                mse = mean_squared_error(test, forecast)
                rmse = np.sqrt(mse)
                
                mae_list.append(mae)
                mse_list.append(mse)
                rmse_list.append(rmse)
        
        else:
            mae_list, mse_list, rmse_list = [-1],[-1],[-1]

        avg_mae = np.mean(mae_list)
        avg_mse = np.mean(mse_list)
        avg_rmse = np.mean(rmse_list)
        
        di['holt_mae_'+ df_dict[i]].append(avg_mae)
        di['holt_mse_' + df_dict[i]].append(avg_mse)
        di['holt_rmse_' + df_dict[i]].append(avg_rmse)


In [None]:
# PROPHET MODEL

for i, df_all in enumerate(df_list): 
    di['prophet_mae_' + df_dict[i]] = []
    di['prophet_mse_' + df_dict[i]] = []
    di['prophet_rmse_' + df_dict[i]] = []

    for language in languages:
        if language in df_all.columns:
            tm = 'year' if i==3 else 'Month'
            df1 = df_all[[tm, language]]
            df1[tm] = pd.to_datetime(df1[tm], format='%Y') if tm == 'year' else pd.to_datetime(df1[tm])
            df1.columns = ['ds', 'y']
            mae_list, mse_list, rmse_list = [], [], []

            splits = 5 if i==3 else 10
            tscv = TimeSeriesSplit(n_splits=splits)
            
            for train_index, test_index in tscv.split(df1):
                train, test = df1.iloc[train_index], df1.iloc[test_index]

                prophet_df = train.reset_index(drop=True)
                prophet_df.columns = ['ds', 'y']

                prophet_model = Prophet(seasonality_mode='additive', yearly_seasonality=False, weekly_seasonality=False)
                prophet_model.fit(prophet_df)

                future = prophet_model.make_future_dataframe(periods=len(test))
                forecast = prophet_model.predict(future)

                test_df = test.reset_index(drop=True)
                test_df.columns = ['ds', 'y']

                forecast_test = forecast.iloc[-len(test):]

                mae = mean_absolute_error(test_df['y'], forecast_test['yhat'])
                mse = mean_squared_error(test_df['y'], forecast_test['yhat'])
                rmse = np.sqrt(mse)

                mae_list.append(mae)
                mse_list.append(mse)
                rmse_list.append(rmse)
        
        else:
            mae_list, mse_list, rmse_list = [-1],[-1],[-1]

        avg_mae = np.mean(mae_list)
        avg_mse = np.mean(mse_list)
        avg_rmse = np.mean(rmse_list)
        
        di['prophet_mae_'+ df_dict[i]].append(avg_mae)
        di['prophet_mse_' + df_dict[i]].append(avg_mse)
        di['prophet_rmse_' + df_dict[i]].append(avg_rmse)




In [None]:
df_res = pd.DataFrame.from_dict(di)
df_res.to_csv('result_timeseries.csv', index=False)

In [11]:
df_res.head()

Unnamed: 0,language,arima_mae_tiobe,arima_mse_tiobe,arima_rmse_tiobe,arima_mae_wikipedia,arima_mse_wikipedia,arima_rmse_wikipedia,arima_mae_gtrend,arima_mse_gtrend,arima_rmse_gtrend,...,prophet_rmse_tiobe,prophet_mae_wikipedia,prophet_mse_wikipedia,prophet_rmse_wikipedia,prophet_mae_gtrend,prophet_mse_gtrend,prophet_rmse_gtrend,prophet_mae_stackOverflow,prophet_mse_stackOverflow,prophet_rmse_stackOverflow
0,Java,0.119842,0.025445,0.136695,0.178869,0.049087,0.212511,0.061121,0.008737,0.068036,...,0.1325,0.154517,0.036692,0.184164,0.057844,0.007082,0.063714,0.306663,0.117477,0.306663
1,JavaScript,0.2105,0.067358,0.243572,0.102025,0.034808,0.11943,0.125514,0.036125,0.156291,...,0.255364,0.205134,0.095957,0.221731,0.14588,0.047965,0.17153,0.227254,0.061514,0.227254
2,PHP,0.10234,0.027085,0.115292,0.105663,0.019045,0.128879,0.084962,0.012336,0.09895,...,0.149709,0.090196,0.016239,0.118091,0.086419,0.011835,0.098613,0.277676,0.096484,0.277676
3,Python,0.066918,0.008777,0.079447,0.115188,0.038033,0.13518,0.103449,0.023095,0.128905,...,0.096818,0.148665,0.057704,0.165874,0.110548,0.023539,0.132092,0.194725,0.057336,0.194725
4,MySQL,-1.0,-1.0,-1.0,0.105834,0.020038,0.128611,0.11711,0.022642,0.140118,...,-1.0,0.103835,0.01812,0.125507,0.133243,0.027752,0.153448,0.261686,0.08781,0.261686
