# Veri Analizleri

In [1]:
import pandas as pd
import numpy as np
from scipy import stats

In [2]:
from plotly import express
from plotly import graph_objects
from plotly import io

io.templates.default = "plotly_dark"

In [3]:
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm
from statsmodels.tsa.stattools import adfuller, acf, breakvar_heteroskedasticity_test

In [4]:
from statsmodels.graphics import tsaplots
import matplotlib.pyplot as plt

In [5]:
import plotly.graph_objects as go
import statsmodels.api as sm 

In [6]:
country_name = ["France", "Greece",	"Italy", "Portugal", "Spain", "Türkiye"]

In [7]:
df_2 = pd.read_excel("OECD VERİLERİ ELİF BAĞCI TEZ V0_python.xlsx", sheet_name=7, header=1 )
df_2.head()

Unnamed: 0.1,Unnamed: 0,France,Greece,Italy,Portugal,Spain,Türkiye,t,interruption,interrupted t
0,2000,567.596,544.298,555.592,496.533,436.527,165.598,1,0,0
1,2001,606.352,625.833,546.051,509.297,473.841,154.849,2,0,0
2,2002,656.392,771.521,574.128,514.29,522.286,156.127,3,0,0
3,2003,648.6,822.245,564.699,564.14,582.27,146.766,4,0,0
4,2004,672.672,869.229,582.27,612.965,608.266,158.063,5,0,0


In [8]:
def create_corr_plot(series, plot_pacf=False):
    corr_array = pacf(series.dropna(), alpha=0.05) if plot_pacf else acf(series.dropna(), alpha=0.05)
    lower_y = corr_array[1][:,0] - corr_array[0]
    upper_y = corr_array[1][:,1] - corr_array[0]

    fig = go.Figure()
    [fig.add_scatter(x=(x,x), y=(0,corr_array[0][x]), mode='lines',line_color='#3f3f3f') 
     for x in range(len(corr_array[0]))]
    fig.add_scatter(x=np.arange(len(corr_array[0])), y=corr_array[0], mode='markers', marker_color='#1f77b4',
                   marker_size=12)
    fig.add_scatter(x=np.arange(len(corr_array[0])), y=upper_y, mode='lines', line_color='rgba(255,255,255,0)')
    fig.add_scatter(x=np.arange(len(corr_array[0])), y=lower_y, mode='lines',fillcolor='rgba(32, 146, 230,0.3)',
            fill='tonexty', line_color='rgba(255,255,255,0)')
    fig.update_traces(showlegend=False)
    fig.update_xaxes(range=[-1,13])
    fig.update_yaxes(zerolinecolor='#000000')
    
    title='Partial Autocorrelation (PACF)' if plot_pacf else 'Autocorrelation (ACF)'
    fig.update_layout(title=title)
    fig.show()

In [9]:
for country in country_name:
    print("###########################")
    print(f"{country}'nin Zaman Serisi Analizi")
    print("###########################")

    trendlin = express.scatter(
        df_2,
        x="t",
        y=f"{country}",
        title=f"{country}'s Voluntary schemes/household out-of-pocket payment/ US$ purchasing power parities (current prices, current PPPs)",
        trendline = "ols"
    )

    trendlin.show()
    linear_model = ols(f"{country} ~ t", data=df_2).fit()
    print(linear_model.summary())
    print(" ", end="\n")
    print("###########################")
    print("Verinin Durağanlık Kontrolü", end="\n\n")
    print("H0: Seri durağan değildir.")
    print("H1: Seri durağandır.", end="\n\n")
    adfullerx = adfuller(df_2[[f"{country}"]])
    print(f'Test Stat = {round(adfullerx[0], 4)}, p-value = {round(adfullerx[1], 4)}')

    if adfullerx[1]<0.05:
        print("p-value 0.05'ten küçüktür. H0 hipotezi reddedilmiştir. Yani seri durağandır.")
    elif adfullerx[1]>0.05:
        print("p-value 0.05'ten büyüktür. H0 hipotezi reddedilememiştir. Yani seri durağan değildir.", end="\n\n")
    print("###########################")
    print("Hataların Normal Dağılması", end="\n\n")
    residual_values = linear_model.resid

    print("H0: Normal dağılım varsayımı sağlanmaktadır.")
    print("H1: Normal dağılım varsayımı sağlanmamaktadır.", end="\n\n")

    test_stat, pvalue = stats.shapiro(residual_values)
    print(f'Test Stat = {round(test_stat, 4)}, p-value = {round(pvalue, 4)}')
    if pvalue<0.05:
        print("p-value 0.05'ten küçüktür. H0 hipotezi reddedilmiştir. Hatalar normal dağılmamıştır.")
    elif pvalue>0.05:
        print("p-value 0.05'ten büyüktür. H0 hipotezi reddedilememiştir. Yani hatalar normal dağılmıştır.", end="\n\n")
    hist = express.histogram(residual_values, title="Hataların Dağılımı")
    hist.show()

    print("###########################")
    print("Hataların Rassallığı", end="\n\n")

    print("     Hataların Durağanlığı", end="\n\n")

    adfullery = adfuller(residual_values)
    print(f'Test Stat = {round(adfullery[0], 4)}, p-value = {round(adfullery[1], 4)}')

    if adfullery[1]<0.05:
        print("p-value 0.05'ten küçüktür. H0 hipotezi reddedilmiştir. Hatalar durağandır.")
    elif adfullery[1]>0.05:
        print("p-value 0.05'ten büyüktür. H0 hipotezi reddedilememiştir. Yani hatalar durağan değildir.", end="\n\n")

    resid_scatter = express.scatter(
        df_2,
        x="t",
        y=residual_values,
        title="Hataların Rassallığı"
    )

    resid_scatter.show()


    print("     Hataların Otokorelasyonu", end="\n\n")

    print("Ljung-Box q istatistiğinin Hipotezleri")
    print("H0: Veriler bağımsız (random) olarak dağılmıştır.")
    print("H1: Veriler bağımsız (random) olarak dağılmamıştır.", end="\n\n")

    acf_df = pd.DataFrame(pd.DataFrame(acf(residual_values, qstat=True)).T)
    acf_df.columns = ["ACF", "Ljung-Box q statistic", "p-value"]
    display(acf_df[1:6])

    create_corr_plot(residual_values)


###########################
France'nin Zaman Serisi Analizi
###########################


                            OLS Regression Results                            
Dep. Variable:                 France   R-squared:                       0.419
Model:                            OLS   Adj. R-squared:                  0.389
Method:                 Least Squares   F-statistic:                     13.71
Date:                Tue, 31 Jan 2023   Prob (F-statistic):            0.00151
Time:                        19:15:52   Log-Likelihood:                -124.88
No. Observations:                  21   AIC:                             253.8
Df Residuals:                      19   BIC:                             255.8
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept    663.0053     44.026     15.060      0.0

###########################
Hataların Rassallığı

     Hataların Durağanlığı

Test Stat = -1.4133, p-value = 0.5759
p-value 0.05'ten büyüktür. H0 hipotezi reddedilememiştir. Yani hatalar durağan değildir.



     Hataların Otokorelasyonu

Ljung-Box q istatistiğinin Hipotezleri
H0: Veriler bağımsız (random) olarak dağılmıştır.
H1: Veriler bağımsız (random) olarak dağılmamıştır.



Unnamed: 0,ACF,Ljung-Box q statistic,p-value
1,0.762825,21.032603,2.7e-05
2,0.523988,23.159379,3.7e-05
3,0.281529,23.315577,0.00011
4,0.074146,23.914245,0.000226
5,-0.140825,27.93126,9.7e-05


###########################
Greece'nin Zaman Serisi Analizi
###########################


                            OLS Regression Results                            
Dep. Variable:                 Greece   R-squared:                       0.335
Model:                            OLS   Adj. R-squared:                  0.300
Method:                 Least Squares   F-statistic:                     9.576
Date:                Tue, 31 Jan 2023   Prob (F-statistic):            0.00597
Time:                        19:15:52   Log-Likelihood:                -121.59
No. Observations:                  21   AIC:                             247.2
Df Residuals:                      19   BIC:                             249.3
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept    724.5412     37.648     19.245      0.0

###########################
Hataların Rassallığı

     Hataların Durağanlığı

Test Stat = -3.8065, p-value = 0.0028
p-value 0.05'ten küçüktür. H0 hipotezi reddedilmiştir. Hatalar durağandır.


     Hataların Otokorelasyonu

Ljung-Box q istatistiğinin Hipotezleri
H0: Veriler bağımsız (random) olarak dağılmıştır.
H1: Veriler bağımsız (random) olarak dağılmamıştır.



Unnamed: 0,ACF,Ljung-Box q statistic,p-value
1,0.660767,12.575455,0.001859
2,0.282673,12.575459,0.005651
3,-0.000407,13.76851,0.008072
4,-0.204918,16.887632,0.004718
5,-0.321442,23.114315,0.000759


###########################
Italy'nin Zaman Serisi Analizi
###########################


                            OLS Regression Results                            
Dep. Variable:                  Italy   R-squared:                       0.939
Model:                            OLS   Adj. R-squared:                  0.936
Method:                 Least Squares   F-statistic:                     293.6
Date:                Tue, 31 Jan 2023   Prob (F-statistic):           5.19e-13
Time:                        19:15:52   Log-Likelihood:                -102.33
No. Observations:                  21   AIC:                             208.7
Df Residuals:                      19   BIC:                             210.8
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept    476.2515     15.048     31.650      0.0

###########################
Hataların Rassallığı

     Hataların Durağanlığı

Test Stat = -2.4268, p-value = 0.1343
p-value 0.05'ten büyüktür. H0 hipotezi reddedilememiştir. Yani hatalar durağan değildir.



     Hataların Otokorelasyonu

Ljung-Box q istatistiğinin Hipotezleri
H0: Veriler bağımsız (random) olarak dağılmıştır.
H1: Veriler bağımsız (random) olarak dağılmamıştır.



Unnamed: 0,ACF,Ljung-Box q statistic,p-value
1,0.608875,12.174334,0.002272
2,0.355971,12.189061,0.006763
3,0.023428,12.199484,0.015928
4,-0.019154,13.068489,0.022745
5,-0.169667,13.64421,0.033873


###########################
Portugal'nin Zaman Serisi Analizi
###########################


                            OLS Regression Results                            
Dep. Variable:               Portugal   R-squared:                       0.943
Model:                            OLS   Adj. R-squared:                  0.940
Method:                 Least Squares   F-statistic:                     311.7
Date:                Tue, 31 Jan 2023   Prob (F-statistic):           3.03e-13
Time:                        19:15:53   Log-Likelihood:                -114.27
No. Observations:                  21   AIC:                             232.5
Df Residuals:                      19   BIC:                             234.6
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept    460.1991     26.568     17.322      0.0

###########################
Hataların Rassallığı

     Hataların Durağanlığı

Test Stat = -2.3362, p-value = 0.1606
p-value 0.05'ten büyüktür. H0 hipotezi reddedilememiştir. Yani hatalar durağan değildir.



     Hataların Otokorelasyonu

Ljung-Box q istatistiğinin Hipotezleri
H0: Veriler bağımsız (random) olarak dağılmıştır.
H1: Veriler bağımsız (random) olarak dağılmamıştır.



Unnamed: 0,ACF,Ljung-Box q statistic,p-value
1,0.577161,9.206239,0.010021
2,0.213754,9.591947,0.022373
3,-0.119892,14.295186,0.00641
4,-0.406864,20.499254,0.001007
5,-0.453341,25.169302,0.000318


###########################
Spain'nin Zaman Serisi Analizi
###########################


                            OLS Regression Results                            
Dep. Variable:                  Spain   R-squared:                       0.966
Model:                            OLS   Adj. R-squared:                  0.964
Method:                 Least Squares   F-statistic:                     531.8
Date:                Tue, 31 Jan 2023   Prob (F-statistic):           2.36e-15
Time:                        19:15:53   Log-Likelihood:                -102.55
No. Observations:                  21   AIC:                             209.1
Df Residuals:                      19   BIC:                             211.2
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept    437.3161     15.203     28.766      0.0

###########################
Hataların Rassallığı

     Hataların Durağanlığı

Test Stat = -2.4079, p-value = 0.1395
p-value 0.05'ten büyüktür. H0 hipotezi reddedilememiştir. Yani hatalar durağan değildir.



     Hataların Otokorelasyonu

Ljung-Box q istatistiğinin Hipotezleri
H0: Veriler bağımsız (random) olarak dağılmıştır.
H1: Veriler bağımsız (random) olarak dağılmamıştır.



Unnamed: 0,ACF,Ljung-Box q statistic,p-value
1,0.629389,10.762989,0.004601
2,0.216944,10.894929,0.012308
3,-0.070121,11.547773,0.021051
4,-0.151585,14.703996,0.011705
5,-0.323348,23.04216,0.000782


###########################
Türkiye'nin Zaman Serisi Analizi
###########################


                            OLS Regression Results                            
Dep. Variable:                Türkiye   R-squared:                       0.743
Model:                            OLS   Adj. R-squared:                  0.730
Method:                 Least Squares   F-statistic:                     54.97
Date:                Tue, 31 Jan 2023   Prob (F-statistic):           5.09e-07
Time:                        19:15:53   Log-Likelihood:                -93.848
No. Observations:                  21   AIC:                             191.7
Df Residuals:                      19   BIC:                             193.8
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept    141.1759     10.045     14.054      0.0

###########################
Hataların Rassallığı

     Hataların Durağanlığı

Test Stat = -6.0035, p-value = 0.0
p-value 0.05'ten küçüktür. H0 hipotezi reddedilmiştir. Hatalar durağandır.


     Hataların Otokorelasyonu

Ljung-Box q istatistiğinin Hipotezleri
H0: Veriler bağımsız (random) olarak dağılmıştır.
H1: Veriler bağımsız (random) olarak dağılmamıştır.



Unnamed: 0,ACF,Ljung-Box q statistic,p-value
1,0.572906,8.424732,0.014811
2,0.139992,9.391824,0.02451
3,-0.189844,14.045255,0.007152
4,-0.404704,17.582249,0.003518
5,-0.342297,18.501483,0.005094


# Interrupted Time Series for Türkiye

    Yukarıda referans ülkelerin ve Türkiye'nin zaman serisi regresyon analizi sonuçlarına bakılırken, bu kısımdan itibaren Türkiye için interrupted time series analysis yapılmıştır.

In [10]:
X = df_2[["t", "interruption", "interrupted t"]]
y = df_2["Türkiye"]

X = sm.add_constant(X) 

analys= sm.OLS(y, X).fit()

trendlin = express.scatter(
        df_2,
        x="t",
        y= "Türkiye",
        title="Türkiye's Voluntary schemes/household out-of-pocket payment/ US$ purchasing power parities (current prices, current PPPs)",
    )

trendlin.add_trace(
go.Scatter(
    x = [1,5],
    y = [analys.predict()[0], analys.predict()[4]],
)
 )   

trendlin.add_trace(
go.Scatter(
    x = [5,6],
    y = [analys.predict()[4], analys.predict()[5]]
)

 )

trendlin.add_trace(
go.Scatter(
    x = [6,21],
    y = [analys.predict()[5], analys.predict()[20]]
)

 )  

trendlin.show() 



print(analys.summary())

print("###########################")
print("Hataların Normal Dağılması", end="\n\n")
residual_values = analys.resid
print("H0: Normal dağılım varsayımı sağlanmaktadır.")
print("H1: Normal dağılım varsayımı sağlanmamaktadır.", end="\n\n")
test_stat, pvalue = stats.shapiro(residual_values)
print(f'Test Stat = {round(test_stat, 4)}, p-value = {round(pvalue, 4)}')
if pvalue<0.05:
    print("p-value 0.05'ten küçüktür. H0 hipotezi reddedilmiştir. Hatalar normal dağılmamıştır.")
elif pvalue>0.05:
     print("p-value 0.05'ten büyüktür. H0 hipotezi reddedilememiştir. Yani hatalar normal dağılmıştır.", end="\n\n")
hist = express.histogram(residual_values, title="Hataların Dağılımı")
hist.show()
print("###########################")
print("Hataların Rassallığı", end="\n\n")
print("     Hataların Durağanlığı", end="\n\n")
adfullery = adfuller(residual_values)
print(f'Test Stat = {round(adfullery[0], 4)}, p-value = {round(adfullery[1], 4)}')
if adfullery[1]<0.05:
    print("p-value 0.05'ten küçüktür. H0 hipotezi reddedilmiştir. Hatalar durağandır.")
elif adfullery[1]>0.05:
    print("p-value 0.05'ten büyüktür. H0 hipotezi reddedilememiştir. Yani hatalar durağan değildir.", end="\n\n")
resid_scatter = express.scatter(
    df_2,
    x="t",
    y=residual_values,
    title="Hataların Rassallığı"
)
resid_scatter.show()
print("     Hataların Otokorelasyonu", end="\n\n")
print("Ljung-Box q istatistiğinin Hipotezleri")
print("H0: Veriler bağımsız (random) olarak dağılmıştır.")
print("H1: Veriler bağımsız (random) olarak dağılmamıştır.", end="\n\n")
acf_df = pd.DataFrame(pd.DataFrame(acf(residual_values, qstat=True)).T)
acf_df.columns = ["ACF", "Ljung-Box q statistic", "p-value"]
display(acf_df[1:6])
create_corr_plot(residual_values)

                            OLS Regression Results                            
Dep. Variable:                Türkiye   R-squared:                       0.763
Model:                            OLS   Adj. R-squared:                  0.722
Method:                 Least Squares   F-statistic:                     18.28
Date:                Tue, 31 Jan 2023   Prob (F-statistic):           1.46e-05
Time:                        19:15:53   Log-Likelihood:                -92.987
No. Observations:                  21   AIC:                             194.0
Df Residuals:                      17   BIC:                             198.2
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                    coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------
const           163.2265     23.625      6.909

###########################
Hataların Rassallığı

     Hataların Durağanlığı

Test Stat = -6.6079, p-value = 0.0
p-value 0.05'ten küçüktür. H0 hipotezi reddedilmiştir. Hatalar durağandır.


     Hataların Otokorelasyonu

Ljung-Box q istatistiğinin Hipotezleri
H0: Veriler bağımsız (random) olarak dağılmıştır.
H1: Veriler bağımsız (random) olarak dağılmamıştır.



Unnamed: 0,ACF,Ljung-Box q statistic,p-value
1,0.605687,10.410359,0.005488
2,0.246987,10.427625,0.01526
3,-0.025367,12.593387,0.013443
4,-0.276094,18.602962,0.002278
5,-0.446178,22.313608,0.001062
