# PO PW - ANALIZA REGRESJI I PROGNOZY WSTĘPNE

## Importowanie bibliotek

In [185]:
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
from sklearn.linear_model import Lasso

## 1. Ładowanie i czyszczenie danych

In [186]:
# Load the source data from the Excel workbook into a DataFrame.
# The path is relative, so the workbook must sit in the working directory.
df = pd.read_excel("data.xlsx")

Sprawdzenie braków danych

In [187]:
# For every column, report whether it holds at least one missing value.
missing_mask = df.isnull()
missing_mask.any(axis=0)

rok                            False
pkb_na_1_mieszkanca            False
stopa_ref                      False
kurs_eur                       False
inflacja                       False
podm_10_tys                    False
podmioty                       False
msp                            False
podm_kap_zagr                  False
podm_kap_zagr_przychody        False
srodki_trw                     False
przych_msp                     False
naklady_inw                    False
naklady_br_ogolem              False
naklady_br_przedsiebiorstwa    False
biotechnologia                 False
wsp_innow_RPO                  False
wsp_innow_PO_PW                False
eksport_msp                    False
akt_inn                        False
nakl_inn                       False
dtype: bool

In [188]:
# List the column labels to see which indicators are available.
df.columns

Index(['rok', 'pkb_na_1_mieszkanca', 'stopa_ref', 'kurs_eur', 'inflacja',
       'podm_10_tys', 'podmioty', 'msp', 'podm_kap_zagr',
       'podm_kap_zagr_przychody', 'srodki_trw', 'przych_msp', 'naklady_inw',
       'naklady_br_ogolem', 'naklady_br_przedsiebiorstwa', 'biotechnologia',
       'wsp_innow_RPO', 'wsp_innow_PO_PW', 'eksport_msp', 'akt_inn',
       'nakl_inn'],
      dtype='object')

## 2. Analiza eksploracyjna 

Wskaźniki makroekonomiczne

In [189]:
# Macroeconomic indicators, shown together with the three outcome
# variables (eksport_msp, akt_inn, nakl_inn) that are modelled later on.
macro_columns = ["pkb_na_1_mieszkanca", "stopa_ref", "kurs_eur", "inflacja"]
outcome_columns = ["eksport_msp", "akt_inn", "nakl_inn"]
df1 = df[macro_columns + outcome_columns]

In [190]:
# sns.pairplot(df1)

Wskaźniki dotyczące podmiotów gospodarczych

In [191]:
# Indicators describing business entities, shown together with the three
# outcome variables (eksport_msp, akt_inn, nakl_inn).
business_columns = [
    'podm_10_tys',
    'podmioty',
    'msp',
    'podm_kap_zagr',
    'podm_kap_zagr_przychody',
    'srodki_trw',
    'przych_msp',
    'naklady_inw',
]
outcome_columns = ["eksport_msp", "akt_inn", "nakl_inn"]
df2 = df[business_columns + outcome_columns]

In [192]:
# sns.pairplot(df2)

Wskaźniki dotyczące innowacyjności

In [193]:
# Innovation-related indicators, shown together with the three outcome
# variables (eksport_msp, akt_inn, nakl_inn).
innovation_columns = [
    'naklady_br_ogolem',
    'naklady_br_przedsiebiorstwa',
    'biotechnologia',
]
outcome_columns = ['eksport_msp', 'akt_inn', 'nakl_inn']
df3 = df[innovation_columns + outcome_columns]

In [194]:
# sns.pairplot(df3)

Wskaźniki dotyczące wsparcia unijnego

In [195]:
# EU-support indicators, shown together with the three outcome variables
# (eksport_msp, akt_inn, nakl_inn).
eu_support_columns = ['wsp_innow_RPO', 'wsp_innow_PO_PW']
outcome_columns = ['eksport_msp', 'akt_inn', 'nakl_inn']
df4 = df[eu_support_columns + outcome_columns]

In [196]:
# sns.pairplot(df4)

## 2. Eksport MSP

### 2.1. Najważniejsze współczynniki

In [197]:
# Target for this section: SME exports. Double brackets keep it as a
# one-column DataFrame rather than a Series.
target_column = 'eksport_msp'
y = df[[target_column]]

# Predictors: every other column of `df`, listed explicitly so the column
# order (and with it the order of the fitted coefficients) is fixed.
# NOTE(review): 'rok' is a predictor here but is left out of the predictor
# lists in sections 3 and 4 -- confirm which variant is intended.
feature_columns = [
    'rok',
    'pkb_na_1_mieszkanca',
    'stopa_ref',
    'kurs_eur',
    'inflacja',
    'podm_10_tys',
    'podmioty',
    'msp',
    'podm_kap_zagr',
    'podm_kap_zagr_przychody',
    'srodki_trw',
    'przych_msp',
    'naklady_inw',
    'naklady_br_ogolem',
    'naklady_br_przedsiebiorstwa',
    'biotechnologia',
    'wsp_innow_RPO',
    'wsp_innow_PO_PW',
    'akt_inn',
    'nakl_inn',
]
X = df[feature_columns]

In [198]:
# Natural-log transform of the predictors and of the target.
# np.log maps 0 to -inf and negative inputs to NaN; the -inf entries are
# dealt with in the next cell.
x_log, y_log = (np.log(frame) for frame in (X, y))

In [199]:
# Swap the -inf entries (the result of log(0)) for 0 and leave every other
# value, including NaN, untouched.
# NOTE(review): 0 is also log(1), so an original value of 0 becomes
# indistinguishable from an original value of 1 -- confirm this is acceptable.
is_neg_inf = x_log == -np.inf
x_log = x_log.mask(is_neg_inf, 0)

In [200]:
# Fit an L1-regularised (Lasso) linear model of log SME exports on the
# log-transformed predictors. The L1 penalty can shrink coefficients to
# exactly zero, so the non-zero entries of `lasso_coef` point at the
# predictors the model keeps.
# A single fit is sufficient: refitting with identical inputs yields the
# same coefficients and only repeats the work.
lasso = Lasso(alpha=0.1)  # alpha sets the strength of the L1 penalty
lasso.fit(x_log, y_log)

# One coefficient per column of `x_log`, in column order.
lasso_coef = lasso.coef_

In [201]:
# Pair every predictor name with its Lasso coefficient in a two-column
# table ('wskaznik' = indicator name, 'wspolczynnik' = coefficient).
names = x_log.columns
coef_df = pd.DataFrame({'wskaznik': names}).assign(wspolczynnik=lasso_coef)

In [202]:
# Show the coefficient table ordered from the smallest to the largest
# coefficient (ascending is the default sort order).
coef_df.sort_values(by='wspolczynnik')

Unnamed: 0,wskaznik,wspolczynnik
0,rok,0.0
17,wsp_innow_PO_PW,0.0
16,wsp_innow_RPO,0.0
15,biotechnologia,0.0
14,naklady_br_przedsiebiorstwa,0.0
13,naklady_br_ogolem,0.0
12,naklady_inw,0.0
11,przych_msp,0.0
10,srodki_trw,0.0
9,podm_kap_zagr_przychody,0.0


### 2.2. Regresja

In [203]:
import statsmodels.api as sm

In [204]:
# Build the OLS design matrix: the log-transformed predictors plus an
# intercept column ('const' in the summary table).
# NOTE: this rebinds `X`, which until now held the raw predictors. Re-running
# the log-transform cell after this one would therefore take the log of the
# design matrix instead of the raw data; re-run from the feature-selection
# cell when repeating this section.
X = sm.add_constant(x_log)

In [205]:
# Ordinary least squares fit of log SME exports on the design matrix.
model = sm.OLS(y_log, X).fit()

# A fit without residual degrees of freedom reproduces the data exactly
# (R-squared of 1) and leaves standard errors, t-statistics and p-values
# undefined. Say so explicitly instead of relying on the low-level numerical
# warnings that `summary()` emits in that situation.
if model.df_resid <= 0:
    warnings.warn(
        f"OLS fit is saturated: {int(model.nobs)} observations leave "
        f"{int(model.df_resid)} residual degrees of freedom, so standard "
        "errors, t-statistics and p-values are undefined. Use fewer "
        "regressors or more observations before interpreting the summary.",
        RuntimeWarning,
    )

In [206]:
# Render the regression report for the fitted model: fit statistics,
# coefficient table and residual diagnostics.
model.summary()

  warn("omni_normtest is not valid with less than 8 observations; %i "
  return 1 - (np.divide(self.nobs - self.k_constant, self.df_resid)
  return 1 - (np.divide(self.nobs - self.k_constant, self.df_resid)
  return np.dot(wresid, wresid) / self.df_resid


0,1,2,3
Dep. Variable:,eksport_msp,R-squared:,1.0
Model:,OLS,Adj. R-squared:,
Method:,Least Squares,F-statistic:,
Date:,"Mon, 07 Nov 2022",Prob (F-statistic):,
Time:,08:21:32,Log-Likelihood:,145.77
No. Observations:,5,AIC:,-281.5
Df Residuals:,0,BIC:,-283.5
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.0070,inf,0,,,
rok,0.0530,inf,0,,,
pkb_na_1_mieszkanca,0.0620,inf,0,,,
stopa_ref,-0.0023,inf,-0,,,
kurs_eur,0.0099,inf,0,,,
inflacja,0.0315,inf,0,,,
podm_10_tys,0.0430,inf,0,,,
podmioty,0.0902,inf,0,,,
msp,0.0902,inf,0,,,

0,1,2,3
Omnibus:,,Durbin-Watson:,0.008
Prob(Omnibus):,,Jarque-Bera (JB):,0.375
Skew:,-0.344,Prob(JB):,0.829
Kurtosis:,1.847,Cond. No.,286.0


## 3. Nakłady na innowacyjność

### 3.1. Najważniejsze współczynniki

In [207]:
# Target for this section: innovation expenditure. Double brackets keep it
# as a one-column DataFrame rather than a Series.
target_column = 'nakl_inn'
y = df[[target_column]]

# Predictors: every column of `df` except 'rok' and the target, listed
# explicitly so the column order (and with it the order of the fitted
# coefficients) is fixed.
feature_columns = [
    'pkb_na_1_mieszkanca',
    'stopa_ref',
    'kurs_eur',
    'inflacja',
    'podm_10_tys',
    'podmioty',
    'msp',
    'podm_kap_zagr',
    'podm_kap_zagr_przychody',
    'srodki_trw',
    'przych_msp',
    'naklady_inw',
    'naklady_br_ogolem',
    'naklady_br_przedsiebiorstwa',
    'biotechnologia',
    'wsp_innow_RPO',
    'wsp_innow_PO_PW',
    'akt_inn',
    'eksport_msp',
]
X = df[feature_columns]

In [208]:
# Natural-log transform of the predictors and of the target.
# np.log maps 0 to -inf and negative inputs to NaN; the -inf entries are
# dealt with in the next cell.
x_log, y_log = (np.log(frame) for frame in (X, y))

In [209]:
# Swap the -inf entries (the result of log(0)) for 0 and leave every other
# value, including NaN, untouched.
# NOTE(review): 0 is also log(1), so an original value of 0 becomes
# indistinguishable from an original value of 1 -- confirm this is acceptable.
is_neg_inf = x_log == -np.inf
x_log = x_log.mask(is_neg_inf, 0)

In [210]:
# Fit an L1-regularised (Lasso) linear model of log innovation expenditure
# on the log-transformed predictors. The L1 penalty can shrink coefficients
# to exactly zero, so the non-zero entries of `lasso_coef` point at the
# predictors the model keeps.
# A single fit is sufficient: refitting with identical inputs yields the
# same coefficients and only repeats the work.
lasso = Lasso(alpha=0.1)  # alpha sets the strength of the L1 penalty
lasso.fit(x_log, y_log)

# One coefficient per column of `x_log`, in column order.
lasso_coef = lasso.coef_

In [211]:
# Pair every predictor name with its Lasso coefficient in a two-column
# table ('wskaznik' = indicator name, 'wspolczynnik' = coefficient).
names = x_log.columns
coef_df = pd.DataFrame({'wskaznik': names}).assign(wspolczynnik=lasso_coef)

In [212]:
# Show the coefficient table ordered from the smallest to the largest
# coefficient (ascending is the default sort order).
coef_df.sort_values(by='wspolczynnik')

Unnamed: 0,wskaznik,wspolczynnik
8,podm_kap_zagr_przychody,-0.001397
0,pkb_na_1_mieszkanca,0.0
15,wsp_innow_RPO,0.0
14,biotechnologia,0.0
13,naklady_br_przedsiebiorstwa,0.0
12,naklady_br_ogolem,0.0
11,naklady_inw,0.0
10,przych_msp,0.0
17,akt_inn,0.0
9,srodki_trw,0.0


### 3.2. Regresja

In [213]:
import statsmodels.api as sm

In [214]:
# Build the OLS design matrix: the log-transformed predictors plus an
# intercept column ('const' in the summary table).
# NOTE: this rebinds `X`, which until now held the raw predictors. Re-running
# the log-transform cell after this one would therefore take the log of the
# design matrix instead of the raw data; re-run from the feature-selection
# cell when repeating this section.
X = sm.add_constant(x_log)

In [215]:
# Ordinary least squares fit of log innovation expenditure on the design
# matrix.
model = sm.OLS(y_log, X).fit()

# A fit without residual degrees of freedom reproduces the data exactly
# (R-squared of 1) and leaves standard errors, t-statistics and p-values
# undefined. Say so explicitly instead of relying on the low-level numerical
# warnings that `summary()` emits in that situation.
if model.df_resid <= 0:
    warnings.warn(
        f"OLS fit is saturated: {int(model.nobs)} observations leave "
        f"{int(model.df_resid)} residual degrees of freedom, so standard "
        "errors, t-statistics and p-values are undefined. Use fewer "
        "regressors or more observations before interpreting the summary.",
        RuntimeWarning,
    )

In [216]:
# Render the regression report for the fitted model: fit statistics,
# coefficient table and residual diagnostics.
model.summary()

  warn("omni_normtest is not valid with less than 8 observations; %i "
  return 1 - (np.divide(self.nobs - self.k_constant, self.df_resid)
  return 1 - (np.divide(self.nobs - self.k_constant, self.df_resid)
  return np.dot(wresid, wresid) / self.df_resid


0,1,2,3
Dep. Variable:,nakl_inn,R-squared:,1.0
Model:,OLS,Adj. R-squared:,
Method:,Least Squares,F-statistic:,
Date:,"Mon, 07 Nov 2022",Prob (F-statistic):,
Time:,08:21:33,Log-Likelihood:,157.07
No. Observations:,5,AIC:,-304.1
Df Residuals:,0,BIC:,-306.1
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.0006,inf,0,,,
pkb_na_1_mieszkanca,0.0592,inf,0,,,
stopa_ref,0.0067,inf,0,,,
kurs_eur,0.0267,inf,0,,,
inflacja,-0.0026,inf,-0,,,
podm_10_tys,0.0266,inf,0,,,
podmioty,0.0280,inf,0,,,
msp,0.0280,inf,0,,,
podm_kap_zagr,0.1064,inf,0,,,

0,1,2,3
Omnibus:,,Durbin-Watson:,0.458
Prob(Omnibus):,,Jarque-Bera (JB):,0.335
Skew:,-0.568,Prob(JB):,0.846
Kurtosis:,2.436,Cond. No.,302.0


## 4. Odsetek firm innowacyjnych

### 4.1. Najważniejsze współczynniki

In [217]:
# Target for this section: share of innovative firms. Double brackets keep
# it as a one-column DataFrame rather than a Series.
target_column = 'akt_inn'
y = df[[target_column]]

# Predictors: every column of `df` except 'rok' and the target, listed
# explicitly so the column order (and with it the order of the fitted
# coefficients) is fixed.
feature_columns = [
    'pkb_na_1_mieszkanca',
    'stopa_ref',
    'kurs_eur',
    'inflacja',
    'podm_10_tys',
    'podmioty',
    'msp',
    'podm_kap_zagr',
    'podm_kap_zagr_przychody',
    'srodki_trw',
    'przych_msp',
    'naklady_inw',
    'naklady_br_ogolem',
    'naklady_br_przedsiebiorstwa',
    'biotechnologia',
    'wsp_innow_RPO',
    'wsp_innow_PO_PW',
    'nakl_inn',
    'eksport_msp',
]
X = df[feature_columns]

In [218]:
# Natural-log transform of the predictors and of the target.
# np.log maps 0 to -inf and negative inputs to NaN; the -inf entries are
# dealt with in the next cell.
x_log, y_log = (np.log(frame) for frame in (X, y))

In [219]:
# Swap the -inf entries (the result of log(0)) for 0 and leave every other
# value, including NaN, untouched.
# NOTE(review): 0 is also log(1), so an original value of 0 becomes
# indistinguishable from an original value of 1 -- confirm this is acceptable.
is_neg_inf = x_log == -np.inf
x_log = x_log.mask(is_neg_inf, 0)

In [220]:
# Fit an L1-regularised (Lasso) linear model of the log share of innovative
# firms on the log-transformed predictors. The L1 penalty can shrink
# coefficients to exactly zero, so the non-zero entries of `lasso_coef`
# point at the predictors the model keeps.
# A single fit is sufficient: refitting with identical inputs yields the
# same coefficients and only repeats the work.
lasso = Lasso(alpha=0.1)  # alpha sets the strength of the L1 penalty
lasso.fit(x_log, y_log)

# One coefficient per column of `x_log`, in column order.
lasso_coef = lasso.coef_

In [221]:
# Pair every predictor name with its Lasso coefficient in a two-column
# table ('wskaznik' = indicator name, 'wspolczynnik' = coefficient).
names = x_log.columns
coef_df = pd.DataFrame({'wskaznik': names}).assign(wspolczynnik=lasso_coef)

In [222]:
# Show the coefficient table ordered from the smallest to the largest
# coefficient (ascending is the default sort order).
coef_df.sort_values(by='wspolczynnik')

Unnamed: 0,wskaznik,wspolczynnik
1,stopa_ref,-0.143668
0,pkb_na_1_mieszkanca,0.0
15,wsp_innow_RPO,0.0
14,biotechnologia,0.0
13,naklady_br_przedsiebiorstwa,0.0
12,naklady_br_ogolem,0.0
11,naklady_inw,0.0
10,przych_msp,0.0
17,nakl_inn,0.0
9,srodki_trw,0.0


### 4.2. Regresja

In [223]:
import statsmodels.api as sm

In [224]:
# Build the OLS design matrix: the log-transformed predictors plus an
# intercept column ('const' in the summary table).
# NOTE: this rebinds `X`, which until now held the raw predictors. Re-running
# the log-transform cell after this one would therefore take the log of the
# design matrix instead of the raw data; re-run from the feature-selection
# cell when repeating this section.
X = sm.add_constant(x_log)

In [225]:
# Ordinary least squares fit of the log share of innovative firms on the
# design matrix.
model = sm.OLS(y_log, X).fit()

# A fit without residual degrees of freedom reproduces the data exactly
# (R-squared of 1) and leaves standard errors, t-statistics and p-values
# undefined. Say so explicitly instead of relying on the low-level numerical
# warnings that `summary()` emits in that situation.
if model.df_resid <= 0:
    warnings.warn(
        f"OLS fit is saturated: {int(model.nobs)} observations leave "
        f"{int(model.df_resid)} residual degrees of freedom, so standard "
        "errors, t-statistics and p-values are undefined. Use fewer "
        "regressors or more observations before interpreting the summary.",
        RuntimeWarning,
    )

In [226]:
# Render the regression report for the fitted model: fit statistics,
# coefficient table and residual diagnostics.
model.summary()

  warn("omni_normtest is not valid with less than 8 observations; %i "
  return 1 - (np.divide(self.nobs - self.k_constant, self.df_resid)
  return 1 - (np.divide(self.nobs - self.k_constant, self.df_resid)
  return np.dot(wresid, wresid) / self.df_resid


0,1,2,3
Dep. Variable:,akt_inn,R-squared:,1.0
Model:,OLS,Adj. R-squared:,
Method:,Least Squares,F-statistic:,
Date:,"Mon, 07 Nov 2022",Prob (F-statistic):,
Time:,08:38:05,Log-Likelihood:,158.39
No. Observations:,5,AIC:,-306.8
Df Residuals:,0,BIC:,-308.7
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.0045,inf,-0,,,
pkb_na_1_mieszkanca,0.0380,inf,0,,,
stopa_ref,-0.1997,inf,-0,,,
kurs_eur,0.0427,inf,0,,,
inflacja,-0.0291,inf,-0,,,
podm_10_tys,0.0101,inf,0,,,
podmioty,-0.0257,inf,-0,,,
msp,-0.0257,inf,-0,,,
podm_kap_zagr,0.1288,inf,0,,,

0,1,2,3
Omnibus:,,Durbin-Watson:,0.174
Prob(Omnibus):,,Jarque-Bera (JB):,0.422
Skew:,0.32,Prob(JB):,0.81
Kurtosis:,1.729,Cond. No.,326.0
