In [1]:
%%capture
!pip install prophet
!pip install stan
!pip install pystan
!pip install nest_asyncio

In [2]:
import pandas as pd
from prophet import Prophet
import numpy as np
import math
import stan
import nest_asyncio
nest_asyncio.apply()

In [3]:
df = pd.read_csv('https://raw.githubusercontent.com/Jeremstar/business_is_business/main/England_covid.csv')
df.head()


Unnamed: 0,date,area_type,area_name,daily_cases,cum_cases,new_deaths_28days,cum_deaths_28days,new_deaths_60days,cum_deaths_60days,new_first_episode,...,new_third_dose,cum_third_dose,new_admissions,cum_admissions,hospital_cases,ventilator_beds,trans_rate_min,trans_rate_max,trans_growth_min,trans_growth_max
0,2023-02-01,nation,England,1573.0,20535061.0,,,,,892.0,...,625.0,33356701.0,,,6055.0,136.0,,,,
1,2023-01-31,nation,England,2678.0,20533488.0,,,,,1564.0,...,587.0,33356076.0,,,6015.0,136.0,,,,
2,2023-01-30,nation,England,2886.0,20530810.0,30.0,180801.0,41.0,226052.0,1724.0,...,613.0,33355489.0,741.0,958745.0,5959.0,142.0,,,,
3,2023-01-29,nation,England,2118.0,20527924.0,44.0,180771.0,72.0,226011.0,1224.0,...,247.0,33354876.0,705.0,958004.0,5834.0,145.0,,,,
4,2023-01-28,nation,England,1711.0,20525806.0,47.0,180727.0,80.0,225939.0,1119.0,...,1326.0,33354629.0,562.0,957299.0,5750.0,140.0,,,,


In [4]:
#Nettoyage des données
df = df[["date", "cum_deaths_60days","cum_virus_test","cum_cases"]]
df.head()

Unnamed: 0,date,cum_deaths_60days,cum_virus_test,cum_cases
0,2023-02-01,,539856513,20535061.0
1,2023-01-31,,539836162,20533488.0
2,2023-01-30,226052.0,539813539,20530810.0
3,2023-01-29,226011.0,539788759,20527924.0
4,2023-01-28,225939.0,539765505,20525806.0


In [5]:
missing = df.isnull().sum()
print(missing)
print(df.size)

#On décide d'enlever les valeurs manquantes
df = df.dropna(axis=0)
df.head()

date                  0
cum_deaths_60days    61
cum_virus_test        0
cum_cases            27
dtype: int64
4504


Unnamed: 0,date,cum_deaths_60days,cum_virus_test,cum_cases
2,2023-01-30,226052.0,539813539,20530810.0
3,2023-01-29,226011.0,539788759,20527924.0
4,2023-01-28,225939.0,539765505,20525806.0
5,2023-01-27,225859.0,539741262,20524095.0
6,2023-01-26,225762.0,539709975,20521906.0


In [6]:
#On transforme la date en donnée incrémentale 
df = df.sort_values(by='date')
df['j'] = np.arange(df.shape[0])
df["j"] = df['j'] + 1
df.tail()

Unnamed: 0,date,cum_deaths_60days,cum_virus_test,cum_cases,j
6,2023-01-26,225762.0,539709975,20521906.0,1061
5,2023-01-27,225859.0,539741262,20524095.0,1062
4,2023-01-28,225939.0,539765505,20525806.0,1063
3,2023-01-29,226011.0,539788759,20527924.0,1064
2,2023-01-30,226052.0,539813539,20530810.0,1065


In [7]:
#fonction pour implémenter la matrice design à partir de j (voir mémoire pour la méthodologie). Elle prend un j et retourne le vecteur associé. 
def find_var_expl_time(j): 
    x_j = [1]
    x_j.append(j/365*3)
    for l in range(2,46): 
        if l >= 2 and l <= 8: 
            x_j.append(math.cos(2*math.pi*(l-2)*j/7))
        elif l >= 9 and l <= 15:
            x_j.append(math.sin(2*math.pi*(l-9)*j/7))
        elif l >= 16 and l <= 30: 
            x_j.append(math.cos(2*math.pi*(l-16)*j/(365.25)))
        elif l >= 31 and l <= 45:
            x_j.append(math.sin(2*math.pi*(l-35)*j/(365.25)))
    return x_j

In [8]:
#Construction de la matrice design à partir de la fonction précèdente. 
mat_design = np.empty((0,46))
for j in df["j"]:
    row = np.array(find_var_expl_time(j))
    mat_design = np.r_[mat_design,[row]]

mat_design = pd.DataFrame(mat_design)

In [9]:
mat_design

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,36,37,38,39,40,41,42,43,44,45
0,1.0,0.008219,1.0,0.623490,-0.222521,-0.900969,-0.900969,-0.222521,0.623490,0.0,...,0.017202,0.034398,0.051584,0.068755,0.085906,0.103031,0.120126,0.137185,0.154204,0.171177
1,1.0,0.016438,1.0,-0.222521,-0.900969,0.623490,0.623490,-0.900969,-0.222521,0.0,...,0.034398,0.068755,0.103031,0.137185,0.171177,0.204966,0.238513,0.271777,0.304719,0.337301
2,1.0,0.024658,1.0,-0.900969,0.623490,-0.222521,-0.222521,0.623490,-0.900969,0.0,...,0.051584,0.103031,0.154204,0.204966,0.255182,0.304719,0.353445,0.401229,0.447945,0.493468
3,1.0,0.032877,1.0,-0.900969,0.623490,-0.222521,-0.222521,0.623490,-0.900969,0.0,...,0.068755,0.137185,0.204966,0.271777,0.337301,0.401229,0.463258,0.523094,0.580455,0.635068
4,1.0,0.041096,1.0,-0.222521,-0.900969,0.623490,0.623490,-0.900969,-0.222521,0.0,...,0.085906,0.171177,0.255182,0.337301,0.416926,0.493468,0.566362,0.635068,0.699079,0.757922
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1060,1.0,8.720548,1.0,-0.900969,0.623490,-0.222521,-0.222521,0.623490,-0.900969,0.0,...,-0.562812,-0.930424,-0.975336,-0.681972,-0.152079,0.430560,0.863867,0.997559,0.785267,0.300620
1061,1.0,8.728767,1.0,-0.222521,-0.900969,0.623490,0.623490,-0.900969,-0.222521,0.0,...,-0.548511,-0.917267,-0.985424,-0.730644,-0.236424,0.335276,0.797102,0.997707,0.871351,0.459442
1062,1.0,8.736986,1.0,0.623490,-0.222521,-0.900969,-0.900969,-0.222521,0.623490,0.0,...,-0.534046,-0.903025,-0.992887,-0.775859,-0.319020,0.236424,0.718792,0.978989,0.936591,0.604702
1063,1.0,8.745205,1.0,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,0.0,...,-0.519424,-0.887713,-0.997707,-0.817401,-0.399258,0.135055,0.630072,0.941759,0.979425,0.732111


In [14]:
model_reg1 = """
data {
int<lower=0> N;
int<lower=0> K;
matrix[N, K] x;
vector[N] y;
}
parameters {
real alpha; // intercept
vector[K] beta; // coefficients for predictors
real<lower=0> sigma; // error scale
}
model {
y ~ normal(x * beta + alpha, sigma);
}
"""

In [15]:
Data = {"y": list(df["cum_virus_test"]),
                "N": 1065,
                "K": 46,
                "x": mat_design.to_json()}


posterior = stan.build(model_reg1, data=Data)
fit = posterior.sample(num_chains=4, num_samples=1000)
beta = fit["beta"]  
df2 = fit.to_frame()  # pandas `DataFrame`

Building...

In file included from /opt/mamba/lib/python3.10/site-packages/httpstan/include/stan/math/prim/fun.hpp:124,
                 from /opt/mamba/lib/python3.10/site-packages/httpstan/include/stan/math/rev/fun/multiply.hpp:7,
                 from /opt/mamba/lib/python3.10/site-packages/httpstan/include/stan/math/rev/fun/elt_multiply.hpp:9,
                 from /opt/mamba/lib/python3.10/site-packages/httpstan/include/stan/math/rev/fun.hpp:55,
                 from /opt/mamba/lib/python3.10/site-packages/httpstan/include/stan/math/rev.hpp:10,
                 from /opt/mamba/lib/python3.10/site-packages/httpstan/include/stan/math.hpp:19,
                 from /opt/mamba/lib/python3.10/site-packages/httpstan/include/stan/model/model_header.hpp:4,
                 from /home/onyxia/.cache/httpstan/4.9.1/models/j5lpltdz/model_j5lpltdz.cpp:2:
/opt/mamba/lib/python3.10/site-packages/httpstan/include/stan/math/prim/fun/grad_2F1.hpp: In instantiation of ‘TupleT stan::math::internal::grad_2F1_impl(c





Building: found in cache, done.Messages from stanc:
    provided, or the prior(s) depend on data variables. In the later case,
    this may be a false positive.
    provided, or the prior(s) depend on data variables. In the later case,
    this may be a false positive.
    provided, or the prior(s) depend on data variables. In the later case,
    this may be a false positive.


KeyError: 'message'

1065

In [None]:
df["cum_virus_test"]

In [None]:
Model = """
data {
  int N; 
  int y[N];
  int K;
  matrix[N, K] X;
}
parameters {
  real sigma;
  vector[K] beta;
  vector[K] alpha;
  real<lower=0> epsilon;
}
transformed parameters {
  vector[N] mu;
  mu = exponential(x*beta);
}
model {
  sigma ~ normal(0, 0.5)
  y ~ normal(alpha + beta * x, epsilon);
  y ~ neg_binomial_2_log(mu, sigma);
}
generated quantities {
  vector[N] mu;
  vector[N] log_lik;
  vector[N] y_rep;
  mu = exp(eta);
  for (i in 1:N) {
    log_lik[i] = neg_binomial_2_log_lpmf(y[i] | eta[i], phi);
    y_rep[i] = neg_binomial_2_rng(mu[i], phi);
  }
}
"""

In [None]:
Data = {"y": list(df["cum_virus_test"]),
                "N": 1065,
                "K": 46,
                "X": mat_design.to_json()}


posterior = stan.build(Model, data=Data)
fit = posterior.sample(num_chains=4, num_samples=1000)
df2 = fit.to_frame()  # pandas `DataFrame`