## Stochastic, spatially-explicit SEIR2 model with seasonal variation and age groups 

1. CREATING THE OD MATRIX FOR BELGIAN DISTRICTS (43)
2.  (for later) CREATING THE SOCIAL CONTACTS MATRIX FOR EACH AGE GROUP 
3. GETTING THE DEMOGRAPHIC INFO OF PEOPLE PER DISTRIC - and age-group 
4. LOADING THE SEIR2 MODEL
5. USING THE OD MATRIX TO REPRESENT DISEASE DYNAMICS IN SPATIALLY EXPLICIT MANER 

In [1]:
# import dependencies

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import csv

In [2]:
###############################
## 1. CREATING THE OD MATRIX ##
###############################

# 1.1 loading the mobility data

# I have 14 files with 1 day mobility data that I need to average over time
import glob
# Specify the path to the directory containing your CSV files
csv_files_path = '/home/rita/PyProjects/Practice/Spatially-explicit SIR model/mobdata/*.csv'

# Use the glob module to get a list of all CSV files in the specified path
csv_files = glob.glob(csv_files_path)

# Initialize an empty list to store DataFrames from each CSV file
dataframes = []

# Loop through the list of CSV files and read each file into a DataFrame
for csv_file in csv_files:
    df = pd.read_csv(csv_file) # I want to remove the first row and column from my dataframe 
    dataframes.append(df)

# concatenate the list of dfs into a single df
combined_df = pd.concat(dataframes, ignore_index=True)
average_df = combined_df.groupby('mllp_postalcode', as_index= False).mean()
#print(average_df)

#print(dataframes)
#print(combined_df)

# I will double check my averaging process for people going from 11000 to 12000 manually: 
# numbers = [0.011915, 0.019763, 0.014834, 0.011858, 0.019372, 0.018516, 0.014299,  0.019145, 0.020209, 0.018978, 0.020856, 0.020529, 0.018907, 0.015570 ]
# average = sum(numbers)/len(numbers)
# print(average) #OK correct

#this is now my OD matrix for one standard day:
ODmatrix = average_df
# print(ODmatrix) # I want to remove the abroad column and the foreigner row 
ODmatrixx = ODmatrix.iloc[:-1, :-1]

# print("New OD matrix:" ,ODmatrixx) #OK
ODmatrix = ODmatrixx

# my first column of my OD matrix is actually the distric codes, not the amount of people
#print(ODmatrix.iloc[:,0]) 

# I set this first column to names: 
# Set the values in the first column as the index
#print(ODmatrix.columns)
ODmatrix.set_index('mllp_postalcode', inplace=True) # now my OD matrix is a 43 x 43

print(str(ODmatrix), type(ODmatrix)) #[43 rows x 43 columns] <class 'pandas.core.frame.DataFrame'>

                    11000     12000     13000     21000     23000     24000  \
mllp_postalcode                                                               
11000            0.907668  0.017482  0.011743  0.006954  0.007673  0.005690   
12000            0.054619  0.822509  0.015239  0.012735  0.023885  0.024053   
13000            0.032686  0.013118  0.884876  0.003587  0.004881  0.015349   
21000            0.003285  0.001787  0.001041  0.897564  0.039124  0.005228   
23000            0.007327  0.010966  0.002156  0.104044  0.779450  0.015238   
24000            0.009977  0.015757  0.009147  0.026770  0.029043  0.837363   
25000            0.002331  0.000952  0.000814  0.057851  0.022149  0.009447   
31000            0.004876  0.001421  0.001852  0.005286  0.004823  0.002870   
32000            0.004042  0.001320  0.001745  0.002631  0.004091  0.002448   
33000            0.003305  0.000991  0.001377  0.002541  0.003600  0.002182   
34000            0.004282  0.001297  0.001716  0.003

My OD matrix a 43 x 43 matrix, which will be the same for every age group

In [3]:
#######################################
# 2. DOWNLOADING THE DEMOGRAPHY DATA ##
#######################################

df_age = pd.read_csv("/home/rita/PyProjects/Practice/Spatially-explicit SIR model/age_structure_per_arr.csv")
# print(df_age)
# print(type(df_age))

# I need to divide this into the age-groups used in the SEIR2 model: 
age_groups = pd.IntervalIndex.from_tuples([(0,5),(5,15),(15,65),(65,120)], closed='left')

# Categorize ages into age groups - create a new variable
df_age['age_group'] = pd.cut(df_age['age'], bins=age_groups)
# print(df_age)

# Group by location and age group, then sum the counts
grouped_df = df_age.groupby(['NIS', 'age_group']).agg({'number': 'sum'}).reset_index()
# print(grouped_df) # final dataframe has 172 rows ( = 43 districts x 4 agegroups)

# I need to change the age group into a new dimension so that I obtain a matrix of 43x1 for every one of the 4 age groups 
# Pivot the DataFrame to have age groups as columns, locations as rows, and counts as values
pivot_df = grouped_df.pivot(index='NIS', columns='age_group', values='number').fillna(0)
locations = pivot_df.index.values # save location names

# Set location names as row names
pivot_df.index = locations
# print("pivot_df", pivot_df) # now each age-group is represented by a different column

# Convert pivot_df to ndarray
ndarray_2d = pivot_df.to_numpy()
# Create structured ndarray with location names as row names
dtype = [('location', 'U20')] + [(str(age_group), 'int') for age_group in pivot_df.columns]
structured_ndarray = np.empty(len(locations), dtype=dtype)

# Fill structured ndarray with location names and counts
structured_ndarray['location'] = locations
for i, age_group in enumerate(pivot_df.columns):
    structured_ndarray[str(age_group)] = ndarray_2d[:, i]

print("Structured ndarray with Location Names as Row Names:")
print(structured_ndarray)

print("Structure (Shape) of ndarray_2d:", ndarray_2d.shape)
print("Type of ndarray_2d:", ndarray_2d.dtype)

# #OK, this is my starting population at the beginning of the epidemic for every age group
sum_per_location = ndarray_2d

Structured ndarray with Location Names as Row Names:
[('11000', 61203, 126165, 669999, 200369)
 ('12000', 17430,  39665, 220022,  70008)
 ('13000', 22301,  50687, 297304,  94577)
 ('21000', 82968, 154611, 821083, 159593)
 ('23000', 35680,  80837, 406282, 120967)
 ('24000', 24381,  56019, 328099, 103578)
 ('25000', 20258,  48231, 259038,  78492)
 ('31000', 11527,  26754, 173189,  71275)
 ('32000',  2774,   5815,  32284,  10823)
 ('33000',  5191,  12075,  65867,  23437)
 ('34000', 15197,  32533, 182825,  61938)
 ('35000',  6538,  14482,  93658,  43102)
 ('36000',  7906,  17032,  97365,  32191)
 ('37000',  4662,  10367,  59103,  19296)
 ('38000',  2092,   4680,  34383,  20584)
 ('41000', 14411,  33085, 186306,  59848)
 ('42000',  9787,  22670, 128475,  41479)
 ('43000',  3911,   9060,  53776,  18945)
 ('44000', 28923,  61527, 366243, 107349)
 ('45000',  5913,  14597,  78498,  25602)
 ('46000', 13556,  30581, 159926,  50787)
 ('51000',  6261,  14906,  83168,  24133)
 ('52000', 22186,  4859

  grouped_df = df_age.groupby(['NIS', 'age_group']).agg({'number': 'sum'}).reset_index()


In [4]:
############################################################
# redistributing the people accoring to the OD matrix
############################################################

# This is the sum_per_location matrix x the OD matrix:

###################################
####### OD x population #######
###################################
ODmatrixT = np.transpose(ODmatrix)
# print("ODmatrix:", ODmatrix)
# print("ODmatrixT:", ODmatrixT)

ODmatrixT_ndarray = ODmatrixT.values
print(type(ODmatrixT_ndarray)) #<class 'numpy.ndarray'>
print("Shape ODmatrixT_ndarray", ODmatrixT_ndarray.shape)

sum_per_location_array = np.array(sum_per_location)
print("type sum_per_location", type(sum_per_location_array))   #<class 'numpy.ndarray'>
print("shape sum_per_location", sum_per_location_array.shape) #(43, 4)


# Now I mulitply the ODmatrixT with every age-group in the demographic data: 

# Create an empty list to store the results for each age group
result_matrices = []

# Loop over each age group in sum_per_location
for age_group_column in sum_per_location.T:  # Transpose to loop over columns

    # print("age_group_column", age_group_column)
    # print("shape age_group_column", age_group_column.shape)
    # print("shape ODmatrixT_ndarray", ODmatrixT_ndarray.shape)

    # Perform element-wise multiplication between ODmatrixT_ndarray and the current age group column
    result_matrix = ODmatrixT_ndarray @ age_group_column  # ndarray (43,)
    
    # Append the result to the list
    result_matrices.append(result_matrix) # list of 4 arrays

# Convert the list of result matrices to a numpy array
distr_new = np.array(result_matrices)
print("Shape distr_new", distr_new.shape) # (4, 43)

#print(distr_new)

<class 'numpy.ndarray'>
Shape ODmatrixT_ndarray (43, 43)
type sum_per_location <class 'numpy.ndarray'>
shape sum_per_location (43, 4)
Shape distr_new (4, 43)


We will now set up the model with an additional dimension for space  
These models are based on TWA's COVID-19 models in https://github.com/UGentBiomath/COVID19-Model/blob/master/src/covid19_DTM/models/SDE_models.py

In [5]:
################
## Load model ##
################

from DENV_models_pySODM import JumpProcess_SEIR2_spatial_stochastic as DENV_spat_stoch_SEIR2

In [6]:
#################
## Setup model ##
#################
# p = (273-181)/365
p = 0
# Define model parameters
params = {'alpha': 45 ,'sigma':5, 'gamma': 15, 'psi': 1.5, 'beta_0' : np.array([0.2, 0.15, 0.10, 0.05]), 'beta_1' : 0.5, 'ph':p} # I made younger groups more infectious than older, beta_1 is the same for all age groups

# Define initial condition
age_groups = pd.IntervalIndex.from_tuples([(0,5),(5,15),(15,65),(65,120)], closed='left')
locations = df_age["NIS"].unique()

Tau = 0.5
time = list(np.arange(0, 3*365, Tau))

# Using the hardcode Belgian demographics 
# Create a MultiIndex combining age groups and locations
multi_index = pd.MultiIndex.from_product([age_groups, locations], names=['age_group', 'NIS'])

# Flatten the sum_per_location array to match the MultiIndex
flattened_values = sum_per_location.flatten()

# Create initN Series with the MultiIndex and flattened values
initN = pd.Series(index=multi_index, data=flattened_values)

initN_df = initN.unstack(level='age_group')
initN_df.columns.name = 'age_group'
initN_df.index.name = 'NIS'
print("Shape of initN_df:", initN_df.shape) #(43, 4)


max_infected = initN_df * 0.0001
print("shape max_infected", max_infected.shape) #(43, 4)

# Generate random samples between 0 and the values in max_initI1
random_samples = np.random.rand(*max_infected.shape) * max_infected.values

# Create initI1 DataFrame with the same index and columns as max_infected
initI1 = pd.DataFrame(random_samples, index=max_infected.index, columns=max_infected.columns)

# Now initI1 has the shape (43, 4) matching max_infected
print("Shape of initI1:", initI1.shape)


#select random samples again for initI2
random_samples = np.random.rand(*max_infected.shape) * max_infected.values
initI2 = pd.DataFrame(random_samples, index=max_infected.index, columns=max_infected.columns)
print("Shape of initI2:", initI2.shape)


initS = initN_df - initI1 - initI2
print("Shape of initS:", initS.shape)

init_states = {'S': list(initS),
               'I1': list(initI1),
               'I2': list(initI2)}

# Define model coordinates
coordinates={'NIS' : locations,'age_group': age_groups}

Shape of initN_df: (43, 4)
shape max_infected (43, 4)
Shape of initI1: (43, 4)
Shape of initI2: (43, 4)
Shape of initS: (43, 4)


In [7]:
####################
# Initialize model #
####################

model_SEIR2_SF = DENV_spat_stoch_SEIR2(states=init_states, parameters=params, coordinates=coordinates) 

TypeError: float() argument must be a string or a real number, not 'pandas._libs.interval.Interval'

In [10]:
# print("InitN", InitN)
# print(initN_df)
print("locations shape", locations.shape, "locations type", type(locations))
print("age_groups shape", age_groups.shape, "age_groups type", type(age_groups)) #pandas.core.indexes.interval.IntervalIndex'>


locations shape (43,) locations type <class 'numpy.ndarray'>
age_groups shape (4,) age_groups type <class 'pandas.core.indexes.interval.IntervalIndex'>
