# TASK 1: EBOLA DATA

## **Steps of this analysis:**

###  1- Importing data : 
  - Automate importation of all files in the country data folder
  - Check if the file format in the folder is a csv
  
###  2- For each dataset we need to :
  - check size of data
  - detect important features
  - check values that each feature can take 
    * helps to choose important TERM to filter on
    * helps to detect some errors
    * to know range and type of values
  - remove duplicate rows
  - convert column values to the appropriate format if needed
  - use pandas.describe, it helps to understand data
  - add column country to index data
  - reorder columns and set the index
  - drop unnecessary feature
  - try to standardize features names when possible 
  - try to complete missing data if the data allows it
  
###  3- Merge dataframes :
  - Filter important feature to answer the question and Merge all data
  - remove rows with incomplete information as it's crucial at this level
  - index dataframe
  - compute the average per day for each month and for each country

## Analysis:

In [45]:
import pandas as pd
import os.path
from calendar import monthrange

pd.set_option('display.max_columns', None)

In [46]:
DATA_FOLDER = '../../../ADA2017-Tutorials/02 - Intro to Pandas/Data/'

In [47]:
# Root folder of the Ebola data; it holds one sub-folder of reports per country
EBOLA_FOLDER = DATA_FOLDER+'ebola/'

guinea = EBOLA_FOLDER+'guinea_data/'
liberia = EBOLA_FOLDER+'liberia_data/'
sl = EBOLA_FOLDER+'sl_data' # Sierra Leone (note: no trailing slash, unlike the two paths above)

# Country labels, identical to the values written in the 'Country' column of each dataset
COUNTRIES=['Guinea','Liberia','Sierra Leone']
# Alternatives separated by '|', matched case-insensitively against 'Description'
# to keep only the rows that report new cases or new deaths
TERMS='Newly reported deaths|Newly Reported Cases|New case|New death|new_' 
# 'new_' is the term needed to select the new cases/deaths in the Sierra Leone data,
# whose labels (e.g. 'new_confirmed') do not contain the word "case" explicitly.
# We did not standardize the description labels because we do not know exactly
# what each country means by each description.

In [48]:
def loadAllFile(FolderPath):
    """Load every CSV file of a folder into a single DataFrame.

    Parameters
    ----------
    FolderPath : str
        Path of the directory to scan. Only entries whose name ends with
        ``.csv`` are read; everything else is ignored.

    Returns
    -------
    pandas.DataFrame
        The rows of all CSV files stacked under a fresh 0..n-1 index.
        A column missing from a file is filled with NaN for that file's
        rows. An empty DataFrame is returned when the folder contains
        no CSV file.
    """
    # os.listdir returns entries in arbitrary order; sorting makes the row
    # order of the result independent of the file system.
    frames = [
        pd.read_csv(os.path.join(FolderPath, f))
        for f in sorted(os.listdir(FolderPath))
        if f.endswith(".csv")
    ]
    if not frames:
        # pd.concat raises on an empty list, so keep the "no file" result explicit
        return pd.DataFrame()
    # A single concat replaces one DataFrame.append per file: append re-copied
    # the accumulated frame on every iteration and was removed in pandas 2.0.
    return pd.concat(frames, ignore_index=True)

### Guinea

In [49]:
data_guinea = loadAllFile(guinea)
data_guinea.shape

(714, 25)

In [50]:
data_guinea.columns

Index(['Beyla', 'Boffa', 'Conakry', 'Coyah', 'Dabola', 'Dalaba', 'Date',
       'Description', 'Dinguiraye', 'Dubreka', 'Forecariah', 'Gueckedou',
       'Kerouane', 'Kindia', 'Kissidougou', 'Kouroussa', 'Lola', 'Macenta',
       'Mzerekore', 'Nzerekore', 'Pita', 'Siguiri', 'Telimele', 'Totals',
       'Yomou'],
      dtype='object')

In [51]:
data_guinea['Country']='Guinea'

In [52]:
data_guinea[data_guinea.duplicated()==True] # no duplicate row

Unnamed: 0,Beyla,Boffa,Conakry,Coyah,Dabola,Dalaba,Date,Description,Dinguiraye,Dubreka,Forecariah,Gueckedou,Kerouane,Kindia,Kissidougou,Kouroussa,Lola,Macenta,Mzerekore,Nzerekore,Pita,Siguiri,Telimele,Totals,Yomou,Country


In [53]:
data_guinea.Date=pd.to_datetime(data_guinea.Date) 
data_guinea.Date.unique()

array(['2014-08-04T00:00:00.000000000', '2014-08-26T00:00:00.000000000',
       '2014-08-27T00:00:00.000000000', '2014-08-30T00:00:00.000000000',
       '2014-08-31T00:00:00.000000000', '2014-09-02T00:00:00.000000000',
       '2014-09-04T00:00:00.000000000', '2014-09-07T00:00:00.000000000',
       '2014-09-08T00:00:00.000000000', '2014-09-09T00:00:00.000000000',
       '2014-09-11T00:00:00.000000000', '2014-09-14T00:00:00.000000000',
       '2014-09-16T00:00:00.000000000', '2014-09-17T00:00:00.000000000',
       '2014-09-19T00:00:00.000000000', '2014-09-21T00:00:00.000000000',
       '2014-09-22T00:00:00.000000000', '2014-09-23T00:00:00.000000000',
       '2014-09-24T00:00:00.000000000', '2014-09-26T00:00:00.000000000',
       '2014-09-30T00:00:00.000000000', '2014-10-01T00:00:00.000000000'], dtype='datetime64[ns]')

In [54]:
data_guinea.Totals.unique() # we notice different value :'66%', nan, integer (non negative value)

array([5, 0, 4, 9, 11, 133, 351, 495, 2, 228, 363, 31, 18, 13, 138, 89,
       161, 190, 52, 81, 7, 6, 220, 277, 21, 12, 8, 6309, 795, 5513, 772,
       785, 1235, '18', nan, '10', '28', '30', '141', '490', '661', '518',
       '5', '2', '292', '958', '66%', '0', '45', '26', '22', '87', '6',
       '997', '137', '7521', '1085', '1070', '6438', '9', '55', '1700',
       '12', '142', '499', '663', '294', '438', '80', '14', '999', '127',
       '7662', '1264', '1250', '6496', '58', '1719', '15', '24', '32',
       '533', '707', '560', '324', '468', '50', '82', '33', '1094', '21',
       '7734', '1251', '1173', '6504', '76', '41', '1858', '8', '29', '46',
       '36', '150', '563', '749', '642', '3', '337', '489', '65%', '99',
       '1113', '83', '1941', '11', '25', '49', '591', '790', '349', '501',
       '63%', '105', '1144', '8189', '1842', '1641', '31', '2018', '13',
       '17', '51', '151', '621', '823', '682', '368', '522', '61%', '103',
       '1191', '8746', '1978', '1707', '6797

- For these rows we don't know how 'Totals' is calculated, unlike the other rows, for which we suppose it is the sum over the cities.

In [55]:
data_guinea[data_guinea.Totals.str.contains('%')==True].sample(3) 

Unnamed: 0,Beyla,Boffa,Conakry,Coyah,Dabola,Dalaba,Date,Description,Dinguiraye,Dubreka,Forecariah,Gueckedou,Kerouane,Kindia,Kissidougou,Kouroussa,Lola,Macenta,Mzerekore,Nzerekore,Pita,Siguiri,Telimele,Totals,Yomou,Country
248,,67%,44%,33%,100%,,2014-09-07,Fatality rate for confirmed and probables,100%,19%,67%,84%,9%,,86%,100%,,60%,,43%,38%,50%,38%,65%,42%,Guinea
536,,67%,44%,56%,100%,0%,2014-09-22,Fatality rate for confirmed and probables,100%,33%,70%,83%,23%,0.0,88%,100%,,54%,67%,,38%,50%,38%,62%,45%,Guinea
120,,64%,44%,,100%,,2014-08-30,Fatality rate for confirmed and probables,100%,14%,43%,85%,,,86%,100%,,56%,,55%,67%,50%,38%,66%,27%,Guinea


In [56]:
data_guinea[data_guinea.Totals.isnull()] 

Unnamed: 0,Beyla,Boffa,Conakry,Coyah,Dabola,Dalaba,Date,Description,Dinguiraye,Dubreka,Forecariah,Gueckedou,Kerouane,Kindia,Kissidougou,Kouroussa,Lola,Macenta,Mzerekore,Nzerekore,Pita,Siguiri,Telimele,Totals,Yomou,Country
43,,,,,,,2014-08-26,New cases of probables,,,,,,,,,,,,,,,,,,Guinea
660,,,,,,,2014-09-30,Total deaths of suspects,,,,,,,,,,,,,,,,,,Guinea


In [57]:
data_guinea.Description.unique() # Observations 

array(['New cases of suspects', 'New cases of probables',
       'New cases of confirmed', 'Total new cases registered so far',
       'Total cases of suspects', 'Total cases of probables',
       'Total cases of confirmed',
       'Cumulative (confirmed + probable + suspects)',
       'New deaths registered today',
       'New deaths registered today (confirmed)',
       'New deaths registered today (probables)',
       'New deaths registered today (suspects)',
       'Total deaths of suspects', 'Total deaths of probables',
       'Total deaths of confirmed',
       'Total deaths (confirmed + probables + suspects)',
       'Total PEC center today', 'Total PEC center today (confirmed)',
       'Total PEC center today (probables)',
       'Total PEC center today (suspects)',
       'Total of deaths in confirmed cases in CTE',
       'Total of cured in confirmed cases in CTE',
       'Number of male confirmed cases',
       'Number of female confirmed cases',
       'Number of male proba

In [58]:
data_guinea.describe() 

Unnamed: 0,Beyla,Boffa,Conakry,Coyah,Dabola,Dalaba,Date,Description,Dinguiraye,Dubreka,Forecariah,Gueckedou,Kerouane,Kindia,Kissidougou,Kouroussa,Lola,Macenta,Mzerekore,Nzerekore,Pita,Siguiri,Telimele,Totals,Yomou,Country
count,65.0,495.0,633.0,381.0,522.0,245.0,714,714,519.0,499.0,431.0,644.0,380.0,147.0,519.0,495.0,20.0,638.0,358.0,163.0,496.0,496.0,519.0,712.0,460.0,714
unique,12.0,29.0,227.0,28.0,13.0,9.0,22,60,6.0,37.0,44.0,223.0,42.0,7.0,24.0,19.0,6.0,168.0,78.0,42.0,35.0,22.0,28.0,317.0,26.0,1
top,0.0,0.0,0.0,0.0,0.0,0.0,2014-08-04 00:00:00,Total deaths of probables,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Guinea
freq,31.0,246.0,81.0,195.0,272.0,167.0,42,22,373.0,216.0,220.0,86.0,219.0,91.0,268.0,230.0,12.0,166.0,102.0,40.0,197.0,249.0,207.0,83.0,209.0,714
first,,,,,,,2014-08-04 00:00:00,,,,,,,,,,,,,,,,,,,
last,,,,,,,2014-10-01 00:00:00,,,,,,,,,,,,,,,,,,,


In [59]:
# Put 'Description' and 'Totals' first; the other columns keep their current order
cols = ['Description', 'Totals']
cols.extend(name for name in data_guinea.columns if name not in ('Description', 'Totals'))
# Reorder the columns, then index every observation by country and date
data_guinea = data_guinea[cols].set_index(['Country', 'Date'])

In [60]:
data_guinea.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Description,Totals,Beyla,Boffa,Conakry,Coyah,Dabola,Dalaba,Dinguiraye,Dubreka,Forecariah,Gueckedou,Kerouane,Kindia,Kissidougou,Kouroussa,Lola,Macenta,Mzerekore,Nzerekore,Pita,Siguiri,Telimele,Yomou
Country,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
Guinea,2014-08-04,New cases of suspects,5,,0,5,,0,,0,0,,0,,,0,0,,0,,0,0,0,0,
Guinea,2014-08-04,New cases of probables,0,,0,0,,0,,0,0,,0,,,0,0,,0,,0,0,0,0,
Guinea,2014-08-04,New cases of confirmed,4,,0,1,,0,,0,0,,3,,,0,0,,0,,0,0,0,0,
Guinea,2014-08-04,Total new cases registered so far,9,,0,6,,0,,0,0,,3,,,0,0,,0,,0,0,0,0,
Guinea,2014-08-04,Total cases of suspects,11,,0,9,,0,,0,0,,2,,,0,0,,0,,0,0,0,0,


### Liberia 

In [61]:
data_liberia = loadAllFile(liberia)
data_liberia.shape

(3152, 19)

In [62]:
data_liberia['Country']='Liberia'

In [63]:
data_liberia.columns

Index(['Bomi County', 'Bong County', 'Date', 'Gbarpolu County', 'Grand Bassa',
       'Grand Cape Mount', 'Grand Gedeh', 'Grand Kru', 'Lofa County',
       'Margibi County', 'Maryland County', 'Montserrado County', 'National',
       'Nimba County', 'River Gee County', 'RiverCess County', 'Sinoe County',
       'Unnamed: 18', 'Variable', 'Country'],
      dtype='object')

In [64]:
data_liberia['Variable']

0                                     Specimens collected
1                           Specimens pending for testing
2                                  Total specimens tested
3                                   Newly reported deaths
4                        Total death/s in confirmed cases
5                         Total death/s in probable cases
6                        Total death/s in suspected cases
7       Total death/s in confirmed, probable, suspecte...
8       Case Fatality Rate (CFR) - Confirmed & Probabl...
9                                 Newly reported contacts
10                                  Total contacts listed
11                              Currently under follow-up
12                                          Contacts seen
13                Contacts who completed 21 day follow-up
14                             Contacts lost to follow-up
15                                         New admissions
16                 Total no. currently in Treatment Units
17            

In [65]:
data_liberia.rename(columns={'Variable': 'Description','National': 'Totals'}, inplace=True)

- Only 3 of the 3152 observations, all from the same day, have a value in 'Unnamed: 18', which is why this column was created. Looking into 2014-12-01.csv, a comma is missing at the end of these rows, so we suspect an error (shifted values).

In [66]:
data_liberia[data_liberia['Unnamed: 18'].isnull()==False]

Unnamed: 0,Bomi County,Bong County,Date,Gbarpolu County,Grand Bassa,Grand Cape Mount,Grand Gedeh,Grand Kru,Lofa County,Margibi County,Maryland County,Montserrado County,Totals,Nimba County,River Gee County,RiverCess County,Sinoe County,Unnamed: 18,Description,Country
2887,0.0,3.0,12/1/2014,0.0,2.0,,2.0,0.0,0.0,0.0,5.0,0.0,25.0,12.0,0.0,0.0,0.0,1.0,New Case/s (Suspected),Liberia
2888,0.0,0.0,12/1/2014,0.0,1.0,,0.0,0.0,0.0,0.0,1.0,0.0,9.0,7.0,0.0,0.0,0.0,0.0,New Case/s (Probable),Liberia
2889,0.0,0.0,12/1/2014,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,New case/s (confirmed),Liberia


In [67]:
data_liberia.drop('Unnamed: 18', axis=1, inplace=True)

In [68]:
data_liberia.Date=pd.to_datetime(data_liberia.Date) # need to parse to the same date format as guinea date

In [69]:
data_liberia[data_liberia.duplicated()==True] # no duplicate row

Unnamed: 0,Bomi County,Bong County,Date,Gbarpolu County,Grand Bassa,Grand Cape Mount,Grand Gedeh,Grand Kru,Lofa County,Margibi County,Maryland County,Montserrado County,Totals,Nimba County,River Gee County,RiverCess County,Sinoe County,Description,Country


In [70]:
data_liberia.describe() 

Unnamed: 0,Bomi County,Bong County,Gbarpolu County,Grand Bassa,Grand Cape Mount,Grand Gedeh,Grand Kru,Lofa County,Margibi County,Maryland County,Montserrado County,Totals,Nimba County,River Gee County,RiverCess County,Sinoe County
count,2012.0,2110.0,1531.0,1695.0,1695.0,1616.0,1409.0,2310.0,2090.0,1675.0,2309.0,2392.0,2080.0,1842.0,1880.0,1840.0
mean,39.652187,55.003365,4.280862,22.213923,21.772035,1.421163,5.753939,90.534113,194.645789,8.967164,512.878995,928.429849,48.895433,2.087405,5.599309,5.522935
std,104.48681,140.724044,14.650063,54.190995,56.158374,5.477662,17.903874,172.994014,489.976087,120.64288,1305.80713,2278.894424,127.251195,5.350046,17.524092,16.032927
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,8.0,0.0,0.0,0.0,0.0
50%,0.0,3.0,0.0,1.0,1.0,0.0,0.0,11.0,2.0,0.0,24.0,48.0,0.0,0.0,0.0,0.0
75%,27.0,31.0,0.0,18.0,8.0,0.0,2.0,91.5,63.75,1.0,222.0,441.0,30.0,1.0,1.0,1.0
max,782.0,1307.0,121.0,405.0,410.0,72.0,344.0,2145.0,3036.0,3175.0,11299.0,19155.0,893.0,100.0,113.0,102.0


In [71]:
# Put 'Description' and 'Totals' first; the other columns keep their current order
cols = ['Description', 'Totals']
cols.extend(name for name in data_liberia.columns if name not in ('Description', 'Totals'))
# Reorder the columns, then index every observation by country and date
data_liberia = data_liberia[cols].set_index(['Country', 'Date'])

In [72]:
data_liberia.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Description,Totals,Bomi County,Bong County,Gbarpolu County,Grand Bassa,Grand Cape Mount,Grand Gedeh,Grand Kru,Lofa County,Margibi County,Maryland County,Montserrado County,Nimba County,River Gee County,RiverCess County,Sinoe County
Country,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
Liberia,2014-06-16,Specimens collected,1.0,,,,,,,,1.0,,,0.0,,,,
Liberia,2014-06-16,Specimens pending for testing,0.0,,,,,,,,0.0,,,0.0,,,,
Liberia,2014-06-16,Total specimens tested,28.0,,,,,,,,21.0,,,7.0,,,,
Liberia,2014-06-16,Newly reported deaths,2.0,,,,,,,,1.0,,,0.0,,,,
Liberia,2014-06-16,Total death/s in confirmed cases,8.0,,,,,,,,4.0,,,0.0,,,,


### Sierra Leone

In [73]:
data_sl = loadAllFile(sl)
data_sl.shape

(3262, 27)

In [74]:
data_sl.columns

Index(['34 Military Hospital', 'Bo', 'Bo EMC', 'Bombali', 'Bonthe',
       'Hastings-F/Town', 'Kailahun', 'Kambia', 'Kenema', 'Kenema (IFRC)',
       'Kenema (KGH)', 'Koinadugu', 'Kono', 'Moyamba', 'National',
       'Police training School', 'Police traning School', 'Port Loko',
       'Pujehun', 'Tonkolili', 'Unnamed: 18', 'Western area',
       'Western area combined', 'Western area rural', 'Western area urban',
       'date', 'variable'],
      dtype='object')

In [75]:
data_sl.rename(columns={'variable': 'Description','date': 'Date','National': 'Totals'}, inplace=True)
data_sl.Date=pd.to_datetime(data_sl.Date) 

In [76]:
data_sl['Unnamed: 18'].unique() # this column only contains NaN, so it gives us no information

array([ nan])

In [77]:
data_sl.drop('Unnamed: 18', axis=1, inplace=True)

In [78]:
data_sl[data_sl.duplicated()==True] # no duplicate row

Unnamed: 0,34 Military Hospital,Bo,Bo EMC,Bombali,Bonthe,Hastings-F/Town,Kailahun,Kambia,Kenema,Kenema (IFRC),Kenema (KGH),Koinadugu,Kono,Moyamba,Totals,Police training School,Police traning School,Port Loko,Pujehun,Tonkolili,Western area,Western area combined,Western area rural,Western area urban,Date,Description


In [79]:
data_sl['Country']='Sierra Leone'

In [80]:
data_sl.describe()

Unnamed: 0,34 Military Hospital,Bo EMC,Hastings-F/Town,Kenema (IFRC),Police training School,Police traning School,Western area,Western area combined
count,7.0,7.0,56.0,137.0,7.0,0.0,149.0,16.0
mean,19.285714,9.857143,81.696429,24.540146,7.428571,,87709.79,1304507.0
std,23.192774,13.765035,94.594622,56.857021,12.686701,,327471.3,0.0
min,0.0,0.0,0.0,0.0,0.0,,0.0,1304507.0
25%,5.5,1.0,3.0,0.0,0.0,,4.0,1304507.0
50%,8.0,2.0,78.5,7.0,0.0,,30.0,1304507.0
75%,24.5,14.0,108.25,28.0,13.0,,150.0,1304507.0
max,67.0,37.0,318.0,556.0,26.0,,1304507.0,1304507.0


In [81]:
data_sl.dropna(axis=1, how='all', inplace=True) #Drop columns where all of the elements are nan (Police traning School)

In [82]:
# Put 'Description' and 'Totals' first; the other columns keep their current order
cols = ['Description', 'Totals']
cols.extend(name for name in data_sl.columns if name not in ('Description', 'Totals'))
# Reorder the columns, then index every observation by country and date
data_sl = data_sl[cols].set_index(['Country', 'Date'])

In [83]:
data_sl.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Description,Totals,34 Military Hospital,Bo,Bo EMC,Bombali,Bonthe,Hastings-F/Town,Kailahun,Kambia,Kenema,Kenema (IFRC),Kenema (KGH),Koinadugu,Kono,Moyamba,Police training School,Port Loko,Pujehun,Tonkolili,Western area,Western area combined,Western area rural,Western area urban
Country,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
Sierra Leone,2014-08-12,population,6348350,,654142,,494139,168729,,465048,341690,653013,,,335471,325003,278119,,557978,335574,434937,,,263619,1040888
Sierra Leone,2014-08-12,new_noncase,4,,0,,0,0,,0,0,3,,,0,0,0,,1,0,0,,,0,0
Sierra Leone,2014-08-12,new_suspected,10,,1,,0,0,,0,0,9,,,0,0,0,,0,0,0,,,0,0
Sierra Leone,2014-08-12,new_probable,1,,1,,0,0,,0,0,0,,,0,0,0,,0,0,0,,,0,0
Sierra Leone,2014-08-12,new_confirmed,11,,0,,0,0,,0,0,9,,,0,0,0,,2,0,0,,,0,0


##  All Data

In [84]:
# Keep only the two columns needed to answer the question
data_guinea = data_guinea.loc[:, ['Description', 'Totals']]
data_liberia = data_liberia.loc[:, ['Description', 'Totals']]
data_sl = data_sl.loc[:, ['Description', 'Totals']]

# Stack the three countries, then discard every row in which
# the description or the total is missing (NaN)
data = pd.concat([data_guinea, data_liberia, data_sl]).dropna(axis=0, how='any')

data.sample(6)

Unnamed: 0_level_0,Unnamed: 1_level_0,Description,Totals
Country,Date,Unnamed: 2_level_1,Unnamed: 3_level_1
Liberia,2014-09-30,Cumulative deaths among HCW,92.0
Guinea,2014-09-23,Total new cases registered so far,29.0
Sierra Leone,2014-10-06,population,6348350.0
Guinea,2014-09-08,New deaths registered among health workers,0.0
Liberia,2014-08-18,New case/s (confirmed),2.0
Liberia,2014-10-18,Newly reported contacts,84.0


In [85]:
# Average number of new cases/deaths per day, for each country, month and description.
# The per-month frames are collected in a list and concatenated once at the end
# (DataFrame.append re-copied the result on every iteration and was removed in pandas 2.0).
monthly_frames = []
for country in COUNTRIES:
    country_data = data.xs(country, level='Country')
    # 'YYYY-MM' label of every row; used both to select a month and as the 'Date' index value
    months = country_data.index.strftime('%Y-%m')
    for month in months.unique():
        year, month_number = (int(part) for part in month.split('-'))
        days_in_month = monthrange(year, month_number)[1]

        # Select the rows of the month with a mask instead of slicing by date labels:
        # the mask does not require the dates to be sorted.
        tmp = country_data[months == month]
        # .copy() so that the conversion below writes to an independent frame, not to a slice of `data`
        tmp = tmp[tmp["Description"].str.contains(TERMS, case=False)].copy()
        if tmp.empty:
            # No "new cases/deaths" row this month: nothing to add to the result
            continue
        tmp['Totals'] = tmp['Totals'].astype(int) # cases/deaths are discrete numbers
        tmp = tmp.rename(columns={'Totals': 'Avg new Cases/Deaths Per Day'})

        # We assume that a day without data had no new cases/deaths (0), so the monthly sum
        # is divided by the number of days of the month, not by the number of reports.
        # The average is rounded because cases/deaths are discrete numbers.
        tmp = (tmp.groupby('Description').sum() / days_in_month).round(0)
        tmp['Date'] = month
        tmp['Country'] = country
        tmp = tmp.reset_index().set_index(['Country','Date','Description'])
        monthly_frames.append(tmp)
result = pd.concat(monthly_frames) if monthly_frames else pd.DataFrame()
result

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Avg new Cases/Deaths Per Day
Country,Date,Description,Unnamed: 3_level_1
Guinea,2014-08,New cases of confirmed,2.0
Guinea,2014-08,New cases of confirmed among health workers,0.0
Guinea,2014-08,New cases of probables,0.0
Guinea,2014-08,New cases of suspects,2.0
Guinea,2014-08,New deaths registered,0.0
Guinea,2014-08,New deaths registered among health workers,0.0
Guinea,2014-08,New deaths registered today,0.0
Guinea,2014-08,New deaths registered today (confirmed),0.0
Guinea,2014-08,New deaths registered today (probables),0.0
Guinea,2014-08,New deaths registered today (suspects),0.0
