### Loading the Resources and Files needed

In [1]:
import pandas as pd

In [2]:
dengue_case = pd.read_csv('combined-data.csv')
dengue_case.head()

Unnamed: 0,City/Municipality,Month,Year,Alive,Death
0,ALFONSO,January,2014,1,1.0
1,ALFONSO,February,2014,1,0.0
2,ALFONSO,March,2014,2,0.0
3,ALFONSO,April,2014,1,0.0
4,ALFONSO,May,2014,1,0.0


### Checking Data Types of each Column

In [3]:
dengue_case.dtypes

City/Municipality     object
Month                 object
Year                   int64
Alive                  int64
Death                float64
dtype: object

### Removing Whitespaces

In [4]:
# remove whitespaces for "City/Municipality" column
dengue_case['City/Municipality'] = dengue_case['City/Municipality'].map(str.strip)

In [5]:
# remove whitespaces for 'Month' column
dengue_case['Month'] = dengue_case['Month'].map(str.strip)

### Fixing 'City/Municipality' column's consistency

In [6]:
# checking 'City/Municipality' consistency
dengue_case['City/Municipality'].value_counts()

City/Municipality
ALFONSO                     132
AMADEO                      132
BACOOR                      132
CARMONA                     132
CAVITE CITY                 132
DASMARINAS                  132
GENERAL MARIANO ALVAREZ     132
GENERAL EMILIO AGUINALDO    132
GENERAL TRIAS               132
IMUS                        132
INDANG                      132
KAWIT                       132
MAGALLANES                  132
MARAGONDON                  132
MENDEZ                      132
NAIC                        132
NOVELETA                    132
ROSARIO                     132
SILANG                      132
TAGAYTAY CITY               132
TANZA                       132
TERNATE                     132
TRECE MARTIRES CITY         132
Name: count, dtype: int64

In [7]:
dengue_case1 = dengue_case.copy()

In [8]:
# replace inconsistent values in 'City/Municipality' column
dengue_case1['City/Municipality'] = dengue_case1['City/Municipality'].replace({
    'TRECE MARITES CITY': 'TRECE MARTIRES CITY',
    'TRECE MARTIRES': 'TRECE MARTIRES CITY',
    'NOVALETA': 'NOVELETA',
    'GEN. MARIANO ALVAREZ': 'GENERAL MARIANO ALVAREZ',
    'GEN, MARIANO ALVAREZ': 'GENERAL MARIANO ALVAREZ'
})

### Filling Null Values

In [9]:
# filling empty cells for columns "Alive" and "Death"
dengue_case1['Alive'] = dengue_case1['Alive'].fillna(0)
dengue_case1['Death'] = dengue_case1['Death'].fillna(0)

In [10]:
dengue_case2 = dengue_case1.copy()

### Converting 'Alive' column into a numerical column

- Locating string object/s from numerical column 'Alive'

In [11]:
# convert the 'Alive' column into a numeric column and leaving 
alive_col = pd.to_numeric(dengue_case2['Alive'], errors='coerce').isna()

In [12]:
for idx, val in alive_col.items():
    if val == True:
        print(idx)

In [13]:
dengue_case2['Alive'].iloc[1623]

np.int64(0)

In [14]:
# replacing the faulty data (from 'o' to 0)
dengue_case2['Alive'] = dengue_case2['Alive'].replace({
    'O': 0
})

In [15]:
# converting 'Alive' column into a nuemrical column
dengue_case2['Alive'] = pd.to_numeric(dengue_case2['Alive'], errors='coerce')

In [16]:
# checking
dengue_case2['Alive'].isna().sum()

np.int64(0)

In [17]:
# checking
dengue_case2.dtypes

City/Municipality     object
Month                 object
Year                   int64
Alive                  int64
Death                float64
dtype: object

### Feature Engineering

In [21]:
dengue_case2['Cases'] = dengue_case2['Alive'] + dengue_case2['Death']

In [22]:
dengue_case2

Unnamed: 0,City/Municipality,Month,Year,Alive,Death,Cases
0,ALFONSO,January,2014,1,1.0,2.0
1,ALFONSO,February,2014,1,0.0,1.0
2,ALFONSO,March,2014,2,0.0,2.0
3,ALFONSO,April,2014,1,0.0,1.0
4,ALFONSO,May,2014,1,0.0,1.0
...,...,...,...,...,...,...
3031,TRECE MARTIRES CITY,August,2024,204,2.0,206.0
3032,TRECE MARTIRES CITY,September,2024,215,1.0,216.0
3033,TRECE MARTIRES CITY,October,2024,216,1.0,217.0
3034,TRECE MARTIRES CITY,November,2024,142,0.0,142.0


In [23]:
# update our .csv file
dengue_case2.to_csv('combined-data.csv', mode='w', index=False)