##### Combine the input matrix with the output matrix that contains the calculated MPI index.

In [1]:
# import modules 
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Load the updated input matrix, round it to two decimals, and rename its
# columns to the "*_units" names used by the following cells.
units_column_names = {
    'code_barrios': 'code_barrios_units',
    'name_barrios': 'name_barrios_units',
    'noise_pollution': 'noise_units',
    'pm_10': 'pm_units',
    'carbon_monoxide': 'co_units',
    'rainfall': 'rain_units',
    'solar_radiation': 'solar_units',
    'temperature': 'temp_units',
    'trees_density': 'trees_units',
}
df_input_matrix = (
    pd.read_csv('../input_matrix/inputMatrix_updated.csv')
    .round(2)
    .rename(columns=units_column_names)
)
df_input_matrix.head(3)

Unnamed: 0,code_barrios_units,name_barrios_units,noise_units,pm_units,co_units,rain_units,solar_units,temp_units,trees_units
0,172,San Cristobal,60.07,37.73,0.35,4.79,198.82,13.72,3172.64
1,173,Butarque,60.59,37.08,0.35,5.02,195.94,13.66,578.45
2,175,Los Angeles,59.59,38.67,0.35,4.47,203.28,13.83,3984.76


In [3]:
# Load the output matrix (the per-indicator values plus the mpi_index column)
# and round every numeric column to two decimals.
mpi_index_csv = '../output_matrix/df_mpi_index.csv'
df_output_matrix = pd.read_csv(mpi_index_csv)
df_output_matrix = df_output_matrix.round(2)
df_output_matrix.head(3)

Unnamed: 0,name_barrios,noise_pollution,pm_10,carbon_monoxide,rainfall,solar_radiation,temperature,trees_density,mpi_index
0,San Cristobal,91.12,105.97,104.04,102.93,95.04,79.7,92.1,96.74
1,Butarque,94.01,103.73,104.04,106.14,86.37,79.07,113.84,99.69
2,Los Angeles,88.45,109.2,104.04,98.47,108.47,80.86,85.3,97.78


In [4]:
# Put the input and output matrices side by side, pairing rows by index label.
# NOTE(review): this assumes both CSVs list the barrios in the same row order — confirm.
df_combined = pd.merge(
    df_input_matrix,
    df_output_matrix,
    left_index=True,
    right_index=True,
)
# Only the name column coming from the output matrix (name_barrios) is kept.
df_combined = df_combined.drop(columns=['name_barrios_units'])
df_combined.head()

Unnamed: 0,code_barrios_units,noise_units,pm_units,co_units,rain_units,solar_units,temp_units,trees_units,name_barrios,noise_pollution,pm_10,carbon_monoxide,rainfall,solar_radiation,temperature,trees_density,mpi_index
0,172,60.07,37.73,0.35,4.79,198.82,13.72,3172.64,San Cristobal,91.12,105.97,104.04,102.93,95.04,79.7,92.1,96.74
1,173,60.59,37.08,0.35,5.02,195.94,13.66,578.45,Butarque,94.01,103.73,104.04,106.14,86.37,79.07,113.84,99.69
2,175,59.59,38.67,0.35,4.47,203.28,13.83,3984.76,Los Angeles,88.45,109.2,104.04,98.47,108.47,80.86,85.3,97.78
3,174,60.37,37.76,0.35,5.31,191.37,12.02,3810.34,Los Rosales,92.79,106.07,104.04,110.19,72.61,61.86,86.76,94.25
4,171,58.64,38.81,0.35,4.22,206.95,14.29,1144.74,"Villaverde Alto, Casco Historico de Villaverde",83.17,109.68,104.04,94.98,119.52,85.69,109.09,102.67


In [5]:
# Reorder the columns: barrio code and name first, then each output-matrix
# column followed by its "*_units" counterpart from the input matrix, and
# mpi_index last.
ordered_columns = [
    'code_barrios_units', 'name_barrios',
    'noise_pollution', 'noise_units',
    'pm_10', 'pm_units',
    'carbon_monoxide', 'co_units',
    'rainfall', 'rain_units',
    'solar_radiation', 'solar_units',
    'temperature', 'temp_units',
    'trees_density', 'trees_units',
    'mpi_index',
]
# The barrio code gets its original name back once the reordering is done.
df_final = df_combined[ordered_columns].rename(
    columns={'code_barrios_units': 'code_barrios'}
)
df_final.head()

Unnamed: 0,code_barrios,name_barrios,noise_pollution,noise_units,pm_10,pm_units,carbon_monoxide,co_units,rainfall,rain_units,solar_radiation,solar_units,temperature,temp_units,trees_density,trees_units,mpi_index
0,172,San Cristobal,91.12,60.07,105.97,37.73,104.04,0.35,102.93,4.79,95.04,198.82,79.7,13.72,92.1,3172.64,96.74
1,173,Butarque,94.01,60.59,103.73,37.08,104.04,0.35,106.14,5.02,86.37,195.94,79.07,13.66,113.84,578.45,99.69
2,175,Los Angeles,88.45,59.59,109.2,38.67,104.04,0.35,98.47,4.47,108.47,203.28,80.86,13.83,85.3,3984.76,97.78
3,174,Los Rosales,92.79,60.37,106.07,37.76,104.04,0.35,110.19,5.31,72.61,191.37,61.86,12.02,86.76,3810.34,94.25
4,171,"Villaverde Alto, Casco Historico de Villaverde",83.17,58.64,109.68,38.81,104.04,0.35,94.98,4.22,119.52,206.95,85.69,14.29,109.09,1144.74,102.67


##### Categorize the environmental discomfort level into 5 levels based on the MPI index

In [12]:
# Assign an mpi level (1-5) to every row according to its mpi index.
# Each entry is (inclusive lower bound of the mpi index, level), ordered from
# the highest bound down so that the first bound a value reaches decides its level.
mpi_level_lower_bounds = [(105.36, 1), (101.2, 2), (97.12, 3), (93, 4)]

mpi_level = []
for value in df_final["mpi_index"]:
    # Lower bounds are inclusive, like the `>= 105.36` rule for level 1, so a value
    # lying exactly on 101.2, 97.12 or 93 gets the level that starts at that bound
    # instead of matching no range and ending up in level 5.
    # Values below 93 (and NaN, which fails every comparison) get level 5.
    level = next(
        (lvl for bound, lvl in mpi_level_lower_bounds if value >= bound),
        5,
    )
    mpi_level.append(level)

# add new column to the dataframe
df_final["mpi_level"] = mpi_level
# show the first three rows of the dataset
df_final.head(3)

Unnamed: 0,code_barrios,name_barrios,noise_pollution,noise_units,pm_10,pm_units,carbon_monoxide,co_units,rainfall,rain_units,solar_radiation,solar_units,temperature,temp_units,trees_density,trees_units,mpi_index,mpi_level
0,172,San Cristobal,91.12,60.07,105.97,37.73,104.04,0.35,102.93,4.79,95.04,198.82,79.7,13.72,92.1,3172.64,96.74,4
1,173,Butarque,94.01,60.59,103.73,37.08,104.04,0.35,106.14,5.02,86.37,195.94,79.07,13.66,113.84,578.45,99.69,3
2,175,Los Angeles,88.45,59.59,109.2,38.67,104.04,0.35,98.47,4.47,108.47,203.28,80.86,13.83,85.3,3984.76,97.78,3


In [16]:
# Keep only the barrio code and its mpi level, and export that pair to CSV
# (without the dataframe index).
df_mpi_level = df_final[['code_barrios','mpi_level' ]]
df_mpi_level.to_csv('../final_matrix/mpi_level.csv', index=False)
# The preview goes last: a notebook cell only renders its final expression, so a
# head() call placed before to_csv() is evaluated and silently discarded.
df_mpi_level.head()