In [None]:
# Load the rpy2 IPython extension; it provides the %R / %%R magics
# used by the R cells in this notebook.
%load_ext rpy2.ipython

In [None]:
pip install python-dotenv

In [None]:
import os
from dotenv import load_dotenv
load_dotenv('.env')

API_KEY = os.getenv('GOOGLE_API_KEY')

%R -i API_KEY


Data Preparation for Air Pollution Index 2018

In [None]:
%%R
# Data Preparation for Air Pollution Index 2018
#
# Reads the raw API readings, averages every column over the 2018 records,
# geocodes each remaining column name as a place in Malaysia and writes
# (location, lon, lat, API) to PreparedDataSet/API_18.csv.

library(ggmap)

# file.path() joins with "/" so the cell is not tied to Windows path separators.
API = read.csv(file.path("RawDataSet", "API.csv"), header = TRUE)

# Reduce the timestamp to its (integer) year so records can be filtered by year.
API$Time = strptime(API$Time, format = "%d-%m-%y %H:%M")
API$Time = as.integer(format(API$Time, "%Y"))

# colMeans() requires numeric input, so every column is coerced to integer.
# NOTE(review): presumably some station columns are read as text because of
# non-numeric entries; those entries become NA here — confirm against API.csv.
API = as.data.frame(lapply(API, as.integer))

API_18 = subset(API, Time == 2018)                          # keep only 2018 records
API_18 = round(colMeans(API_18, na.rm = TRUE), digits = 0)  # rounded mean per column
API_18 = data.frame(API_18)                                 # one row per column of API

# Geocode
register_google(key = API_KEY)

# The first row is skipped: the code assumes it is the averaged Time column,
# which is not a place. geocode() accepts a character vector, so all places
# are resolved in one call instead of growing a data frame inside a loop.
location = paste(rownames(API_18)[-1], ", Malaysia")
coor = geocode(location = location, output = "latlon")
hold_coor = cbind(location, coor)

API_18 = data.frame(API_18[-1,])  # remove the year row
hold_coors = cbind(hold_coor, API_18)
colnames(hold_coors) = c("location", "lon", "lat", "API")

# Remove East Malaysia (rows with lon >= 105; rows without a longitude are dropped too)
hold_coors = subset(hold_coors, hold_coors$lon < 105)
write.csv(hold_coors, file = file.path("PreparedDataSet", "API_18.csv"))

Data Preparation for Air Pollution Index 2019

In [None]:
"""
This code performs data preparation for the Air Pollution Index (API) dataset for the year 2019.
It reads the raw dataset, changes the format of the time attribute, subsets the data for the year 2019,
calculates the mean API values, geocodes the locations, removes certain locations, and writes the prepared dataset to a CSV file.
"""

%%R
# Data Preparation for Air Pollution Index 2019

API = read.csv("RawDataSet\\API.csv",header=TRUE)       # Read the data
API$Time = strptime(API$Time, format = "%d-%m-%y %H:%M")    # Change the format of Time
API$Time = format(API$Time, "%Y")                                               # Change the time attribute only the year
API$Time = as.integer(API$Time)                                                     # Change the format of Time to integer
API = as.data.frame(lapply(API, as.integer))                                      # I also don't know why I need to do this

API_19 = subset(API, Time == 2019)                    # Subset the data to only 2019
API_19 = round(colMeans(API_19, na.rm = TRUE), digits = 0)
API_19 = data.frame(API_19)

#geocode
library(ggmap)
register_google(key = API_KEY)

hold_coor = data.frame()
hold_coors = data.frame()

for (i in 2:nrow(API_19)) {
  location = paste(rownames(API_19)[i], ", Malaysia")
  coor = geocode(location = location, output = "latlon")
  hold_coor = rbind(hold_coor, cbind(location, coor))
}

API_19 = data.frame(API_19[-1,]) #remove the year row
hold_coors = cbind(hold_coor, API_19)
colnames(hold_coors) = c("location", "lon", "lat", "API")

# remove east malaysia & langkawi
hold_coors = subset(hold_coors, hold_coors$lon < 105)
hold_coors = subset(hold_coors, hold_coors$lon > 100)
hold_coors = na.omit(hold_coors) # remove NA values
write.csv(hold_coors, file = "PreparedDataSet\\API_19.csv",)

Meteo

In [None]:
%%R
# Meteorological data 2018: average temp, humidity, precip and windspeed
# for every (name, longitude, latitude) group and write the result to
# PreparedDataSet/Meteo_18.csv.
library(dplyr)

# file.path() joins with "/" so the cell is not tied to Windows path separators.
Meteo_18 = read.csv(file.path("RawDataSet", "Meteo2018.csv"))

# Each variable is summarised exactly once (humidity was previously listed twice).
Meteo_18 = Meteo_18 %>%
  group_by(name, longitude, latitude) %>%
  summarize(
    temp      = mean(temp),
    humidity  = mean(humidity),
    precip    = mean(precip),
    windspeed = mean(windspeed)
  )

write.csv(Meteo_18, file = file.path("PreparedDataSet", "Meteo_18.csv"))

In [None]:
%%R
# Meteorological data 2019: average temp, humidity, precip and windspeed
# for every (name, longitude, latitude) group and write the result to
# PreparedDataSet/Meteo_19.csv.
library(dplyr)

# file.path() joins with "/" so the cell is not tied to Windows path separators.
Meteo_19 = read.csv(file.path("RawDataSet", "Meteo2019.csv"))

# Each variable is summarised exactly once (humidity was previously listed twice).
Meteo_19 = Meteo_19 %>%
  group_by(name, longitude, latitude) %>%
  summarize(
    temp      = mean(temp),
    humidity  = mean(humidity),
    precip    = mean(precip),
    windspeed = mean(windspeed)
  )

write.csv(Meteo_19, file = file.path("PreparedDataSet", "Meteo_19.csv"))

Vegetation

In [None]:
%%R
# Vegetation 2018/2019: derive tree-cover values for 2018 and 2019 per
# (subnational1, subnational2) area, geocode each area and write the result
# to PreparedDataSet/Vegetation_1819.csv.
library(dplyr)
library(ggmap)

# file.path() joins with "/" so the cell is not tied to Windows path separators.
Vegetation_1819 = read.csv(file.path("RawDataSet", "vegetation.csv"))

# Keep only the rows recorded for the 30% tree-cover threshold.
Vegetation_1819 = subset(Vegetation_1819, Vegetation_1819$threshold == 30)

# Tree cover = 2010 extent - summed columns 19:26 (resp. 19:27) + 2000-2020 gain.
# NOTE(review): columns are addressed by position; presumably 19:26 are the
# yearly loss columns up to 2018 and column 27 is 2019 — confirm against the
# header of vegetation.csv.
Vegetation_1819 = Vegetation_1819 %>%
  mutate(treecover2018 = extent_2010_ha - rowSums(.[, 19:26], na.rm = TRUE) + gain_2000.2020_ha) %>%
  mutate(treecover2019 = extent_2010_ha - rowSums(.[, 19:27], na.rm = TRUE) + gain_2000.2020_ha) %>%
  select(subnational1, subnational2, treecover2018, treecover2019)

# Geocode
register_google(key = API_KEY)

# geocode() accepts a character vector, so all areas are resolved in one call
# instead of growing a data frame row by row inside a loop.
location = paste(Vegetation_1819$subnational2, ",", Vegetation_1819$subnational1, ", Malaysia")
coor = geocode(location = location, output = "latlon")
hold_coor = cbind(location, coor)

# Attach the two tree-cover columns to the coordinates.
hold_coors = cbind(hold_coor, Vegetation_1819[, 3:4])

# Remove East Malaysia & Langkawi: keep only longitudes strictly between 100 and 105
hold_coors = subset(hold_coors, hold_coors$lon > 100 & hold_coors$lon < 105)
write.csv(hold_coors, file = file.path("PreparedDataSet", "Vegetation_1819.csv"))

Traffic

In [None]:
from pathlib import Path

import pandas as pd

# Build the path with pathlib so the cell is not tied to Windows path separators.
Traffic_1819 = pd.read_csv(Path("RawDataSet") / "Traffic.csv")

# Keep only LOKASI and the 2018/2019 counts.
Traffic_1819 = Traffic_1819.drop(
    ['BIL', 'STESEN', '2013', '2014', '2015', '2016', '2017', '2020', '2021', '2022'],
    axis=1,
)

# A LOKASI value may hold several '-'-separated names: split it and give
# every name its own row (the year values are repeated for each of them).
Traffic_1819['LOKASI'] = Traffic_1819['LOKASI'].str.split('-')
Traffic_1819 = Traffic_1819.explode('LOKASI').reset_index(drop=True)

# The counts are text with ',' separators; strip the commas (literal match,
# not a regex) and convert to float.
for year in ('2018', '2019'):
    Traffic_1819[year] = (
        Traffic_1819[year].str.replace(',', '', regex=False).astype(float)
    )



In [None]:
%R -i Traffic_1819 # Pass the data frame to R

In [None]:
%%R
# Traffic 2018/2019: geocode the first column of Traffic_1819 (pushed from
# Python) and write the coordinates together with columns 2 and 3 to
# PreparedDataSet/Traffic_1819.csv.
library(ggmap)
register_google(key = API_KEY)

# geocode() accepts a character vector, so all locations are resolved in one
# call instead of growing a data frame row by row inside a loop.
location = paste(Traffic_1819[, 1], ", Malaysia")
coor = geocode(location = location, output = "latlon")
hold_coor = cbind(location, coor)

# Attach columns 2 and 3 of Traffic_1819 to the coordinates.
# NOTE(review): presumably these are the 2018 and 2019 counts — confirm the column order.
hold_coors = cbind(hold_coor, Traffic_1819[, 2:3])

# file.path() joins with "/" so the cell is not tied to Windows path separators.
write.csv(hold_coors, file = file.path("PreparedDataSet", "Traffic_1819.csv"))