# Mount Drive and Load Dataset

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive so files on the
# drive can be addressed by ordinary file paths.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Load the rpy2 IPython extension, which registers the %%R cell magic.
%load_ext rpy2.ipython

In [None]:
%%R
# Location of the pipeline CSV on the mounted shared drive.
url <- "/content/drive/Shareddrives/GEM Shared Drive/Projects/Fossil Infrastructure (GFIT)/Pipelines/pipe.csv"
# Read the CSV into a data frame using read.csv's default settings.
dataset <- read.csv(file = url)

In [None]:
%%R
# Install the packages this notebook needs, skipping any that are already
# installed so that re-running the cell does not reinstall everything.
required_pkgs <- c("splitstackshape", "reshape", "reshape2", "tidyr")
missing_pkgs <- setdiff(required_pkgs, rownames(installed.packages()))
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}

In [None]:
%%R
# Attach the packages used below. Order matters: a package attached later masks
# same-named functions from packages attached earlier, so for any function
# exported by both reshape and reshape2 the reshape2 version is the one found.
library(splitstackshape)  # cSplit
library(reshape)
library(reshape2)  # NOTE(review): presumably the source of melt() used below - confirm
library(tidyr)  # drop_na, spread, unite

# Formatting Data for QGIS

In [None]:
%%R
# Clean data: keep only the identifying attributes plus the raw route string.
routes <- dataset[, c(
  "Project...Segment.ID",
  "Fuel",
  "Pipeline.name",
  "Status",
  "Route"
)]
# Remove every space from the route strings before they are split on
# delimiters in later steps.
routes[["Route"]] <- gsub(pattern = " ", replacement = "", x = routes[["Route"]])

In [None]:
%%R
# Print the number of rows in routes as a quick sanity check.
nrow(routes)

[1] 2863


In [None]:
%%R
# Divide each route into laterals: cSplit breaks the ";"-separated Route string
# into one column per lateral, and melt stacks those columns back into rows.
routes2 <- cSplit(indt = routes, splitCols = "Route", sep = ";", direction = "wide")

routes3 <- melt(
  routes2,
  id.vars = c("Project...Segment.ID", "Fuel", "Pipeline.name", "Status")
)
# melt names the stacked-column indicator "variable"; call it RouteNum instead.
colnames(routes3)[which(colnames(routes3) == "variable")] <- "RouteNum"

# Discard rows whose lateral value is NA.
routes4 <- drop_na(routes3, "value")

In [None]:
%%R
# Print the column names of routes4 to inspect the reshaped table.
names(routes4)

[1] "Project...Segment.ID" "Fuel"                 "Pipeline.name"       
[4] "Status"               "RouteNum"             "value"               


In [None]:
%%R
# Divide each lateral into points: cSplit breaks the ":"-separated value string
# into one column per point, and melt stacks those columns back into rows.
routes5 <- cSplit(indt = routes4, splitCols = "value", sep = ":", direction = "wide")

routes6 <- melt(
  routes5,
  id.vars = c("Project...Segment.ID", "Fuel", "Pipeline.name", "Status", "RouteNum")
)
# Give the melt output columns descriptive names.
colnames(routes6)[which(colnames(routes6) == "variable")] <- "PointNum"
colnames(routes6)[which(colnames(routes6) == "value")] <- "Coord"

# Discard rows whose Coord is NA.
routes7 <- drop_na(routes6, "Coord")

In [None]:
%%R
# Divide each point into lat/long: cSplit breaks the ","-separated Coord string
# into numbered columns (Coord_1, Coord_2). No melt is needed at this step.
routes8 <- cSplit(indt = routes7, splitCols = "Coord", sep = ",", direction = "wide")

In [None]:
%%R
# Convert to a plain data frame, then rebuild each point as a single
# space-separated string with the two halves swapped (Coord_2 first).
routes9 <- as.data.frame(routes8)
routes9[["Coord"]] <- paste(routes9[["Coord_2"]], routes9[["Coord_1"]])

In [None]:
%%R
# Print the column names of routes9 to inspect the table after adding Coord.
names(routes9)

[1] "Project...Segment.ID" "Fuel"                 "Pipeline.name"       
[4] "Status"               "RouteNum"             "PointNum"            
[7] "Coord_1"              "Coord_2"              "Coord"               


In [None]:
%%R
# Drop the separate coordinate halves now that Coord holds the combined point.
# The columns are selected by name rather than by position so this step keeps
# removing the right columns even if the column order changes.
routes9 <- routes9[, setdiff(names(routes9), c("Coord_1", "Coord_2")), drop = FALSE]

In [None]:
%%R
#merge points back into laterals
# spread() turns each PointNum level into its own column holding that point's
# Coord, with the remaining columns identifying each output row.
# NOTE(review): drop = FALSE asks spread() to keep factor levels that do not
# appear in the data - confirm this is needed. spread() is also superseded in
# tidyr by pivot_wider().
routes10 <- spread(routes9, PointNum, Coord, drop = FALSE)

In [None]:
%%R
# Print the column names of routes10 to inspect the widened table.
names(routes10)

In [None]:
%%R
# Join all point columns of a lateral into one comma-separated string named
# "route", skipping NA points and removing the individual point columns.
# The point columns are every column that is not an identifier, so the step
# does not depend on a fixed number of points per lateral.
lateral_id_cols <- c("Project...Segment.ID", "Fuel", "Pipeline.name", "Status", "RouteNum")
point_cols <- setdiff(names(routes10), lateral_id_cols)
routes11 <- unite(routes10, route, all_of(point_cols), sep = ",", remove = TRUE, na.rm = TRUE)

In [None]:
%%R
# Print the column names of routes11 to inspect the united table.
names(routes11)

In [None]:
%%R
# Format to match the QGIS string requirement: wrap each lateral's point list
# in parentheses. The bracket characters are stored as columns and then
# concatenated around the route string.
routes11[["line"]] <- "("
routes11[["lineend"]] <- ")"
routes11[["LineStr"]] <- paste0(
  routes11[["line"]],
  routes11[["route"]],
  routes11[["lineend"]]
)

In [None]:
%%R
# Print the column names of routes11 to confirm the new LineStr column.
names(routes11)

In [None]:
%%R
#merge laterals back together
# spread() turns each RouteNum level into its own column holding that
# lateral's LineStr; all other columns identify the output rows.
# NOTE(review): routes11 still carries the per-lateral "route" column (plus
# "line" and "lineend"), so they are part of the row key here. Because "route"
# differs between laterals, laterals of the same segment may end up on
# separate rows instead of one - verify the row count of routes12.
routes12 <- spread(routes11, RouteNum, LineStr, drop = FALSE)

In [None]:
%%R
# Print the column names of routes12 to inspect the widened table.
names(routes12)

In [None]:
%%R
# With dataset = routes12, create a new variable named "route" by uniting the
# per-lateral LineStr columns into one comma-separated string, skipping NA.
# The %%R magic has to be the first line of the cell for the cell to run as R,
# so this comment sits below it.
# The lateral columns are every column that is not an attribute or helper
# column, so the step does not depend on a fixed number of laterals.
# NOTE(review): routes12 already has a "route" column (the per-lateral point
# list); confirm how unite() resolves the name clash in the installed tidyr.
non_lateral_cols <- c(
  "Project...Segment.ID", "Fuel", "Pipeline.name", "Status",
  "route", "line", "lineend"
)
lateral_cols <- setdiff(names(routes12), non_lateral_cols)
routes13 <- unite(routes12, route, all_of(lateral_cols), sep = ",", remove = TRUE, na.rm = TRUE)

In [None]:
%%R
# Format to match the QGIS string requirement: wrap the joined laterals in
# "MULTILINESTRING (" ... ")". The prefix and suffix are stored as columns and
# then concatenated around the route string.
routes13[["multi"]] <- "MULTILINESTRING ("
routes13[["end"]] <- ")"
routes13[["MultiLine"]] <- paste0(
  routes13[["multi"]],
  routes13[["route"]],
  routes13[["end"]]
)

In [None]:
%%R
# Print the column names of routes13 to confirm the new MultiLine column.
names(routes13)

In [None]:
%%R
# Remove the helper columns ("route", "multi", "end" and the leftover "line" /
# "lineend" brackets), keeping the attribute columns and "MultiLine".
# Columns are dropped by name: a positional drop depends on where unite()
# placed its output and on which helper columns are still present, so it can
# silently remove the wrong columns.
helper_cols <- c("route", "multi", "end", "line", "lineend")
routes13 <- routes13[, setdiff(names(routes13), helper_cols), drop = FALSE]

In [None]:
%%R
#export results to Pipelines folder
# write.csv emits comma-separated text, so the file is given a .csv extension;
# under an .xlsx name, tools would try to open it as an Excel workbook and fail.
write.csv(routes11, '/content/drive/Shareddrives/GEM Shared Drive/Projects/Fossil Infrastructure (GFIT)/Pipelines/test11.csv')

In [None]:
%%R
#export results to Pipelines folder
# write.csv emits comma-separated text, so the file is given a .csv extension;
# under an .xlsx name, tools would try to open it as an Excel workbook and fail.
write.csv(routes13, '/content/drive/Shareddrives/GEM Shared Drive/Projects/Fossil Infrastructure (GFIT)/Pipelines/WKT Format 2021-08-26.csv')