## Welcome to your notebook.


#### Run this cell to connect to your GIS and get started:

In [None]:
# Import GIS and modules
import pandas as pd
from arcgis.gis import GIS
import requests
gis = GIS("home")

#### Now you are ready to start!

In [11]:
# Set up parmeters
url = "https://test-data452e1421.s3.eu-west-2.amazonaws.com/permanent_csv.csv"


In [5]:
# Import feature layer item
feature_layer_item = gis.content.get("7d9734d9ab1048468a3954ab25d6a1df")
feature_layer_item

In [6]:
# Convert the featuer layer item into a csv file
csv_path = feature_layer_item.get_data()

In [7]:
# Read in the csv file as a dataframe
original_df = pd.read_csv(csv_path)

In [20]:
# Drop any column names that consist of "unnamed"
original_df.drop(original_df.columns[original_df.columns.str.contains('unnamed', case = False)], axis = 1, inplace = True)

In [22]:
# Import S3 object url and request data from the url into a file
s3_object_link = url
s3_file = requests.get(s3_object_link)

# Separate each line in the file and format it into a regular csv format
format_file = s3_file.text.splitlines()

# Convert the formatted file into a dataframe
s3_df = pd.DataFrame([x.split(",") for x in format_file])

# Set the first row of the dataframe as the header
s3_df_header = s3_df.iloc[0]
s3_df = s3_df[1:]
s3_df.columns = s3_df_header

In [23]:
# Print s3_df for a quick view
s3_df.head(5)

Unnamed: 0,serialNumber,timeStamp,VOC,CO2,SPM1,SPM25,SPM10,AEC1,AEC25,AEC10,lat,lon
1,A1,27/7/22,10,1,1,1,1,1,1,1,,
2,B112312,27/3/21,10,1,2,1,1,0,1,1,,
3,C123412,20/7/22,10,1,1,3,1,0,1,1,,
4,D112,28/7/22,10,5,1,1,1,1,1,1,,
5,1111111111111111,2017-08-19 12:17:55-0400,0,415,0,0,0,0,0,0,20.0,41.0


In [37]:
# Concat the two dataframes into one dataframe and replace the original dataframe
combined_df = pd.concat([original_df, s3_df]).reset_index()


In [38]:
# Print the first 5 of the combined dataframe.
combined_df.head(5)

Unnamed: 0,index,serialNumber,timeStamp,VOC,CO2,SPM1,SPM25,SPM10,AEC1,AEC25,AEC10,status,lat,lon
0,0,200,7/21/2022 18:34,0,400,0,0,8,8,8,8,Active,51.498667,-0.127913
1,1,201,7/21/2022 18:34,0,400,0,0,8,8,8,8,Active,51.492257,-0.134273
2,2,202,7/21/2022 18:34,0,400,0,0,8,8,8,8,Active,51.498667,-0.127913
3,3,203,7/21/2022 18:34,0,400,0,0,8,8,8,8,Active,51.498667,-0.127913
4,4,204,7/21/2022 18:34,0,400,0,0,8,8,8,8,Active,51.492257,-0.134273


In [39]:
# Print the last 5 of the combined dataframe.
combined_df.tail(5)

Unnamed: 0,index,serialNumber,timeStamp,VOC,CO2,SPM1,SPM25,SPM10,AEC1,AEC25,AEC10,status,lat,lon
25,4,D112,28/7/22,10,5,1,1,1,1,1,1,,,
26,5,1111111111111111,2017-08-19 12:17:55-0400,0,415,0,0,0,0,0,0,,20.0,41.0
27,6,1111111111111111,2017-08-19 12:17:55-0400,0,415,0,0,0,0,0,0,,20.0,41.0
28,7,1111111111111111,2017-08-19 12:17:55-0400,0,415,0,0,0,0,0,0,,20.0,41.0
29,8,1243,05:30:30,1243,9,10,10,2,7,9,10,,,


In [40]:
# Drop the row from the dataframe if all row values are NA.
combined_df = combined_df.dropna(axis='rows', how ='all')


In [41]:
# Print the first 5 rows of the new updated dataframe.
combined_df.head(5)

Unnamed: 0,index,serialNumber,timeStamp,VOC,CO2,SPM1,SPM25,SPM10,AEC1,AEC25,AEC10,status,lat,lon
0,0,200,7/21/2022 18:34,0,400,0,0,8,8,8,8,Active,51.498667,-0.127913
1,1,201,7/21/2022 18:34,0,400,0,0,8,8,8,8,Active,51.492257,-0.134273
2,2,202,7/21/2022 18:34,0,400,0,0,8,8,8,8,Active,51.498667,-0.127913
3,3,203,7/21/2022 18:34,0,400,0,0,8,8,8,8,Active,51.498667,-0.127913
4,4,204,7/21/2022 18:34,0,400,0,0,8,8,8,8,Active,51.492257,-0.134273


In [42]:
# Convert the dataframe into a csv file with the same path as the original csv file attached to the feature layer
combined_df.to_csv(csv_path)

In [43]:
# Update the feature layer item with the new csv file path
feature_layer_item.update({}, csv_path)

True

In [44]:
# Publish the new feature layer item
feature_layer_item.publish(overwrite=True)