In [2]:
import ee 
ee.Authenticate()


Successfully saved authorization token.


In [3]:
ee.Initialize()

In [5]:
# check evelation
region = ee.Geometry.BBox(-54.5, -13.5, -51.0, -10.5)

elev = ee.Image("USGS/SRTMGL1_003")

# Compute mean elevation over the region
mean = elev.reduceRegion(
    reducer=ee.Reducer.mean(),
    geometry=region,
    scale=1000,
    maxPixels=1e9
)

In [6]:
mean

<ee.dictionary.Dictionary at 0x7fe4d0fde2e0>

In [29]:
# load  data 
import pandas as pd 
df = pd.read_csv("/Users/hereagain/Desktop/OpenAItoZ/dataset/UpperXingu_test.csv")

features = [
    ee.Feature(
        ee.Geometry.Point([row['x'], row['y']]),
        {
            'type': row['type'],
            'distriver1': row['distriver1'],
            'distriver2': row['distriver2']
        }
    )
    for _, row in df.iterrows()
]

# Create FeatureCollection
points = ee.FeatureCollection(features)


In [30]:
# i chose the soil data from 15-30 cm depth from soilgrids
ph = ee.Image("projects/soilgrids-isric/phh2o_mean").select("phh2o_15-30cm_mean").rename("ph")
soc = ee.Image("projects/soilgrids-isric/soc_mean").select("soc_15-30cm_mean").rename("soc")
clay = ee.Image("projects/soilgrids-isric/clay_mean").select("clay_15-30cm_mean").rename("clay")
cec = ee.Image("projects/soilgrids-isric/cec_mean").select("cec_15-30cm_mean").rename("cec")
bdod = ee.Image("projects/soilgrids-isric/bdod_mean").select("bdod_15-30cm_mean").rename("bulk_density")


In [17]:
soil = ph.addBands(soc).addBands(clay).addBands(cec).addBands(bdod)

# Sample at your 50 site points
sampled = soil.sampleRegions(
    collection=points,
    scale=250,
    geometries=True
)


In [18]:
results = sampled.getInfo()['features']

In [19]:
results

[{'type': 'Feature',
  'geometry': {'geodesic': False,
   'type': 'Point',
   'coordinates': [-54.054361079462865, -13.238936630024575]},
  'id': '0_0',
  'properties': {'bulk_density': 124,
   'cec': 170,
   'clay': 285,
   'distriver1': 120202.56073676547,
   'distriver2': 41888.89334833111,
   'ph': 51,
   'soc': 196,
   'type': 'candidate'}},
 {'type': 'Feature',
  'geometry': {'geodesic': False,
   'type': 'Point',
   'coordinates': [-53.32493232112654, -12.632488538559052]},
  'id': '1_0',
  'properties': {'bulk_density': 127,
   'cec': 64,
   'clay': 292,
   'distriver1': 222586.0764741041,
   'distriver2': 73665.84208276404,
   'ph': 48,
   'soc': 154,
   'type': 'candidate'}},
 {'type': 'Feature',
  'geometry': {'geodesic': False,
   'type': 'Point',
   'coordinates': [-53.266560571670006, -12.352933390681367]},
  'id': '2_0',
  'properties': {'bulk_density': 131,
   'cec': 88,
   'clay': 244,
   'distriver1': 245487.1036563949,
   'distriver2': 55371.91513267103,
   'ph': 52,

In [20]:
records = []
for f in results:
    props = f['properties']
    coords = f['geometry']['coordinates']
    props['longitude'] = coords[0]
    props['latitude'] = coords[1]
    records.append(props)

df = pd.DataFrame(records)

In [None]:
df.head()

Unnamed: 0,bulk_density,cec,clay,distriver1,distriver2,ph,soc,type,longitude,latitude
0,124,170,285,120202.560737,41888.893348,51,196,candidate,-54.054361,-13.238937
1,127,64,292,222586.076474,73665.842083,48,154,candidate,-53.324932,-12.632489
2,131,88,244,245487.103656,55371.915133,52,157,candidate,-53.266561,-12.352933
3,131,78,207,21525.564974,164316.430523,49,118,candidate,-55.353967,-11.506791
4,127,105,289,130162.573413,49880.052296,51,165,candidate,-53.964775,-13.214423
5,128,78,251,231805.788625,23536.117734,47,113,candidate,-53.417459,-12.102043
6,130,66,239,247314.947876,50844.779415,49,128,candidate,-53.278897,-12.310092
7,130,84,258,243962.24136,40819.365339,50,143,candidate,-53.360821,-12.257054
8,132,67,237,229732.742755,20868.159108,50,158,candidate,-53.427337,-12.07961
9,128,92,295,60106.466662,58619.749279,52,118,candidate,-55.062531,-13.065316


### Add slope / Terrain features for both train and test

In [31]:
elev = ee.Image("USGS/SRTMGL1_003")

# Compute slope (in degrees)
slope = ee.Terrain.slope(elev).rename("slope")

Add terrain features 

In [48]:
elev = ee.Image("USGS/SRTMGL1_003")

# Compute TRI using focal standard deviation
tri = elev.reduceNeighborhood(
    reducer=ee.Reducer.stdDev(),
    kernel=ee.Kernel.square(1)  # This gives a 3x3 window
).rename("tri")

In [49]:
soil_with_slope = soil.addBands(slope)
soil_with_terrain = soil_with_slope.addBands(tri)

In [50]:
sampled = soil_with_terrain.sampleRegions(
    collection=points,
    scale=250,
    geometries=True
)

In [51]:
test_results = sampled.getInfo()['features']

In [53]:
records = []
for f in test_results:
    props = f['properties']
    coords = f['geometry']['coordinates']
    props['longitude'] = coords[0]
    props['latitude'] = coords[1]
    records.append(props)

test_df = pd.DataFrame(records)
test_df.head()

Unnamed: 0,bulk_density,cec,clay,distriver1,distriver2,ph,slope,soc,tri,type,longitude,latitude
0,124,170,285,120202.560737,41888.893348,51,1.070147,196,4.049082,candidate,-54.054361,-13.238937
1,127,64,292,222586.076474,73665.842083,48,0.166004,154,0.666667,candidate,-53.324932,-12.632489
2,131,88,244,245487.103656,55371.915133,52,0.165912,157,0.31427,candidate,-53.266561,-12.352933
3,131,78,207,21525.564974,164316.430523,49,1.576528,118,6.40216,candidate,-55.353967,-11.506791
4,127,105,289,130162.573413,49880.052296,51,0.84159,165,3.15446,candidate,-53.964775,-13.214423


In [61]:
test_df.to_csv("/Users/hereagain/Desktop/OpenAItoZ/dataset//test_UpperXingu.csv", index=False)


### Adding new soil features to the original train dataset 

In [57]:
train_new = pd.read_csv('/Users/hereagain/Desktop/OpenAItoZ/dataset/RobertSWalker/submit.csv')
features = [
    ee.Feature(
        ee.Geometry.Point([row['x'], row['y']]),
        {
            'type': row['type']
        }
    )
    for _, row in train_new.iterrows()
]

# Create FeatureCollection
points2 = ee.FeatureCollection(features)

#soil = ph.addBands(soc).addBands(clay).addBands(cec).addBands(bdod)
# add slope 
#soil_with_slope = soil.addBands(slope)
# add terrain 

sampled_train = soil_with_terrain.sampleRegions(
    collection=points2,
    scale=250,
    geometries=True
)
train_results = sampled_train.getInfo()['features']


In [58]:
records2 = []
for f in train_results:
    props = f['properties']
    coords = f['geometry']['coordinates']
    props['longitude'] = coords[0]
    props['latitude'] = coords[1]
    records2.append(props)

train_df = pd.DataFrame(records2)

In [59]:
train_df.head()

Unnamed: 0,bulk_density,cec,clay,ph,slope,soc,tri,type,longitude,latitude
0,127,76,383,51,1.076476,184,5.335648,earthwork,-67.069065,-10.482522
1,130,83,345,46,0.992471,66,3.664983,earthwork,-67.075951,-10.287186
2,131,65,282,47,4.312538,72,13.695092,earthwork,-67.114662,-10.427579
3,131,66,387,47,0.583209,98,1.523479,earthwork,-67.21028,-10.464207
4,134,92,295,47,0.579662,78,2.249829,earthwork,-67.219721,-10.307531


In [60]:
train_df.to_csv('/Users/hereagain/Desktop/OpenAItoZ/dataset/RobertSWalker/new_train.csv', index=False)