<a href="https://colab.research.google.com/github/MinahilSadiq1/Classification_of_Corn_Crop/blob/main/LSTM_Classfication_Code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install geemap

In [None]:
import geemap

In [None]:
import folium

In [None]:
# Earth Engine client: Authenticate() runs the sign-in flow and Initialize()
# must be called before any ee.* object below is created.
import ee
ee.Authenticate()
ee.Initialize()

In [None]:
# Test map: creates the interactive geemap widget with the centre and zoom
# given below. The same `Map` object is reused later to show the corn fields.
Map = geemap.Map(center=[31.5204, 74.3587], zoom=20)
# Last expression of the cell, so the widget is rendered as the cell output.
Map

In [None]:
# Sentinel-2 surface reflectance (Level-2A).
# The HARMONIZED collection is used on purpose: in the plain COPERNICUS/S2_SR
# collection, scenes from 2022-01-25 onwards (processing baseline 04.00+) have
# their DN values shifted by +1000, which biases ratio indices such as NDVI for
# the 2023 dates sampled below. The harmonized collection removes that shift.
sentinel2 = ee.ImageCollection("COPERNICUS/S2_SR_HARMONIZED")
# District boundaries (polygon asset); filtered by the 'DISTRICT' property later.
district_boundary_table = ee.FeatureCollection("projects/ee-sp20-bcs-023/assets/boundaries_file-polygon")
# Alfa Farms maize land, displayed on the map as a visual reference.
corn_map = ee.FeatureCollection("projects/ee-sp20-bcs-023/assets/AlfaFarmsMaizeLand")

In [None]:
# Labelled reference collections. Both are merged into the ground-truth
# collection `gt`, whose 'class' property is read when sampling.
corn = ee.FeatureCollection("projects/ee-sp20-bcs-023/assets/alpha_corn")
other = ee.FeatureCollection("projects/ee-sp20-bcs-023/assets/classified_noncorn")

In [None]:
# Re-centre the existing map widget on the maize-land asset (zoom level 8)
# and draw that asset as a layer with default styling.
Map.centerObject(corn_map, 8)
Map.addLayer(corn_map)

In [None]:
# Region of interest: geometry of the features whose 'DISTRICT' property
# equals 'KASUR'. Used to spatially filter the Sentinel-2 collection.
district_boundary = district_boundary_table.filter(ee.Filter.eq('DISTRICT', 'KASUR')).geometry()
# Ground truth: corn and non-corn features merged into a single collection.
gt = corn.merge(other)

In [None]:
def addNDVIBand(image):
    """Append NDVI, GNDVI and EVI bands to a Sentinel-2 image.

    Args:
        image: an ee.Image exposing the bands 'B2' (blue), 'B3' (green),
            'B4' (red) and 'B8' (near infrared).

    Returns:
        The input image with three extra bands named 'NDVI', 'GNDVI' and 'EVI'.
    """
    # NDVI = (NIR - RED) / (NIR + RED); a pure ratio, so band scaling cancels out.
    ndvi = image.normalizedDifference(['B8', 'B4']).rename('NDVI')
    # GNDVI = (NIR - GREEN) / (NIR + GREEN); also scale-independent.
    gndvi = image.normalizedDifference(['B8', 'B3']).rename('GNDVI')
    # EVI is NOT scale-independent: the "+ 1" term assumes reflectance in [0, 1].
    # Sentinel-2 SR bands are stored as reflectance * 10000, so the bands are
    # rescaled before they are fed into the expression.
    evi = image.expression(
        '2.5 * ((NIR - RED) / (NIR + 6 * RED - 7.5 * BLUE + 1))',
        {
            'NIR': image.select('B8').divide(10000),
            'RED': image.select('B4').divide(10000),
            'BLUE': image.select('B2').divide(10000)
        }
    ).rename('EVI')
    # Original bands are kept; the three indices are appended.
    return image.addBands([ndvi, gndvi, evi])



In [None]:
# Compositing period boundaries (ISO yyyy-mm-dd), 2023-01-01 through 2023-06-01.
# Consecutive entries form one [start, end) period, so these 11 dates
# define 10 periods.
collection_dates = ['2023-01-01','2023-01-15','2023-02-01','2023-02-15','2023-03-01','2023-03-15','2023-04-01','2023-04-15','2023-05-01','2023-05-15','2023-06-01']

In [None]:
# One dict per sampled pixel per compositing period; written to CSV afterwards.
feature_data = []

# Each period's samples are downloaded in this many chunks so that a single
# getInfo() call does not have to return the whole collection at once.
NUM_SPLITS = 10

# Walk over consecutive date pairs: (dates[0], dates[1]), (dates[1], dates[2]), ...
for period_index, (start_date, end_date) in enumerate(zip(collection_dates, collection_dates[1:])):
    print(period_index)

    # Point IDs restart at 1 for every period, so a given ID is meant to denote
    # the same sample position in each period.
    # NOTE(review): this only holds if sampleRegions returns the same samples in
    # the same order for every period. If masked pixels are dropped from some
    # composites, the IDs drift; compare the printed counts across periods.
    point_id_counter = 1

    # Images that fall within the period and intersect the district boundary.
    collection = sentinel2.filterDate(start_date, end_date).filterBounds(district_boundary)

    # Add the vegetation-index bands to each image.
    collection = collection.map(addNDVIBand)

    # Per-band median composite of the period, restricted to the spectral bands
    # and indices used as model features.
    collection_image = collection.median().select(['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B9', 'B11', 'B12', 'NDVI', 'EVI', 'GNDVI'])

    # Sample the composite over the ground-truth features at 10 m scale,
    # carrying the 'class' label along with the band values.
    extracted_features = collection_image.sampleRegions(
        collection=gt,
        properties=['class'],
        scale=10
    )

    # Total number of samples for this period (one server round-trip).
    total_count = extracted_features.size().getInfo()
    print(total_count)

    # Chunk size is rounded UP so the chunks cover every sample; rounding down
    # would silently drop the last `total_count % NUM_SPLITS` samples (and all of
    # them when total_count < NUM_SPLITS). max(1, ...) keeps the range step valid
    # when the period has no samples.
    split_count = max(1, (total_count + NUM_SPLITS - 1) // NUM_SPLITS)

    # toList(count, offset) returns at most `count` elements starting at `offset`,
    # so the final chunk simply comes back shorter.
    splits = [
        extracted_features.toList(split_count, start_index)
        for start_index in range(0, total_count, split_count)
    ]

    print("----------------Date: ",start_date,"-----------------")

    for split in splits:
        # getInfo() downloads the chunk as a Python list of feature dicts.
        split_list = split.getInfo()
        for data in split_list:
            # 'data' is a single feature; its properties hold the band values and 'class'.
            dictionary = data['properties']
            dictionary['Date'] = start_date
            dictionary['Point ID'] = point_id_counter
            feature_data.append(dictionary)
            point_id_counter = point_id_counter + 1

    # The counter is one past the last ID that was assigned in this period.
    print("At the end, Point ID is: ",point_id_counter)

In [None]:
# Preview the extracted features: show only the first few records instead of
# dumping the entire list into the cell output.
feature_data[:5]

In [None]:
# Persist the extracted features to disk so the modelling cells can read them back.
import csv

# Output file name
csv_file = 'data_features.csv'

# Column headers come from the keys of the first record.
fieldnames = feature_data[0].keys()

with open(csv_file, 'w', newline='') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    # Header row first, then one CSV row per feature dictionary.
    writer.writeheader()
    writer.writerows(feature_data)

print(f'CSV file "{csv_file}" has been created.')


CSV file "data_features.csv" has been created.


In [None]:
# importing required libraries
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Load the feature table written by the CSV export cell
# (one row per sampled point per date).
data = pd.read_csv('data_features.csv')

In [None]:
# Display the raw table as loaded from the CSV.
data

In [None]:
# 1. Preprocessing of data
# Parse the "Date" column (strings read from the CSV) into pandas datetimes.
data['Date'] = pd.to_datetime(data['Date'])


In [None]:
# Move "Date" out of the columns and use it as the row index.
# pop() removes the column, so re-running this cell raises KeyError unless
# `data` is reloaded first.
data.index = data.pop('Date')

In [None]:
# Display the table again, now indexed by Date.
data

In [None]:
# Encode the "class" column into integer type
#data['class'] = data['class'].astype('int')

# Step 2: Prepare the input sequences
def create_sequences(data, window_size):
    """Build sliding-window feature sequences for every point.

    Rows are grouped by the 'Point ID' column. Within each group, the
    'class' and 'Point ID' columns are dropped and every run of
    `window_size` consecutive rows (in the group's existing row order)
    becomes one sequence.

    Args:
        data: DataFrame containing 'Point ID', 'class' and the feature columns.
        window_size: number of consecutive rows per sequence.

    Returns:
        A list of (point_id, array) tuples, where each array has shape
        (window_size, n_feature_columns). Groups with fewer than
        `window_size` rows contribute nothing.
    """
    windows = []
    for pid, rows in data.groupby('Point ID'):
        feature_matrix = rows.drop(columns=['class', 'Point ID']).values
        # Index of the last row at which a full window can still start.
        last_start = len(feature_matrix) - window_size
        windows.extend(
            (pid, feature_matrix[start:start + window_size])
            for start in range(last_start + 1)
        )
    return windows

# Rows per sequence. Each window is `window_size` consecutive rows of one point.
window_size = 10  # You can adjust the window size based on your data and preference
sequences = create_sequences(data, window_size)

# Hold out 20% of the (point_id, sequence) pairs for testing; random_state fixes the shuffle.
# NOTE(review): the split is per sequence, not per point. If a point ever yields
# more than one window, overlapping windows of the same point can end up on
# both sides of the split.
train_sequences, test_sequences = train_test_split(sequences, test_size=0.2, random_state=42)

# Step 3: Build and train the LSTM model
def build_lstm_model(input_shape):
    """Create and compile the binary LSTM classifier.

    Args:
        input_shape: shape of one input sequence, passed to the LSTM layer.

    Returns:
        A compiled Sequential model: one LSTM layer with 64 units followed by
        a single sigmoid output unit, using binary cross-entropy loss, the
        Adam optimizer and accuracy as the reported metric.
    """
    layers = [
        LSTM(64, input_shape=input_shape),
        # Binary classification, so a single sigmoid output unit.
        Dense(1, activation='sigmoid'),
    ]
    network = Sequential(layers)
    network.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return network

# Label of a point = 'class' value of its last row in `data`.
# Built once as a Point ID -> class lookup, instead of re-filtering the whole
# frame with a boolean mask for every single sequence.
label_by_point = data.drop_duplicates(subset='Point ID', keep='last').set_index('Point ID')['class']

# Prepare the training data: stack the windows and look up one label per window.
X_train = np.array([seq for _, seq in train_sequences])
y_train = label_by_point.loc[[point_id for point_id, _ in train_sequences]].to_numpy()

# Prepare the testing data the same way.
X_test = np.array([seq for _, seq in test_sequences])
y_test = label_by_point.loc[[point_id for point_id, _ in test_sequences]].to_numpy()

# Standardize the input data for better training.
# StandardScaler works on 2-D input, so the windows are flattened to
# (n_windows * window_size, n_features), scaled per feature, and reshaped back.
# The scaler is fitted on the training set only and then applied to the test set.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train.reshape(-1, X_train.shape[-1])).reshape(X_train.shape)
X_test = scaler.transform(X_test.reshape(-1, X_test.shape[-1])).reshape(X_test.shape)



In [None]:
# Build and train the LSTM model.
# input_shape is (rows per window, number of feature columns).
input_shape = (window_size, X_train.shape[2])
model = build_lstm_model(input_shape)
# validation_split=0.1 holds out 10% of the training data for validation.
# `history` is kept for the loss/accuracy plots.
# NOTE(review): no random seed is set for Keras in the visible cells, so the
# trained weights and metrics will differ from run to run.
history = model.fit(X_train, y_train, epochs=30, batch_size=32, validation_split=0.1)




In [None]:
# Score the trained model on the held-out test set; with the compile settings
# above the result is [loss, accuracy].
model.evaluate(X_test,y_test)



[0.008222227916121483, 0.997183084487915]

In [None]:
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
# One row per epoch, one column per tracked metric.
history_df = pd.DataFrame(history.history)

# Two stacked panels: loss on top, accuracy below.
fig = make_subplots(rows=2, cols=1, subplot_titles=('Model Loss', 'Model Accuracy'))

# (history column, legend label, subplot row) for every curve, in drawing order.
trace_specs = [
    ('loss', 'Loss', 1),
    ('val_loss', 'Validation Loss', 1),
    ('accuracy', 'Accuracy', 2),
    ('val_accuracy', 'Validation Accuracy', 2),
]
for column, label, panel_row in trace_specs:
    fig.add_trace(
        go.Scatter(x=history_df.index, y=history_df[column], mode='lines', name=label),
        row=panel_row,
        col=1,
    )

fig.update_layout(height=800, width=990, template='plotly_white')

# Render the figure.
fig.show()