<a href="https://colab.research.google.com/github/JanithBo/learning-terraform-3087701/blob/main/LSTM_model_to_'Female_group_(%3C14_years).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Load the dataset
file_path = "/content/leprosy-Dataset -for-testing.csv"
data = pd.read_csv(file_path)

# Inspect unique values in 'Gender' column
print("Unique values in 'Gender' column before cleaning:", data['Gender'].unique())

# Check the distribution of ages
print("\nAge distribution before cleaning:")
print(data['Age'].value_counts())

# Standardize the 'Gender' column to avoid issues with spaces or capitalization
data['Gender'] = data['Gender'].str.strip().str.lower()

# Inspect unique values in 'Gender' column after cleaning
print("Unique values in 'Gender' column after cleaning:", data['Gender'].unique())

# Filter for the Female group (<14 years)
female_data = data[(data['Age'] < 14) & (data['Gender'] == 'female')]

# Check the number of records
female_under_14_count = len(female_data)
print(f"\nNumber of records for Females (<14 years): {female_under_14_count}")

# If no data is available, print a message and exit
if female_under_14_count == 0:
    print("No data found for the Female group (<14 years). Exiting.")
else:
    # Prepare data for LSTM model training and forecasting
    def prepare_data(female_data, time_column, target_column):
        # Group the data by month (or other relevant time period) for time series forecasting
        female_data = female_data.groupby(female_data['DateDetection'].dt.to_period('M')).size()
        female_data.index = female_data.index.to_timestamp()

        # Check if the grouped data is empty
        if len(female_data) == 0:
            raise ValueError("Not enough data to train the model. Please ensure there is sufficient data.")

        # Normalize the data using MinMaxScaler
        scaler = MinMaxScaler(feature_range=(0, 1))
        scaled_data = scaler.fit_transform(np.array(female_data).reshape(-1, 1))

        return scaled_data, scaler, female_data.index

    # Create LSTM model
    def build_lstm_model(time_step):
        model = Sequential()
        model.add(LSTM(50, return_sequences=True, input_shape=(time_step, 1)))
        model.add(LSTM(50, return_sequences=False))
        model.add(Dense(25))
        model.add(Dense(1))
        model.compile(optimizer='adam', loss='mean_squared_error')
        return model

    # Prepare data for LSTM training
    def create_dataset(data, time_step=12):
        X, Y = [], []
        for i in range(len(data) - time_step):
            X.append(data[i:(i + time_step), 0])
            Y.append(data[i + time_step, 0])
        return np.array(X), np.array(Y)

    # Train and evaluate LSTM for each group
    def train_and_evaluate_lstm(group_data, group_name):
        print(f"\nTraining LSTM for group: {group_name}")

        # Prepare the data
        scaled_data, scaler, time_index = prepare_data(group_data, 'DateDetection', 'Detections')

        # Create the LSTM dataset
        time_step = 12  # 12 months
        X, Y = create_dataset(scaled_data, time_step)

        # Check if there's enough data to train the model
        if len(X) == 0 or len(Y) == 0:
            print(f"Not enough data for {group_name} to train the model.")
            return

        # Reshape input to be [samples, time steps, features]
        X = X.reshape(X.shape[0], X.shape[1], 1)

        # Split the data into training and testing sets
        train_size = int(len(X) * 0.8)
        X_train, X_test = X[:train_size], X[train_size:]
        Y_train, Y_test = Y[:train_size], Y[train_size:]

        # Build the LSTM model
        model = build_lstm_model(time_step)

        # Train the model
        model.fit(X_train, Y_train, batch_size=1, epochs=20, verbose=0)

        # Make predictions
        train_predict = model.predict(X_train)
        test_predict = model.predict(X_test)

        # Invert the scaling to get original values
        train_predict_inverse = scaler.inverse_transform(train_predict)
        test_predict_inverse = scaler.inverse_transform(test_predict)
        Y_train_inverse = scaler.inverse_transform([Y_train])
        Y_test_inverse = scaler.inverse_transform([Y_test])

        # Calculate RMSE and MAE
        train_rmse = np.sqrt(mean_squared_error(Y_train_inverse[0], train_predict_inverse))
        test_rmse = np.sqrt(mean_squared_error(Y_test_inverse[0], test_predict_inverse))
        train_mae = mean_absolute_error(Y_train_inverse[0], train_predict_inverse)
        test_mae = mean_absolute_error(Y_test_inverse[0], test_predict_inverse)

        # Print the results
        print(f"Training RMSE for {group_name}: {train_rmse}")
        print(f"Test RMSE for {group_name}: {test_rmse}")
        print(f"Training MAE for {group_name}: {train_mae}")
        print(f"Test MAE for {group_name}: {test_mae}")

        # Plotting the results
        plt.figure(figsize=(10, 6))

        # Plot historical data
        plt.plot(time_index, group_data, label='Historical Data', color='blue')

        # Create prediction indices for plotting
        train_predict_index = time_index[time_step:train_size + time_step]
        test_predict_index = time_index[train_size + time_step:]

        # Plot train predictions
        plt.plot(train_predict_index, train_predict_inverse, label='Train Predictions', color='green')

        # Plot test predictions
        plt.plot(test_predict_index, test_predict_inverse, label='Test Predictions', color='red')

        plt.title(f'LSTM Model - Leprosy Case Detection Forecast ({group_name})')
        plt.xlabel('Date')
        plt.ylabel('Number of Detections')
        plt.legend()
        plt.grid(True)
        plt.show()

    # Train and evaluate the LSTM model for the female group
    train_and_evaluate_lstm(female_data, 'Females (<14 years)')


Unique values in 'Gender' column before cleaning: ['F' 'M' 'S' '2' ',' 'N' 'T' 'D' nan '4' ']' 'K']

Age distribution before cleaning:
Age
23.0     788
30.0     785
24.0     780
25.0     773
20.0     736
        ... 
95.0       2
93.0       1
589.0      1
335.0      1
1.0        1
Name: count, Length: 96, dtype: int64
Unique values in 'Gender' column after cleaning: ['f' 'm' 's' '2' ',' 'n' 't' 'd' nan '4' ']' 'k']

Number of records for Females (<14 years): 0
No data found for the Female group (<14 years). Exiting.
