In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from keras.optimizers import Adam
from tensorflow.keras.losses import Huber
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import tensorflow as tf
import gradio as gr

In [2]:
# Load dataset
df = pd.read_csv("cleaned.csv")

# Relevant columns for training
features = [
    'Energy supply adequacy (%)',
    'Protein supply (g/cap/day)',
    'Animal protein supply (g/cap/day)',
    'Cereal import dependency (%)',
    'Energy supply (kcal/cap/day)',
    'Energy from cereals (%)'
]
target = 'Undernourishment (%)'

# Define time steps (using the last 5 years to predict the next year)
time_steps = 5

# To store predictions for each country
predictions_by_country = {}

# Group the dataframe by 'Country'
for country, country_df in df.groupby('Country'):
    try:
        # Normalize the data (independently for each country)
        scaler = MinMaxScaler()
        scaled_features = scaler.fit_transform(country_df[features])
        scaled_target = scaler.fit_transform(country_df[[target]])

        # Combine scaled features and target back
        df_scaled = np.hstack([scaled_features, scaled_target])

        # Create sequences (sliding windows) of the last `time_steps` years
        def create_sequences(data, time_steps):
            x, y = [], []
            for i in range(len(data) - time_steps):
                x.append(data[i:i + time_steps, :-1])  # All features except the target
                y.append(data[i + time_steps, -1])  # The target (Undernourishment)
            return np.array(x), np.array(y)

        # Create training data for the country
        X, y = create_sequences(df_scaled, time_steps)

        # Build the LSTM model with hyperparameter tuning
        model = Sequential()
        model.add(tf.keras.Input(shape=(X.shape[1], X.shape[2])))
        model.add(LSTM(units=200, return_sequences=True))
        model.add(Dropout(0.3))
        model.add(LSTM(units=150, return_sequences=True))
        model.add(Dropout(0.3))
        model.add(LSTM(units=100))
        model.add(Dense(50, activation='relu', kernel_regularizer='l2'))
        model.add(Dense(1))

        # Compile the model with Huber Loss and a reduced learning rate
        optimizer = Adam(learning_rate=0.001)
        huber_loss = Huber()
        model.compile(optimizer=optimizer, loss=huber_loss)

        # Early stopping and learning rate reduction to avoid overfitting
        early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.00001)

        # Train the model with validation split
        model.fit(X, y, epochs=300, batch_size=32, validation_split=0.2, callbacks=[early_stopping, reduce_lr])

        # Predict the future undernourishment percentages for 2023, 2024, and 2025
        future_predictions = []
        current_input = df_scaled[-time_steps:, :-1].reshape(1, time_steps, len(features))

        for _ in range(3):  # Predict for 3 years
            predicted_undernourishment = model.predict(current_input)

            # Inverse transform the prediction (only for the target column)
            predicted_undernourishment = scaler.inverse_transform(
                np.hstack([np.zeros((1, len(features))), predicted_undernourishment])
            )[:, -1]

            future_predictions.append(predicted_undernourishment[0])

            # Prepare the input for the next prediction
            predicted_value = np.zeros((1, 1, len(features)))  # Create an array of zeros
            predicted_value[0, 0, 0] = predicted_undernourishment  # Place the predicted value

            # Shift the input and append the predicted value
            current_input = np.append(current_input[:, 1:, :], predicted_value, axis=1)

        # Store predictions for the current country
        predictions_by_country[country] = future_predictions

    except Exception as e:
        print(f"Error processing country {country}: {e}")

# Output the predictions for each country for the years 2023, 2024, and 2025
for country, predictions in predictions_by_country.items():
    print(f"Country: {country}, Predicted Undernourishment for 2023: {predictions[0]:.4f}, 2024: {predictions[1]:.4f}, 2025: {predictions[2]:.4f}")

# Initialize lists to store true and predicted values
all_true_values = []
all_predictions = []

# Group the dataframe by 'Country' for evaluation
for country, country_df in df.groupby('Country'):
    # Store true values for comparison (assuming these are the last 3 actual values)
    true_values = country_df[target].values[-3:]  # Last 3 years for true values
    all_true_values.extend(true_values)  # Add true values to the list
    all_predictions.extend(predictions_by_country.get(country, [0, 0, 0]))  # Handle missing predictions

# Calculate metrics
mae = mean_absolute_error(all_true_values, all_predictions)
mse = mean_squared_error(all_true_values, all_predictions)
rmse = np.sqrt(mse)
r2 = r2_score(all_true_values, all_predictions)

# Display the overall metrics
print(f"Overall Metrics: MAE: {mae:.4f}, MSE: {mse:.4f}, RMSE: {rmse:.4f}, R²: {r2:.4f}")

# Calculate and display metrics for each year
true_2023, true_2024, true_2025 = all_true_values[::3], all_true_values[1::3], all_true_values[2::3]
pred_2023, pred_2024, pred_2025 = all_predictions[::3], all_predictions[1::3], all_predictions[2::3]

# Metrics for 2023
mae_2023 = mean_absolute_error(true_2023, pred_2023)
mse_2023 = mean_squared_error(true_2023, pred_2023)
rmse_2023 = np.sqrt(mse_2023)
r2_2023 = r2_score(true_2023, pred_2023)

# Metrics for 2024
mae_2024 = mean_absolute_error(true_2024, pred_2024)
mse_2024 = mean_squared_error(true_2024, pred_2024)
rmse_2024 = np.sqrt(mse_2024)
r2_2024 = r2_score(true_2024, pred_2024)

# Metrics for 2025
mae_2025 = mean_absolute_error(true_2025, pred_2025)
mse_2025 = mean_squared_error(true_2025, pred_2025)
rmse_2025 = np.sqrt(mse_2025)
r2_2025 = r2_score(true_2025, pred_2025)

# Display metrics for each year
print(f"Metrics for 2023: MAE: {mae_2023:.4f}, MSE: {mse_2023:.4f}, RMSE: {rmse_2023:.4f}, R²: {r2_2023:.4f}")
print(f"Metrics for 2024: MAE: {mae_2024:.4f}, MSE: {mse_2024:.4f}, RMSE: {rmse_2024:.4f}, R²: {r2_2024:.4f}")
print(f"Metrics for 2025: MAE: {mae_2025:.4f}, MSE: {mse_2025:.4f}, RMSE: {rmse_2025:.4f}, R²: {r2_2025:.4f}")


Epoch 1/300
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step - loss: 0.7019 - val_loss: 0.7082 - learning_rate: 0.0010
Epoch 2/300
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step - loss: 0.6856 - val_loss: 0.6824 - learning_rate: 0.0010
Epoch 3/300
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step - loss: 0.6719 - val_loss: 0.6622 - learning_rate: 0.0010
Epoch 4/300
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step - loss: 0.6612 - val_loss: 0.6488 - learning_rate: 0.0010
Epoch 5/300
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step - loss: 0.6506 - val_loss: 0.6411 - learning_rate: 0.0010
Epoch 6/300
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step - loss: 0.6394 - val_loss: 0.6375 - learning_rate: 0.0010
Epoch 7/300
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step - loss: 0.6290 - val_loss: 0.6371 - learning_rate: 0.0010
Epoch 8/

In [3]:
# Save the model after training
model.save("lstm_undernourishment_model.h5")



In [4]:
# Load the saved model when needed
from tensorflow.keras.models import load_model
model = load_model("lstm_undernourishment_model.h5")



In [5]:
import gradio as gr
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import load_model

# Load dataset and saved model
df = pd.read_csv("cleaned.csv")
model = load_model("lstm_undernourishment_model.h5")

# Relevant columns for features
features = [
    'Energy supply adequacy (%)',
    'Protein supply (g/cap/day)',
    'Animal protein supply (g/cap/day)',
    'Cereal import dependency (%)',
    'Energy supply (kcal/cap/day)',
    'Energy from cereals (%)'
]
target = 'Undernourishment (%)'

# Define time steps
time_steps = 5

# Function to predict undernourishment for a country and year
def predict_undernourishment(country, year):
    try:
        # Filter the dataframe by country
        country_df = df[df['Country'] == country]
        
        # Normalize the data (independently for each country)
        scaler = MinMaxScaler()
        scaled_features = scaler.fit_transform(country_df[features])
        scaled_target = scaler.fit_transform(country_df[[target]])
        
        # Combine scaled features and target
        df_scaled = np.hstack([scaled_features, scaled_target])
        
        # Create sequences (sliding windows) of the last `time_steps` years
        def create_sequences(data, time_steps):
            x, y = [], []
            for i in range(len(data) - time_steps):
                x.append(data[i:i + time_steps, :-1])
                y.append(data[i + time_steps, -1])
            return np.array(x), np.array(y)
        
        # Prepare the input data
        X, y = create_sequences(df_scaled, time_steps)
        
        # Get the input for prediction (last time_steps years)
        current_input = df_scaled[-time_steps:, :-1].reshape(1, time_steps, len(features))
        
        future_predictions = []
        for _ in range(3):  # Predict for 2023, 2024, 2025
            predicted_undernourishment = model.predict(current_input)
            predicted_undernourishment = scaler.inverse_transform(
                np.hstack([np.zeros((1, len(features))), predicted_undernourishment])
            )[:, -1]
            future_predictions.append(predicted_undernourishment[0])
            
            predicted_value = np.zeros((1, 1, len(features)))
            predicted_value[0, 0, 0] = predicted_undernourishment
            current_input = np.append(current_input[:, 1:, :], predicted_value, axis=1)
        
        # Return the prediction based on the selected year
        year_index = {'2023': 0, '2024': 1, '2025': 2}
        return future_predictions[year_index[year]]
    
    except Exception as e:
        return f"Error processing country {country}: {str(e)}"




In [6]:
import pandas as pd
import plotly.express as px
import gradio as gr

# Function to predict and plot undernourishment variation from 2000 to 2025 using Plotly Express
def predict_and_plot_undernourishment(country, year):
    if country in predictions_by_country:
        # Historical undernourishment data from 2000 to 2022
        historical_years = list(range(2000, 2023))
        historical_undernourishment = df[df['Country'] == country]['Undernourishment (%)'].values
        
        # Predicted data for 2023, 2024, and 2025
        future_years = [2023, 2024, 2025]
        predictions = predictions_by_country[country]
        
        # Combine historical and predicted data
        years = historical_years + future_years
        undernourishment_values = list(historical_undernourishment) + predictions
        
        # Create a DataFrame for Plotly
        data = pd.DataFrame({
            'Year': years,
            'Undernourishment (%)': undernourishment_values
        })
        
        # Create an interactive plot with Plotly Express
        fig = px.line(data, x='Year', y='Undernourishment (%)', title=f"Undernourishment Prediction for {country} (2000-2025)",
                      markers=True)
        
        # Highlight the selected year
        year_value = int(year)
        selected_value = undernourishment_values[years.index(year_value)]
        fig.add_scatter(x=[year_value], y=[selected_value], mode='markers', 
                         marker=dict(color='red', size=10), 
                         name=f'Selected Year: {year}')

        # Return the prediction and the Plotly figure directly
        return f"Predicted Undernourishment for {country} in {year}: {selected_value:.4f}%", fig
    else:
        return f"No predictions available for {country}", None

# Define Gradio input components
country_input = gr.Dropdown(choices=df['Country'].unique().tolist(), label="Country")
year_input = gr.Dropdown(choices=["2023", "2024", "2025"], label="Year")

# Create Gradio interface
interface = gr.Interface(
    fn=predict_and_plot_undernourishment,
    inputs=[country_input, year_input],
    outputs=["text", "plot"],  # Use 'plot' for Plotly figures
    title="Undernourishment Prediction",
    description="Enter a country and a year (2023, 2024, 2025) to predict undernourishment percentages and visualize trends from 2000 to 2025."
)

# Launch interface
interface.launch()

Running on local URL:  http://127.0.0.1:7861

To create a public link, set `share=True` in `launch()`.




In [7]:
import pandas as pd
import plotly.express as px
import gradio as gr

# Function to predict and plot undernourishment variation from 2000 to 2025 using Plotly Express
def predict_and_plot_undernourishment(country, year):
    if country in predictions_by_country:
        # Historical undernourishment data from 2000 to 2022
        historical_years = list(range(2000, 2023))
        historical_undernourishment = df[df['Country'] == country]['Undernourishment (%)'].values
        
        # Predicted data for 2023, 2024, and 2025
        future_years = [2023, 2024, 2025]
        predictions = predictions_by_country[country]
        
        # Combine historical and predicted data
        years = historical_years + future_years
        undernourishment_values = list(historical_undernourishment) + predictions
        
        # Create a DataFrame for Plotly
        data = pd.DataFrame({
            'Year': years,
            'Undernourishment (%)': undernourishment_values
        })
        
        # Create an interactive plot with Plotly Express for undernourishment
        fig_undernourishment = px.line(data, x='Year', y='Undernourishment (%)', 
                                       title=f"Undernourishment Prediction for {country} (2000-2025)",
                                       markers=True)
        
        # Highlight the selected year
        year_value = int(year)
        selected_value = undernourishment_values[years.index(year_value)]
        fig_undernourishment.add_scatter(x=[year_value], y=[selected_value], mode='markers', 
                                          marker=dict(color='red', size=10), 
                                          name=f'Selected Year: {year}')

        # Create a map visualization for the country
        # Use a basic map plot; you can customize the location and zoom
        country_data = df[df['Country'] == country]
        fig_map = px.scatter_geo(country_data, locations='Country', 
                                  locationmode='country names',
                                  title=f"{country} Location on the Map",
                                  scope='world',
                                  size_max=15,
                                  projection="natural earth")
        
        # Return the predictions and both figures
        return (f"Predicted Undernourishment for {country} in {year}: {selected_value:.4f}%", 
                fig_undernourishment, 
                fig_map)
    else:
        return f"No predictions available for {country}", None, None

# Define Gradio input components
country_input = gr.Dropdown(choices=df['Country'].unique().tolist(), label="Country")
year_input = gr.Dropdown(choices=["2023", "2024", "2025"], label="Year")

# Create Gradio interface
interface = gr.Interface(
    fn=predict_and_plot_undernourishment,
    inputs=[country_input, year_input],
    outputs=["text", "plot", "plot"],  # Two plots: one for undernourishment, one for the map
    title="Undernourishment Prediction",
    description="Enter a country and a year (2023, 2024, 2025) to predict undernourishment percentages and visualize trends from 2000 to 2025."
)

# Launch interface
interface.launch()

Running on local URL:  http://127.0.0.1:7862

To create a public link, set `share=True` in `launch()`.




In [8]:
import gradio as gr

# Function to predict undernourishment based on country and year
def predict_undernourishment(country, year):
    # Check if predictions exist for the selected country
    if country in predictions_by_country:
        year_index = int(year) - 2023  # Map the year to the prediction index (2023=0, 2024=1, 2025=2)
        prediction = predictions_by_country[country][year_index]
    else:
        return f"No predictions available for {country}"
    
    return f"Predicted Undernourishment for {country} in {year}: {prediction:.4f}%"

# Define Gradio input components
country_input = gr.Dropdown(choices=df['Country'].unique().tolist(), label="Country")
year_input = gr.Dropdown(choices=["2023", "2024", "2025"], label="Year")

# Create the Gradio interface
interface = gr.Interface(
    fn=predict_undernourishment,
    inputs=[country_input, year_input],
    outputs="text",
    title="Undernourishment Prediction",
    description="Enter a country and year (2023, 2024, 2025) to predict undernourishment percentages."
)

# Launch the interface
interface.launch()

Running on local URL:  http://127.0.0.1:7863

To create a public link, set `share=True` in `launch()`.


