In [16]:
import EDA_Poverty as data
import pandas as pd 
import numpy as np
import tensorflow as tf
from tensorflow import keras
from keras.models import load_model,Model
from keras.layers import Dense,Flatten,Concatenate,Input,Embedding
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import LabelEncoder
from keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt


In [17]:
import logging
logging.getLogger("tensorflow").setLevel(logging.ERROR)
tf.autograph.set_verbosity(0)
import warnings
warnings.filterwarnings('ignore')

In [18]:
Poverty_1=data.get_poverty_1_data()
Poverty_1

<class 'pandas.core.frame.DataFrame'>
Index: 4411 entries, 0 to 4876
Columns: 108 entries, country to p50_p10_index
dtypes: float64(102), int64(3), object(3)
memory usage: 3.7+ MB
<class 'pandas.core.frame.DataFrame'>
Index: 4411 entries, 0 to 4876
Columns: 108 entries, country to p50_p10_index
dtypes: float64(102), int64(3), object(3)
memory usage: 3.7+ MB


Unnamed: 0,country,year,hc_ratio,international_poverty_gap,$1_poverty_gap
0,Albania,1996,0.920669,0.140051,0.011726
1,El Salvador,2009,7.419960,1.988661,0.152806
2,El Salvador,2010,6.345635,1.617116,0.142835
3,El Salvador,2011,5.340929,1.250314,0.070231
4,El Salvador,2012,4.637148,1.117315,0.067280
...,...,...,...,...,...
3638,Argentina,2007,1.723668,0.663743,0.293898
3639,Argentina,2006,2.118256,0.796947,0.259502
3640,Argentina,2005,2.631179,0.895969,0.315964
3641,Argentina,2003,5.279263,1.944457,0.679037


In [19]:
country_encoder=LabelEncoder()
country_encoded=country_encoder.fit_transform(Poverty_1[['country']])
country_encoded_df=pd.DataFrame(country_encoded).rename({0:'country'},axis=1)
X=pd.concat([country_encoded_df['country'],Poverty_1[['year','hc_ratio','international_poverty_gap','$1_poverty_gap']]],axis=1)

In [20]:
X

Unnamed: 0,country,year,hc_ratio,international_poverty_gap,$1_poverty_gap
0,0,1996,0.920669,0.140051,0.011726
1,42,2009,7.419960,1.988661,0.152806
2,42,2010,6.345635,1.617116,0.142835
3,42,2011,5.340929,1.250314,0.070231
4,42,2012,4.637148,1.117315,0.067280
...,...,...,...,...,...
3638,3,2007,1.723668,0.663743,0.293898
3639,3,2006,2.118256,0.796947,0.259502
3640,3,2005,2.631179,0.895969,0.315964
3641,3,2003,5.279263,1.944457,0.679037


In [21]:
y=Poverty_1[['international_poverty_gap','$1_poverty_gap']]
y

Unnamed: 0,international_poverty_gap,$1_poverty_gap
0,0.140051,0.011726
1,1.988661,0.152806
2,1.617116,0.142835
3,1.250314,0.070231
4,1.117315,0.067280
...,...,...
3638,0.663743,0.293898
3639,0.796947,0.259502
3640,0.895969,0.315964
3641,1.944457,0.679037


In [22]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [23]:
num_countries = len(X['country'].unique())
year_shape = (X[['year']].shape[1],)
hc_ratio_shape = (X[['hc_ratio']].shape[1],)

train_country = X_train['country']  # First column: country
train_year = X_train['year']     # Second column: year
train_hc_ratio = X_train['hc_ratio'] # Third column: headcount

test_country = X_test['country']  # First column: country
test_year = X_test['year']     # Second column: year
test_hc_ratio = X_test['hc_ratio'] # Third column: headcount


In [24]:
def create_model(num_countries,year_shape,hc_ratio_shape):
    country_input = Input(shape=(1,), name='country')
    year_input = Input(shape=(1,), name='year')
    hc_ratio_input=Input(shape=(1,),name='hc_ratio')
    
    country_embedding = Embedding(input_dim=num_countries, output_dim=15)(country_input)
    country_embedding = Flatten()(country_embedding)
    
    
    concatenated_inputs = Concatenate()([country_embedding, year_input, hc_ratio_input])
    
    dense_layer = Dense(128, activation='relu')(concatenated_inputs)
    dense_layer = Dense(64, activation='relu')(dense_layer)
    dense_layer = Dense(32, activation='relu')(dense_layer)
    dense_layer = Dense(16, activation='relu')(dense_layer)
    output = Dense(2, activation='linear')(dense_layer)
    
    model=Model(inputs=[country_input,year_input,hc_ratio_input],outputs=output)
    
    model.compile(optimizer='adam', loss='mean_squared_error',metrics=['mae'])
    
    return model


In [25]:
model=create_model(num_countries,year_shape,hc_ratio_shape)

model.summary()

In [26]:
model.fit([train_country, train_year, train_hc_ratio], y_train, epochs=1000, batch_size=32, validation_split=0.2,callbacks=EarlyStopping(
                                                                                monitor='val_loss',  
                                                                                 patience=10,
                                                                                restore_best_weights=True  
                                                                                    )
          )

Epoch 1/1000


I0000 00:00:1731477143.037650    8188 service.cc:146] XLA service 0x7f484401e190 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1731477143.037688    8188 service.cc:154]   StreamExecutor device (0): NVIDIA GeForce RTX 3050 Laptop GPU, Compute Capability 8.6
2024-11-13 11:22:23.099375: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-11-13 11:22:23.266315: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:531] Loaded cuDNN version 90101


[1m46/73[0m [32m━━━━━━━━━━━━[0m[37m━━━━━━━━[0m [1m0s[0m 3ms/step - loss: 4352.1860 - mae: 45.6366 

I0000 00:00:1731477146.353287    8188 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 107ms/step - loss: 3133.3040 - mae: 35.4771 - val_loss: 30.9879 - val_mae: 3.3904
Epoch 2/1000
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 25.3179 - mae: 2.8601 - val_loss: 18.1049 - val_mae: 2.1332
Epoch 3/1000
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 15.4737 - mae: 2.1298 - val_loss: 12.4218 - val_mae: 1.7231
Epoch 4/1000
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 10.2562 - mae: 1.8566 - val_loss: 8.6663 - val_mae: 1.6529
Epoch 5/1000
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 7.0542 - mae: 1.4718 - val_loss: 6.4716 - val_mae: 1.3427
Epoch 6/1000
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 7.0854 - mae: 1.5223 - val_loss: 5.4684 - val_mae: 0.9823
Epoch 7/1000
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - 

<keras.src.callbacks.history.History at 0x7f498b5b5190>

In [27]:

predictions = model.predict([test_country, test_year, test_hc_ratio])
predictions

[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 50ms/step 


array([[25.245502  ,  8.03598   ],
       [ 0.45529696,  0.35683054],
       [ 7.6702075 ,  1.1292877 ],
       ...,
       [17.499601  ,  4.6018486 ],
       [ 0.21906134,  0.20555873],
       [ 3.4912982 ,  0.13383098]], dtype=float32)

In [28]:
from sklearn.metrics import mean_squared_error, r2_score

# Evaluate the model
mse = mean_squared_error(y_test, predictions)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, predictions)

print(f"RMSE: {rmse}")
print(f"R-squared: {r2}")

RMSE: 0.9616490380859699
R-squared: 0.9026753306388855


# Preparing model for deployment

In [29]:
from tensorflow.keras.models import load_model
import pickle

ModuleNotFoundError: No module named 'tensorflow.keras'

In [None]:
filename='poverty_predictor.h5'
model.save(filename)



In [None]:
model = load_model('poverty_predictor.h5')

with open('/home/savyasanchisharma/Poverty-Prediction-Model/Web App/Trained Models and Encoder/country_encoder.pkl', 'rb') as file:
    country_encoder = pickle.load(file)

def predict_poverty(country, year, hc_ratio):
    country_code=country_encoder.transform([country])
    country_code = np.array([country_code]).reshape(-1, 1)
    year = np.array([year]).reshape(-1, 1)
    hc_ratio = np.array([hc_ratio]).reshape(-1, 1)
    
    # Make predictions using the model
    prediction = model.predict([country_code, year, hc_ratio])
    
    return prediction[0][0],prediction[0][1]  # Return the predicted value



In [None]:
predict_poverty('Albania',2020,90)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step


(45.93731, 18.801865)