# Fertility Rate Predictions

A deep neural network model that predicts the fertility rate for any requested year and country using time series data.

The data is taken from THE WORLD BANK.
https://data.worldbank.org/indicator/SP.DYN.TFRT.IN?locations=IN


### Step 1: Import data and libraries

In [1]:
import numpy as np 
import pandas as pd 
import os
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Flatten,Embedding
from tensorflow.keras.preprocessing import sequence
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras
from keras.layers import Lambda
from keras.layers import Bidirectional,LSTM
from keras.layers import Dense
from keras.models import Sequential
from keras.optimizers import SGD

Using TensorFlow backend.


In [2]:
# Read the fertility table and add a zero-filled "2020" column as the last
# column; the positional slices used later in the notebook skip that column.
Data = pd.read_csv("Fertility_rate.csv").assign(**{"2020": 0})

In [3]:
# Map every country code to the position of its row in ``Data`` so that a
# code can later be turned into a row index. If a code occurs more than
# once, the last occurrence wins.
countries = Data["Country Code"]
dicts = {code: position for position, code in enumerate(countries)}

In [4]:
# Preview the first rows of the table to check the column layout.
Data.head()

Unnamed: 0,Country Name,Country Code,1960,1961,1962,1963,1964,1965,1966,1967,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,Aruba,ABW,4.82,4.655,4.471,4.271,4.059,3.842,3.625,3.417,...,1.779,1.795,1.813,1.834,1.854,1.872,1.886,1.896,1.901,0
1,Africa Eastern and Southern,AFE,6.723308,6.738651,6.752818,6.7654,6.775406,6.783357,6.789885,6.79604,...,4.956842,4.882058,4.804516,4.72622,4.647637,4.569675,4.493744,4.420264,4.349433,0
2,Afghanistan,AFG,7.45,7.45,7.45,7.45,7.45,7.45,7.45,7.45,...,5.77,5.562,5.359,5.163,4.976,4.8,4.633,4.473,4.321,0
3,Africa Western and Central,AFW,6.439002,6.455523,6.471399,6.487246,6.502619,6.51905,6.537615,6.560078,...,5.557872,5.503781,5.446144,5.384336,5.319473,5.251674,5.182391,5.113003,5.044144,0
4,Angola,AGO,6.708,6.79,6.872,6.954,7.036,7.116,7.194,7.267,...,6.12,6.039,5.953,5.864,5.774,5.686,5.6,5.519,5.442,0


### Step 2: Building required functions

In [5]:
def window(a, window_size=1):
    """Cut a 1-D sequence into all runs of ``window_size`` consecutive values.

    Parameters
    ----------
    a : 1-D sequence
        Numeric values; must support ``len(a)`` and slicing ``a[i:j]``.
    window_size : int, optional
        Number of consecutive values in each window (default 1).

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(a) - window_size + 1, window_size)``
        whose row ``k`` holds ``a[k:k + window_size]``.
    """
    n_windows = len(a) - window_size + 1
    windows = np.zeros((n_windows, window_size))
    for start, row in enumerate(windows):
        # ``row`` is a view into ``windows``, so this fills it in place.
        row[:] = a[start:start + window_size]
    return windows

def label_feature(data):
    """Split windowed rows into input features and target labels.

    Parameters
    ----------
    data : numpy.ndarray
        2-D array in which the last column of every row is the label.

    Returns
    -------
    tuple
        ``(x, y)`` where ``x`` holds every column except the last and ``y``
        is the last column. Both are taken from a copy of ``data``, so the
        caller's array is never aliased.
    """
    snapshot = data.copy()
    last = data.shape[1] - 1
    return (snapshot[:, :last], snapshot[:, last])

def shuffle(b):
    """Return a randomly reordered copy of a ``(features, labels)`` pair.

    Parameters
    ----------
    b : sequence
        ``b[0]`` is the feature array (its first axis indexes samples) and
        ``b[1]`` holds one label per sample.

    Returns
    -------
    tuple
        ``(x, y)`` float arrays containing the same samples in a random
        order; each feature row stays paired with its label.
    """
    features, labels = b[0], b[1]
    n_samples = features.shape[0]
    order = np.random.permutation(n_samples)
    shuffled_x = np.zeros(features.shape)
    shuffled_y = np.zeros(n_samples)
    # One permutation drives both arrays so that pairs stay aligned.
    for dst, src in enumerate(order):
        shuffled_x[dst] = features[src]
        shuffled_y[dst] = labels[src]
    return (shuffled_x, shuffled_y)

def dataset(a, window_size = 1):
    """Turn a 1-D series into shuffled ``(features, labels)`` training data.

    The series is cut into sliding windows of ``window_size`` values, the
    last value of each window becomes its label and the remaining values
    its features, and the resulting pairs are returned in random order.
    """
    features, labels = label_feature(window(a, window_size))
    return shuffle((features, labels))

In [6]:
# Convert the table to a NumPy array; row k of ``Dataset`` is row k of
# ``Data``, which is the position ``dicts`` stores for a country code.
Dataset = Data.to_numpy()

### Step 3: The model Formation (DNN)

In [7]:
def models(x_train , y_train):
    """Build, compile and fit a stacked bidirectional LSTM regressor.

    Parameters
    ----------
    x_train : array-like
        Training inputs with one series per row; the time axis may have
        any length (``input_shape = [None]``).
    y_train : array-like
        Training targets, one per row of ``x_train`` (assumed; the values
        are passed straight to ``model.fit``).

    Returns
    -------
    The fitted Keras model, trained for 50 epochs with an MSE loss and
    SGD with momentum.
    """
    model = Sequential([
        # Append a trailing axis so every scalar in the series becomes a
        # one-feature time step for the LSTM layers.
        Lambda(lambda x:tf.expand_dims(x,axis=-1),input_shape = [None]),
        Bidirectional(LSTM(32,return_sequences=True)),
        Bidirectional(LSTM(32)),
        Dense(1),
        # Multiply the network output by a constant factor of 100.
        Lambda(lambda x: x*100.0)
    ])
    # ``learning_rate`` is the supported keyword; the older ``lr`` alias is
    # deprecated and is rejected by newer Keras releases.
    model.compile(loss="mse",optimizer = SGD(learning_rate=1e-5,momentum=0.9))
    model.fit(x_train , y_train, epochs = 50 , verbose = 0)
    return model

### Step 4: Final predictor

This takes a country code and a year as input and returns the predicted fertility rate for that country in that year.

In [8]:
def fertility(code ,curr_year):
    """Forecast the fertility rate of one country for a given year.

    A model is trained on the country's historical series and then rolled
    forward one year at a time, starting with the forecast for 2020. Each
    prediction is fed back into the 10-value input window until
    ``curr_year`` is reached.

    Parameters
    ----------
    code : str
        Country code; must be a key of ``dicts`` (``KeyError`` otherwise).
    curr_year : int
        Year to forecast. ``curr_year - 2020 + 1`` prediction steps are
        taken; for a year before 2020 no step is taken and 0 is returned.

    Returns
    -------
    The output of ``model.predict`` for the final step (a 1x1 array), or
    0 when ``curr_year`` is earlier than 2020.

    Notes
    -----
    Assumes the first two entries of a ``Dataset`` row are non-numeric
    identifiers and the last entry is the "2020" placeholder column, so
    ``row[2:-1]`` is the yearly series -- confirm against the CSV layout.
    """
    row = Dataset[dicts[code]]
    # ``Dataset`` is built from a mixed-type table, so its rows are object
    # arrays; cast to float before handing values to NumPy and the model.
    series = np.asarray(row[2:-1], dtype=float)
    x_all, y_all = dataset(series, 11)
    # The first 45 shuffled samples are used for training, the rest for validation.
    x_train, y_train = x_all[:45], y_all[:45]
    x_val, y_val = x_all[45:], y_all[45:]
    model = models(x_train , y_train)
    # NOTE: ``evaluate`` returns the loss the model was compiled with (MSE),
    # not an accuracy; the printed label is kept unchanged.
    print("Accuracy - " , model.evaluate(x_val , y_val))

    # The 10 most recent observed values (the placeholder column is skipped).
    recent = np.asarray(row[-11:-1], dtype=float)
    steps = curr_year - 2020 + 1
    ans = 0
    for _ in range(steps):
        ans = model.predict(recent[np.newaxis])
        # np.append returns a new array, so rebind it: drop the oldest
        # value and append the new prediction to advance the window.
        recent = np.append(recent[1:], ans)
    return ans

### Example

In [10]:
# Example: forecast the fertility rate for country code "IND" in 2023.
fertility("IND" , 2023)

Accuracy -  0.079723060131073


array([[1.731548]], dtype=float32)