## DATA NORMALISATION
   This notebook will contain functions that normalise temperature and rainfall data.
   <br>
   The data will be normalised on a yearly basis.
   <br>
   We will also attempt to visualise the data in graphs and tables, using pandas and HoloViews.

In [None]:
## import cell
import os
import sys
import pandas as pd
import numpy as np
import scipy as scp
import xlrd
import xlsxwriter
import bokeh
import holoviews as hv
from bokeh.io import show, output_file
from bokeh.models import FactorRange
from bokeh.plotting import figure
import bokeh.layouts
from bokeh.layouts import column
# Activate the bokeh and matplotlib plotting backends for HoloViews.
hv.extension('bokeh', 'matplotlib')

In [None]:
# select_df is a project-local helper; presumably it loads the named dataset
# into a DataFrame -- TODO confirm the meaning of the first argument (0).
from select_df import select_df

df_foodprices = select_df(0, 'foodprices')
df_migration = select_df(0, 'migration_movements')
# df_weather is the frame handed to bar_graph() in the last cell.
df_weather = select_df(0, 'temperature_and_precipitation')

In [None]:
# unused function to collect a different dataset of normalised data
def normalised_data():
    """Ask whether normalised data comparisons should be shown.

    The question is repeated until the reply is exactly 'yes' or 'no'.

    Returns:
        str: the accepted reply, either 'yes' or 'no'.
    """
    while True:
        print("Type yes or no")

        # The reply is kept in its own name: storing it under the function's
        # name made the retry call a string instead of asking again.
        answer = input("Would you like to see normalised data comparisons?: ")

        if answer in ('yes', 'no'):
            return answer
        
# input
def get_input(saved_dict, c):
    """Prompt for a country name and resolve it through ``correct_input``.

    Args:
        saved_dict: mapping of lower-case country names to the country codes
            used in the data.
        c: 1-based position of the country being asked for (1 or 2 when called
            from ``get_info``); used for the prompt text and passed on to
            ``correct_input``.

    Returns:
        Whatever ``correct_input`` returns for the typed name.
    """
    # Pick the ordinal suffix from the position; the prompt used to read
    # "2st country" because "st" was hard-coded.
    suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(c, 'th')

    typed_name = input("Please input the {}{} country: ".format(c, suffix))

    return correct_input(saved_dict, typed_name, c)

# converts the input into the country-code abbreviation used in the .csv
def correct_input(saved_dict, country, c):
    """Translate a typed country name into its code, re-asking when unknown.

    Args:
        saved_dict: mapping of lower-case country names to country codes.
        country: the name as typed by the user; matched case-insensitively.
        c: position of the country being asked for, forwarded to ``get_input``
            when the name has to be requested again.

    Returns:
        The code stored in ``saved_dict`` for the name, or the result of a new
        ``get_input`` prompt when the name is not a key of ``saved_dict``.
    """
    name = country.lower()

    if name in saved_dict:
        return saved_dict[name]

    # Unknown name: ask again whatever the position is. The two former
    # per-position branches were identical, and any other position fell
    # through and returned None.
    return get_input(saved_dict, c)
    
    
def get_target():
    """Ask which data series to use and return the reply unvalidated."""
    return input("Please input tas for temperature or pr for precipitation data: ")

def get_year(c):
    """Prompt until a year from 1991 up to and including 2015 is entered.

    Args:
        c: number of the country the year belongs to; only used in the
            prompt texts.

    Returns:
        str: the accepted reply exactly as typed (not converted to int).
    """
    while True:
        reply = input("Please input year{} for country{} starting from 1991 up to and including 2015: ".format(c, c))

        try:
            as_number = int(reply)
        except ValueError:
            print("Please use integer/numerical values")
            continue

        if 1991 <= as_number <= 2015:
            return reply

        print("Please input a number for year{} starting from 1991 up to and including 2015: ".format(c))
    
def get_info():
    """Interactively collect everything needed for a two-country comparison.

    Lists the accepted country names, then asks for the first country and its
    year, the second country and its year, and finally the data type, which
    is asked for again until it is 'tas' or 'pr'.

    Returns:
        tuple: (country1, country2, year1, year2, target_data, normalisation),
        where the countries are the values ``get_input`` returned, the years
        are the values ``get_year`` returned, and normalisation is always 1.
    """
    saved_dict = {
        'mauritania': 'MRT', 'lesotho': 'LSO', 'somalia': 'SOM',
        'nigeria': 'NGA', 'tanzania': 'TZA', 'zambia': 'ZMB',
        'burundi': 'BDI', 'afghanistan': 'AFG', 'mali': 'MLI',
        'niger': 'NER', 'malawi': 'MWI', 'congo': 'ZAR',
        'cabo verde': 'CPV', 'sudan': 'SDN', 'pakistan': 'PAK',
        'burkina faso': 'BFA', 'rwanda': 'RWA', 'kenia': 'KEN',
        'senegal': 'SEN', 'cameroon': 'CMR', 'sierra leone': 'SLE',
        'iraq': 'IRQ', 'uganda': 'UGA', 'mozambique': 'MOZ',
        'zimbabwe': 'ZWE', 'central african republic': 'CAF',
        'ethiopia': 'ETH', 'guinea': 'GIN', 'liberia': 'LBR',
        'djibouti': 'DJI', 'iran': 'IRN', 'madagascar': 'MDG',
        'lebanon': 'LBN',
    }
    datatypes = ['tas', 'pr']

    # Show the user which names are accepted.
    print("Useable country inputs: ")
    print('')
    for name in saved_dict:
        print(name)
    print('')

    country1 = get_input(saved_dict, 1)
    year1 = get_year(1)

    country2 = get_input(saved_dict, 2)
    year2 = get_year(2)

    target_data = get_target()
    while target_data not in datatypes:
        target_data = get_target()

    # The yes/no question from normalised_data() is not wired in;
    # the flag is fixed at 1.
    normalisation = 1

    return (country1, country2, year1, year2, target_data, normalisation)
    
def visualise_country():
    """Build a side-by-side table of one data series for two country/year pairs.

    Reads 'tas_pr_1991_2015_AC.csv', asks the user for the selection through
    ``get_info`` and prints a one-line description of the comparison.

    Returns:
        pandas.DataFrame: two columns (first selection, second selection),
        each re-indexed from 0 so the rows line up.
    """
    data = pd.read_csv('tas_pr_1991_2015_AC.csv')

    country1, country2, year1, year2, target, _ = get_info()

    columns = []
    for code, year in ((country1, year1), (country2, year2)):
        rows = data.loc[data['country'] == code]
        rows = rows.loc[rows['year'] == int(year)]
        # Drop the original row labels so both selections align row by row.
        columns.append(rows[target].reset_index(drop=True))

    df = pd.concat(columns, axis=1)

    print("{} data for {} in comparison with data for {}. From years {} and {} respectively ".format(target, country1, country2, year1, year2))

    return df

def visualise_country_manual(country1, country2, year1, year2, target_data):
    """Build a side-by-side table of one data series for two country/year pairs.

    Non-interactive: every choice is passed in as an argument.

    Args:
        country1: lower-case name of the first country (a key of the name/code
            table below, e.g. 'guinea').
        country2: lower-case name of the second country.
        year1: year to select for the first country; converted with ``int``.
        year2: year to select for the second country; converted with ``int``.
        target_data: name of the column to compare (the prompts elsewhere in
            this notebook offer 'tas' and 'pr').

    Returns:
        pandas.DataFrame: two columns (first country, second country), each
        re-indexed from 0 so the rows line up.

    Raises:
        ValueError: if either country name is not in the name/code table.
    """
    saved_dict = {
        'mauritania': 'MRT', 'lesotho': 'LSO', 'somalia': 'SOM',
        'nigeria': 'NGA', 'tanzania': 'TZA', 'zambia': 'ZMB',
        'burundi': 'BDI', 'afghanistan': 'AFG', 'mali': 'MLI',
        'niger': 'NER', 'malawi': 'MWI', 'congo': 'ZAR',
        'cabo verde': 'CPV', 'sudan': 'SDN', 'pakistan': 'PAK',
        'burkina faso': 'BFA', 'rwanda': 'RWA', 'kenia': 'KEN',
        'senegal': 'SEN', 'cameroon': 'CMR', 'sierra leone': 'SLE',
        'iraq': 'IRQ', 'uganda': 'UGA', 'mozambique': 'MOZ',
        'zimbabwe': 'ZWE', 'central african republic': 'CAF',
        'ethiopia': 'ETH', 'guinea': 'GIN', 'liberia': 'LBR',
        'djibouti': 'DJI', 'iran': 'IRN', 'madagascar': 'MDG',
        'lebanon': 'LBN',
    }

    # Fail with a clear error instead of printing a hint and then crashing on
    # an unbound variable further down.
    unknown = [name for name in (country1, country2) if name not in saved_dict]
    if unknown:
        raise ValueError(
            "Please enter correct country names (unknown: {})".format(unknown)
        )

    data = pd.read_csv('tas_pr_1991_2015_AC.csv')

    columns = []
    # Each country is looked up under its own name; the second one used to be
    # resolved from country1, so both columns showed the same country.
    for name, year in ((country1, year1), (country2, year2)):
        wanted = (data['country'] == saved_dict[name]) & (data['year'] == int(year))
        columns.append(data.loc[wanted, target_data].reset_index(drop=True))

    df = pd.concat(columns, axis=1)

    print("{} data for {} in comparison with data for {}. From years {} and {} respectively ".format(target_data, country1, country2, year1, year2))

    return df
    

In [None]:
# Interactive comparison: prompts for two countries, two years and a data type.
visualise_country()
# Non-interactive alternative (takes the lower-case country names, not the codes):
# visualise_country_manual("guinea", "cameroon", 1991, 1991, "pr")

In [None]:
def single_year_data_grab(year, country, target_data):
    """Return one data column for a single country and year.

    Reads 'tas_pr_1991_2015_AC.csv' on every call.

    Args:
        year: year to select; converted with ``int``.
        country: lower-case country name (a key of the name/code table below,
            e.g. 'burkina faso').
        target_data: name of the column to return.

    Returns:
        pandas.Series: the ``target_data`` values of the rows matching the
        country and year, keeping their original row labels.

    Raises:
        ValueError: if ``country`` is not in the name/code table.
    """
    saved_dict = {
        'mauritania': 'MRT', 'lesotho': 'LSO', 'somalia': 'SOM',
        'nigeria': 'NGA', 'tanzania': 'TZA', 'zambia': 'ZMB',
        'burundi': 'BDI', 'afghanistan': 'AFG', 'mali': 'MLI',
        'niger': 'NER', 'malawi': 'MWI', 'congo': 'ZAR',
        'cabo verde': 'CPV', 'sudan': 'SDN', 'pakistan': 'PAK',
        'burkina faso': 'BFA', 'rwanda': 'RWA', 'kenia': 'KEN',
        'senegal': 'SEN', 'cameroon': 'CMR', 'sierra leone': 'SLE',
        'iraq': 'IRQ', 'uganda': 'UGA', 'mozambique': 'MOZ',
        'zimbabwe': 'ZWE', 'central african republic': 'CAF',
        'ethiopia': 'ETH', 'guinea': 'GIN', 'liberia': 'LBR',
        'djibouti': 'DJI', 'iran': 'IRN', 'madagascar': 'MDG',
        'lebanon': 'LBN',
    }

    # Fail with a clear error instead of printing a hint and then crashing on
    # an unbound variable further down.
    if country not in saved_dict:
        raise ValueError(
            "Please enter correct country names (unknown: {!r})".format(country)
        )

    data = pd.read_csv('tas_pr_1991_2015_AC.csv')

    wanted = (data['country'] == saved_dict[country]) & (data['year'] == int(year))

    return data.loc[wanted, target_data]
    # plot months on x-axis
    
    # plot precipitation on y-axis
    
def bar_graphs(year, country, target_data1, target_data2):
    """Plot two data series of one country and year as bars and a curve.

    Args:
        year: year to plot, forwarded to ``single_year_data_grab``.
        country: country name, forwarded to ``single_year_data_grab``.
        target_data1: column drawn as bars.
        target_data2: column drawn as a red curve.

    Returns:
        A HoloViews layout made of the bars/curve overlay, the bars alone and
        the curve alone.
    """
    months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
              "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]

    target_frame1 = single_year_data_grab(year, country, target_data1)
    target_frame2 = single_year_data_grab(year, country, target_data2)

    # Pair every value with a month label. The modulo makes a 13th value start
    # at "Jan" again; the former reset-then-increment counter skipped to "Feb".
    data_inject1 = [(months[i % 12], value) for i, value in enumerate(target_frame1)]
    data_inject2 = [(months[i % 12], value) for i, value in enumerate(target_frame2)]

    # Programmatic counterparts of the IPython-only "%output size=150" and
    # "%opts Curve (color='red')" magics, which are not valid Python inside a
    # function body.
    # NOTE(review): hv.output and .opts(color=...) need a HoloViews release that
    # ships the opts/output utilities -- confirm the installed version.
    hv.output(size=150)

    bars = hv.Bars(
        data_inject1,
        hv.Dimension(country + " {} data from {}".format(target_data1, year)),
        # Spaces added: the label used to read e.g. "prandtas".
        target_data1 + " and " + target_data2,
    )
    line = hv.Curve(
        data_inject2,
        hv.Dimension(country + " {} data from {}".format(target_data2, year)),
        target_data2,
    ).opts(color='red')

    return bars * line + bars + line

In [None]:
# Example: 2012 precipitation (bars) and temperature (curve) for Burkina Faso.
bar_graphs(2012,'burkina faso', 'pr', 'tas')

In [None]:
def normalise(input_data):
    """Min-max scale the values of a Series into the range 0..1.

    Args:
        input_data: pandas Series of numbers.

    Returns:
        A copy of ``input_data`` with one extra entry labelled
        'Normalized_data' whose value is the list of scaled numbers, in the
        same order as the input. The caller's Series is left untouched.
    """
    values = list(input_data)

    if values:
        lowest = min(values)
        spread = max(values) - lowest
    else:
        # Nothing to scale; min()/max() would raise on an empty sequence.
        lowest = spread = 0

    if spread == 0:
        # All values equal (or no values): avoid dividing by zero and map
        # everything to the bottom of the range.
        output_data = [0.0 for _ in values]
    else:
        output_data = [(value - lowest) / spread for value in values]

    # Work on a copy so a column handed in by the caller does not grow by one
    # entry behind its back.
    result = input_data.copy()
    result['Normalized_data'] = output_data

    return result

def select_plot_temperature_and_precipitation(input_data, country, year1, year2):
    """Select one country's rows for a span of years and add a fractional year.

    Args:
        input_data: DataFrame with at least the columns 'country', 'year',
            'month', 'pr' and 'tas'.
        country: lower-case country name (a key of the name/code table below).
            Any other value only triggers a printed hint and is then used
            as-is for the filter.
        year1: first year to include.
        year2: last year to include.

    Returns:
        pandas.DataFrame: the matching rows with the columns 'pr', 'tas',
        'country' and 'YearMonth', where YearMonth is year + (month - 1) / 12.
        ``input_data`` itself is not modified.
    """
    # 'sudan' is lower-case like every other key; it was spelled 'Sudan' here,
    # so the lower-case name was never found.
    saved_dict = {
        'mauritania': 'MRT', 'lesotho': 'LSO', 'somalia': 'SOM',
        'nigeria': 'NGA', 'tanzania': 'TZA', 'zambia': 'ZMB',
        'burundi': 'BDI', 'afghanistan': 'AFG', 'mali': 'MLI',
        'niger': 'NER', 'malawi': 'MWI', 'congo': 'ZAR',
        'cabo verde': 'CPV', 'sudan': 'SDN', 'pakistan': 'PAK',
        'burkina faso': 'BFA', 'rwanda': 'RWA', 'kenia': 'KEN',
        'senegal': 'SEN', 'cameroon': 'CMR', 'sierra leone': 'SLE',
        'iraq': 'IRQ', 'uganda': 'UGA', 'mozambique': 'MOZ',
        'zimbabwe': 'ZWE', 'central african republic': 'CAF',
        'ethiopia': 'ETH', 'guinea': 'GIN', 'liberia': 'LBR',
        'djibouti': 'DJI', 'iran': 'IRN', 'madagascar': 'MDG',
        'lebanon': 'LBN',
    }

    if country in saved_dict:
        country = saved_dict[country]
    else:
        # Deliberately non-fatal, as before: the value is still used for the
        # filter below.
        print("Please enter correct country name")

    in_country = input_data['country'] == country
    in_period = (input_data['year'] > year1 - 1) & (input_data['year'] < year2 + 1)

    # Copy the selection so the new column is not written onto a slice of the
    # caller's frame.
    selection = input_data.loc[in_country & in_period].copy()

    # Turn year and month into a fractional year (January -> .0, July -> .5).
    selection['YearMonth'] = (
        selection['year'].astype(int) + (selection['month'].astype(int) - 1) / 12
    )

    return selection[['pr', 'tas', 'country', 'YearMonth']]


def bar_graph(input_data, country, year1, year2, target_data1, target_data2):
    """Overlay two normalised data series of one country as bars and a curve.

    Args:
        input_data: DataFrame passed on to
            ``select_plot_temperature_and_precipitation``.
        country: country name, also used as the label of the key dimension.
        year1: first year to include.
        year2: last year to include.
        target_data1: column drawn as bars (labelled 'precipitation').
        target_data2: column drawn as a red curve (labelled 'temperature').

    Returns:
        A HoloViews overlay of the bars and the curve.
    """
    frame = select_plot_temperature_and_precipitation(input_data, country, year1, year2)

    # normalise() stores the scaled values as a list under the
    # 'Normalized_data' label, so read that entry directly instead of scanning
    # for the first list-typed element. Copies are passed in so the frame's
    # columns are never altered.
    data_inject1 = normalise(frame[target_data1].copy())['Normalized_data']
    data_inject2 = normalise(frame[target_data2].copy())['Normalized_data']

    # Programmatic counterparts of the IPython-only "%output size=250" and
    # "%opts Curve (color='red')" magics, which are not valid Python inside a
    # function body.
    # NOTE(review): hv.output and .opts(color=...) need a HoloViews release that
    # ships the opts/output utilities -- confirm the installed version.
    hv.output(size=250)

    bars = hv.Bars(data_inject1, "{}".format(country), 'precipitation and temperature', label='precipitation')
    line = hv.Curve(data_inject2, "{}".format(country), 'precipitation and temperature', label='temperature').opts(color='red')

    # function to call food data

    plot = bars * line

    return plot

In [None]:
# Example: normalised precipitation and temperature for Mauritania, 1992 up to and including 1995.
bar_graph(df_weather, 'mauritania', 1992, 1995, 'pr', 'tas')