## Superficie, production et rendement des principales cultures par région
<br>
Dataset: https://senegal.opendataforafrica.org/xlupqbd/superficie-production-et-rendement-des-principales-cultures-par-r%C3%A9gion

There is currently an error with googletrans; the fixed version is only available via pip. See: https://github.com/ssut/py-googletrans/issues/234

In [1]:
# Uncomment to install the pinned googletrans pre-release mentioned in the note above.
#!pip install googletrans==3.1.0a0

In [1]:
import pandas as pd
import csv
from pathlib import Path
import googletrans
from googletrans import Translator
import json
import pprint

%matplotlib inline

## Setting up the data files

In [2]:
# Build the path to the agricultural data set: the ANSD folder sits under
# data/external, two directory levels above the current working directory.
base_dir = Path.cwd().parent.parent
data_folder = base_dir / 'data' / 'external' / 'ANSD'
input_filename = 'ObservationData_ertqifb.csv'
input_file = data_folder / input_filename
print(input_file)

G:\Omdena-food\GPSDD-Food-Security-Senegal\data\external\ANSD\ObservationData_ertqifb.csv


In [3]:
# Read the CSV file located above into a data frame (default pandas parsing options).
df_input = pd.read_csv(input_file)  

In [4]:
# Display the raw frame; the stored output shows only the first and last rows.
df_input

Unnamed: 0,region,culture,indicateur,Unit,Date,Value
0,DAKAR,ARACHIDE,Production (Tonne),tonne,2017,1884.493889
1,DAKAR,ARACHIDE,Production (Tonne),tonne,2018,55.762016
2,DAKAR,ARACHIDE,Superficie (Ha),Ha,2017,3140.823149
3,DAKAR,ARACHIDE,Superficie (Ha),Ha,2018,371.746770
4,DAKAR,ARACHIDE,Rendement (Kg/Ha),KG/HA,2017,764.142857
...,...,...,...,...,...,...
643,ZIGUINCHOR,SESAME,Production (Tonne),tonne,2018,662.293667
644,ZIGUINCHOR,SESAME,Superficie (Ha),Ha,2017,1158.322779
645,ZIGUINCHOR,SESAME,Superficie (Ha),Ha,2018,1274.155057
646,ZIGUINCHOR,SESAME,Rendement (Kg/Ha),KG/HA,2017,571.769527


## Trying to use googletrans for translation
Note: from time to time Google blocks the library; in that case it returns the original text instead of a translation.

In [26]:
# Load the French -> English dictionary saved by a previous run, if there is one.
try:
    # The context manager closes the file handle even when parsing fails.
    with open('fr_to_en.json', 'r') as translations_file:
        translations = json.load(translations_file)
except (OSError, ValueError):
    # OSError: the file is missing or cannot be read.
    # ValueError: the content cannot be decoded or is not valid JSON
    # (json.JSONDecodeError is a ValueError subclass).
    # Either way, start from an empty dictionary. Unrelated errors
    # (e.g. KeyboardInterrupt, NameError) are no longer swallowed.
    translations = {}
translations

{'ARACHIDE': 'peanut',
 'MAIS': 'corn',
 'MANIOC': 'cassava',
 'MIL': 'millet',
 'NIEBE': 'cowpea',
 'SORGHO': 'sorghum',
 'PASTEQUE': 'watermelon',
 'SESAME': 'sesame',
 'RIZ': 'rice',
 'COTON': 'cotton',
 'FONIO': 'fonio',
 'region': 'region',
 'culture': 'culture',
 'indicateur': 'indicator',
 'Unit': 'unit',
 'Date': 'date',
 'Value': 'value',
 'Production (Tonne)': 'production (t)',
 'Superficie (Ha)': 'area (ha)',
 'Rendement (Kg/Ha)': 'yield (kg/ha)',
 'tonne': 'ton',
 'Ha': 'ha',
 'KG/HA': 'kg/ha'}

In [27]:
# Columns of df_input whose values are looked up in the translation dictionary.
columns_to_translate = ['culture', 'indicateur', 'Unit']

In [32]:
# Ask googletrans for an English translation of every term that is not yet in
# the `translations` dictionary. Source and destination language are passed
# explicitly (French -> English).
translator = Translator()

# Collect the candidate terms once: the unique values of each selected column,
# followed by the column headers of the data frame.
candidate_terms = []
for column in columns_to_translate:
    # dropna(): missing values are not text and cannot be sent to the translator
    candidate_terms.extend(df_input[column].dropna().unique())
candidate_terms.extend(df_input.columns)

new_translations = {}
for term in candidate_terms:
    # Skip terms that are already known, and terms translated earlier in this
    # loop, so that each term triggers at most one request.
    if term in translations or term in new_translations:
        continue
    print(f'Waiting for translation of: {term}')
    new_translations[term] = translator.translate(term, dest='en', src='fr').text

if not new_translations:
    print("No new items found.")
else:
    print("\n Finished translation, please check the results: \n")
    pprint.pprint(new_translations, width=1)

No new items found.


In [29]:
# Apply manual corrections to the suggested translations. Each entry overrides
# whatever value new_translations currently holds for that key.
# ('Unit' was previously assigned twice with the same value; it is listed once here.)
new_translations.update({
    # crop names
    'ARACHIDE': 'peanut',
    'MAIS': 'corn',
    'MANIOC': 'cassava',
    'MIL': 'millet',
    'NIEBE': 'cowpea',
    'SORGHO': 'sorghum',
    'PASTEQUE': 'watermelon',
    'SESAME': 'sesame',
    'RIZ': 'rice',
    'COTON': 'cotton',
    'FONIO': 'fonio',
    # units, indicators and column headers
    'tonne': 'ton',
    'indicateur': 'indicator',
    'Value': 'value',
    'Unit': 'unit',
    'Superficie (Ha)': 'area (ha)',
    'Production (Tonne)': 'production (t)',
    'Rendement (Kg/Ha)': 'yield (kg/ha)',
    'Ha': 'ha',
    'KG/HA': 'kg/ha',
    'Date': 'date',
})

In [30]:
# Merge the reviewed translations into the main dictionary; an entry in
# new_translations overwrites an existing entry with the same key.
translations.update(new_translations)
translations

{'ARACHIDE': 'peanut',
 'MAIS': 'corn',
 'MANIOC': 'cassava',
 'MIL': 'millet',
 'NIEBE': 'cowpea',
 'SORGHO': 'sorghum',
 'PASTEQUE': 'watermelon',
 'SESAME': 'sesame',
 'RIZ': 'rice',
 'COTON': 'cotton',
 'FONIO': 'fonio',
 'region': 'region',
 'culture': 'culture',
 'indicateur': 'indicator',
 'Unit': 'unit',
 'Date': 'date',
 'Value': 'value',
 'Production (Tonne)': 'production (t)',
 'Superficie (Ha)': 'area (ha)',
 'Rendement (Kg/Ha)': 'yield (kg/ha)',
 'tonne': 'ton',
 'Ha': 'ha',
 'KG/HA': 'kg/ha'}

In [23]:
# Save the dictionary to fr_to_en.json, the file the loading cell reads.
# The context manager flushes and closes the file as soon as the dump is done,
# instead of leaving an open handle behind.
with open('fr_to_en.json', 'w') as translations_file:
    json.dump(translations, translations_file)

## Translate the dataframe

In [24]:
# Work on a copy so that df_input keeps the original (untranslated) terms.
df_en = df_input.copy()

In [None]:
# Translate the values of the selected columns and the column headers with the
# dictionary built above. Terms without a dictionary entry are left unchanged.
for column in columns_to_translate:
    # Assign the result back instead of calling replace(..., inplace=True) on the
    # selected column: the in-place form acts on an intermediate Series, which
    # pandas warns about and which does not update df_en under Copy-on-Write.
    df_en[column] = df_en[column].replace(translations)
df_en = df_en.rename(columns=translations)

In [25]:
# Check the translation: show the first rows of the translated frame.
df_en.head()

Unnamed: 0,region,culture,indicator,unit,date,value
0,DAKAR,peanut,production (t),ton,2017,1884.493889
1,DAKAR,peanut,production (t),ton,2018,55.762016
2,DAKAR,peanut,area (ha),ha,2017,3140.823149
3,DAKAR,peanut,area (ha),ha,2018,371.74677
4,DAKAR,peanut,yield (kg/ha),kg/ha,2017,764.142857


## Example use of translate
`dest` defines the output language; the default is "en" (English).

In [8]:
# Only the destination language ('de') is given here; src is not passed.
# NOTE(review): the stored output equals the input text, which looks like the
# "Google blocks the library" case noted earlier in this notebook. Confirm by re-running.
translator.translate('안녕하세요', dest='de').text

'안녕하세요'