In [121]:
# Imports & show some of the first rows

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

grain = pd.read_csv('data.csv', encoding='ISO-8859-1"', sep=";")

grain.tail()

Unnamed: 0,Landgrabbed,Landgrabber,Base,Sector,Hectares,Production,Projected investment,Status of deal,Summary
411,Zambia,Export Trading Group,Singapore,Agribusiness,57000.0,"Food crops, jatropha",,Done,"ETG, owned by Kenya's Patel family, is incorpo..."
412,Zambia,AG-ZAM,South Africa,Agribusiness,15000.0,Sugar cane,US$251 million,Done,In April 2011 the Zambia Development Agency an...
413,Zambia,Chayton Capital,UK,Finance,20000.0,Crops,US$85 million,Done,Chayton Capital is a US$300-million London-bas...
414,Zambia,Emvest,UK,Finance,2513.0,"Banana, maize, wheat",,Done,UK private equity firm Emergent Asset Manageme...
415,Zimbabawe,Emvest,UK,Finance,9913.0,,,Done,UK private equity firm Emergent Asset Manageme...


In [122]:
# 2: "Use the TRIM and CLEAN functions"

grain['Landgrabbed'] = grain['Landgrabbed'].str.strip()
grain['Landgrabber'] = grain['Landgrabber'].str.strip()
grain['Base'] = grain['Base'].str.strip()
grain['Sector'] = grain['Sector'].str.strip()
# grain['Hectares'] = grain['Hectares'].str.strip()  # Not a string
grain['Production'] = grain['Production'].str.strip()
grain['Projected investment'] = grain['Projected investment'].str.strip()
grain['Status of deal'] = grain['Status of deal'].str.strip()
grain['Summary'] = grain['Summary'].str.strip()

# Show some info about our dataframe
grain.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 416 entries, 0 to 415
Data columns (total 9 columns):
Landgrabbed             416 non-null object
Landgrabber             416 non-null object
Base                    416 non-null object
Sector                  406 non-null object
Hectares                414 non-null float64
Production              382 non-null object
Projected investment    106 non-null object
Status of deal          416 non-null object
Summary                 416 non-null object
dtypes: float64(1), object(8)
memory usage: 29.3+ KB


In [123]:
# 3: "Blank cells – missing data that should be there"

missing = 'Missing'
zero = '0'

grain['Landgrabbed'] = grain['Landgrabbed'].fillna(missing)
grain['Landgrabber'] = grain['Landgrabber'].fillna(missing)
grain['Base'] = grain['Base'].fillna(missing)
grain['Sector'] = grain['Sector'].fillna(missing)
grain['Hectares'] = grain['Hectares'].fillna(0)
grain['Production'] = grain['Production'].fillna(missing)
grain['Projected investment'] = grain['Projected investment'].fillna(zero)
grain['Status of deal'] = grain['Status of deal'].fillna(missing)
grain['Summary'] = grain['Summary'].fillna(missing)

grain.tail()

Unnamed: 0,Landgrabbed,Landgrabber,Base,Sector,Hectares,Production,Projected investment,Status of deal,Summary
411,Zambia,Export Trading Group,Singapore,Agribusiness,57000.0,"Food crops, jatropha",0,Done,"ETG, owned by Kenya's Patel family, is incorpo..."
412,Zambia,AG-ZAM,South Africa,Agribusiness,15000.0,Sugar cane,US$251 million,Done,In April 2011 the Zambia Development Agency an...
413,Zambia,Chayton Capital,UK,Finance,20000.0,Crops,US$85 million,Done,Chayton Capital is a US$300-million London-bas...
414,Zambia,Emvest,UK,Finance,2513.0,"Banana, maize, wheat",0,Done,UK private equity firm Emergent Asset Manageme...
415,Zimbabawe,Emvest,UK,Finance,9913.0,Missing,0,Done,UK private equity firm Emergent Asset Manageme...


In [124]:
# 4: "Fixing numbers that aren’t numbers"

# Insert a new column
grain.insert(loc=7, column='Projected Investment (US$ millions)', value=grain['Projected investment'])

grain.tail()

Unnamed: 0,Landgrabbed,Landgrabber,Base,Sector,Hectares,Production,Projected investment,Projected Investment (US$ millions),Status of deal,Summary
411,Zambia,Export Trading Group,Singapore,Agribusiness,57000.0,"Food crops, jatropha",0,0,Done,"ETG, owned by Kenya's Patel family, is incorpo..."
412,Zambia,AG-ZAM,South Africa,Agribusiness,15000.0,Sugar cane,US$251 million,US$251 million,Done,In April 2011 the Zambia Development Agency an...
413,Zambia,Chayton Capital,UK,Finance,20000.0,Crops,US$85 million,US$85 million,Done,Chayton Capital is a US$300-million London-bas...
414,Zambia,Emvest,UK,Finance,2513.0,"Banana, maize, wheat",0,0,Done,UK private equity firm Emergent Asset Manageme...
415,Zimbabawe,Emvest,UK,Finance,9913.0,Missing,0,0,Done,UK private equity firm Emergent Asset Manageme...


In [125]:
# Edit the new column

# Remove 'US$' from the left and 'million' from the right
grain['Projected Investment (US$ millions)'] = grain['Projected Investment (US$ millions)'].map(lambda x: x.lstrip('US$').rstrip('million'))       
grain['Projected Investment (US$ millions)'] = grain['Projected Investment (US$ millions)'].replace("[^0-9-]+", "", regex=True).str.strip()

grain.tail()

Unnamed: 0,Landgrabbed,Landgrabber,Base,Sector,Hectares,Production,Projected investment,Projected Investment (US$ millions),Status of deal,Summary
411,Zambia,Export Trading Group,Singapore,Agribusiness,57000.0,"Food crops, jatropha",0,0,Done,"ETG, owned by Kenya's Patel family, is incorpo..."
412,Zambia,AG-ZAM,South Africa,Agribusiness,15000.0,Sugar cane,US$251 million,251,Done,In April 2011 the Zambia Development Agency an...
413,Zambia,Chayton Capital,UK,Finance,20000.0,Crops,US$85 million,85,Done,Chayton Capital is a US$300-million London-bas...
414,Zambia,Emvest,UK,Finance,2513.0,"Banana, maize, wheat",0,0,Done,UK private equity firm Emergent Asset Manageme...
415,Zimbabawe,Emvest,UK,Finance,9913.0,Missing,0,0,Done,UK private equity firm Emergent Asset Manageme...


In [128]:
# Pivot tables
grain['Hectares'] = pd.to_numeric(grain['Hectares'])

pivot = grain.pivot_table(index=['Projected Investment (US$ millions)'], aggfunc='first')

pivot.tail()

Unnamed: 0_level_0,Base,Hectares,Landgrabbed,Landgrabber,Production,Projected investment,Sector,Status of deal,Summary
Projected Investment (US$ millions),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
8,India,27000.0,Ethiopia,BHO Agro,"Cereal, oilseeds, pulses",US$8/ha/yr (lease),Agribusiness,Done,"In May 2010, BHO Bio Products signed an agreem..."
80,US,10000.0,Brazil,Bunge,Sugar cane,US$80 million,Agribusiness,In process,"In 2010, US-based Bunge, one of the five large..."
83,Saudi Arabia,12306.0,Argentina,Almarai Co,"Maize, soybean",US$83 million,Agribusiness,Done,"Almarai, the largest dairy company in the Gulf..."
85,UK,20000.0,Zambia,Chayton Capital,Crops,US$85 million,Finance,Done,Chayton Capital is a US$300-million London-bas...
879,China,200000.0,Brazil,Chongqing Grain Group,Soybean,US$879 million,Agribusiness,In process,Chongqing Grain Group is one of China's larges...
