In [1]:
import re

import numpy as np
import pandas as pd

# Reading data

Here we read the `Sheet1` worksheet of the Excel workbook `data.xls` into a pandas DataFrame.

In [2]:
# Load the 'Sheet1' worksheet of data.xls into a DataFrame.
# The context manager closes the workbook handle as soon as parsing is done
# (the bare pd.ExcelFile(...) call left it open for the life of the kernel).
with pd.ExcelFile('data.xls') as datafile:
    df = datafile.parse('Sheet1')

# Show a sample of rows rather than dumping the whole frame.
df.head(10)

Unnamed: 0,Landgrabbed,Landgrabber,Base,Sector,Hectares,Production,Projected investment,Status of deal,Summary
0,Algeria,Al Qudra,UAE,"Finance, real estate",31000.0,"Milk, olive oil, potatoes",,Done,Al Qudra Holding is a joint-stock company esta...
1,Angola,CAMC Engineering Co. Ltd,China,Construction,1500.0,Rice,US$77 million,Done,CAMCE is a subsidiary of the China National Ma...
2,Angola,ENI,Italy,Energy,12000.0,Oil palm,,In process,The project is a joint venture between Sonango...
3,Angola,AfriAgro,Portugal,"Finance, real estate",5000.0,Oil palm,US$30-35 million,Done,AfriAgro is a subsidiary of the Portugal-based...
4,Angola,Eurico Ferreira,Portugal,"Energy, telecommunications\n",30000.0,Sugar cane,US$200 million,Done,"In 2008, Portuguese conglomerate Eurico Ferrei..."
5,Angola,Quifel Natural Resources,Portugal,"Agribusiness, energy",10000.0,Oilseed,,Done,Quifel Natural Resources is part of Portugal's...
6,Angola,Lonrho,UK,Agribusiness,25000.0,Rice,,Done (50-yr lease),"In 2005, all that remained of Lonrho, once one..."
7,Argentina,Grupo Maggi,Brazil,Agribusiness,7000.0,Soybeans,,Done,"Grupo Maggi, controlled by Blairo Maggi, one o..."
8,Argentina,Beidahuang,China,Agribusiness,320000.0,"Maize, soybeans, wheat","US$1,500 million",Suspended,State-owned Beidahuang is the largest farming ...
9,Argentina,Ingleby Company,Denmark,Finance,12433.0,"Barley, maize, soybeans, sunflower, wheat",,Done,"The Ingleby Company, which is owned by the Rau..."


## Trim data

We strip leading and trailing whitespace from the `Status of deal` column. To test the trim, we can assert on the first item, which should now be fixed.

In [3]:
# Remove leading/trailing whitespace from every deal-status label.
status_column = 'Status of deal'
df[status_column] = df[status_column].str.strip()

In [4]:
# Count the missing (NaN) entries in each column.
df.isna().sum()

Landgrabbed               0
Landgrabber               0
Base                      0
Sector                   10
Hectares                  2
Production               34
Projected investment    310
Status of deal            0
Summary                   0
dtype: int64

In [5]:
# Preview only: the filled copy is displayed but not assigned back,
# so df itself keeps its NaN values.
df.fillna(value='Missing')

Unnamed: 0,Landgrabbed,Landgrabber,Base,Sector,Hectares,Production,Projected investment,Status of deal,Summary
0,Algeria,Al Qudra,UAE,"Finance, real estate",31000,"Milk, olive oil, potatoes",Missing,Done,Al Qudra Holding is a joint-stock company esta...
1,Angola,CAMC Engineering Co. Ltd,China,Construction,1500,Rice,US$77 million,Done,CAMCE is a subsidiary of the China National Ma...
2,Angola,ENI,Italy,Energy,12000,Oil palm,Missing,In process,The project is a joint venture between Sonango...
3,Angola,AfriAgro,Portugal,"Finance, real estate",5000,Oil palm,US$30-35 million,Done,AfriAgro is a subsidiary of the Portugal-based...
4,Angola,Eurico Ferreira,Portugal,"Energy, telecommunications\n",30000,Sugar cane,US$200 million,Done,"In 2008, Portuguese conglomerate Eurico Ferrei..."
5,Angola,Quifel Natural Resources,Portugal,"Agribusiness, energy",10000,Oilseed,Missing,Done,Quifel Natural Resources is part of Portugal's...
6,Angola,Lonrho,UK,Agribusiness,25000,Rice,Missing,Done (50-yr lease),"In 2005, all that remained of Lonrho, once one..."
7,Argentina,Grupo Maggi,Brazil,Agribusiness,7000,Soybeans,Missing,Done,"Grupo Maggi, controlled by Blairo Maggi, one o..."
8,Argentina,Beidahuang,China,Agribusiness,320000,"Maize, soybeans, wheat","US$1,500 million",Suspended,State-owned Beidahuang is the largest farming ...
9,Argentina,Ingleby Company,Denmark,Finance,12433,"Barley, maize, soybeans, sunflower, wheat",Missing,Done,"The Ingleby Company, which is owned by the Rau..."


## Fixing projected investments

In [6]:
def _investment_in_millions(text):
    """Normalise one 'Projected investment' value to a string in millions of US$.

    Examples:
        'US$77 million'    -> '77'
        'US$1,500 million' -> '1500'
        'US$30-35 million' -> '30-35'   (ranges keep their hyphen)
        'US$1.3 billion'   -> '1300'
        'US$12.5 million'  -> '12.5'

    Non-string values (e.g. NaN for a missing investment) are returned unchanged.

    NOTE(review): free-text entries that are not a single total, such as the
    per-hectare rents ('US$1.2/ha/yr ... and US$8/ha/yr ...'), still collapse
    into a meaningless digit string and need manual handling.
    """
    if not isinstance(text, str):
        return text

    # Amounts quoted in billions are rescaled so every value is in millions.
    scale = 1000 if 'billion' in text.lower() else 1

    def _rewrite(match):
        number = match.group(1)
        if number is None:
            # Any character that is neither part of a number nor a hyphen is dropped.
            return ''
        # round() guards against float noise such as 1299.9999999999998.
        value = round(float(number) * scale, 6)
        return str(int(value)) if value.is_integer() else str(value)

    # Thousands separators are removed first so '1,500' is read as one number.
    return re.sub(r'(\d+(?:\.\d+)?)|[^0-9-]', _rewrite, text.replace(',', ''))


# The previous one-liner had two defects:
#   * Series.replace("billion", ...) without regex=True only matches cells whose
#     whole value is "billion", so billion amounts were never rescaled;
#   * stripping every non-digit also removed decimal points, turning
#     'US$1.3 billion' into '13' and 'US$12.5 million' into '125'.
# The column name (including its spelling) is kept so that other cells that
# refer to it keep working.
df['Projected investement fixed'] = df['Projected investment'].map(_investment_in_millions)
df

Unnamed: 0,Landgrabbed,Landgrabber,Base,Sector,Hectares,Production,Projected investment,Status of deal,Summary,Projected investement fixed
0,Algeria,Al Qudra,UAE,"Finance, real estate",31000.0,"Milk, olive oil, potatoes",,Done,Al Qudra Holding is a joint-stock company esta...,
1,Angola,CAMC Engineering Co. Ltd,China,Construction,1500.0,Rice,US$77 million,Done,CAMCE is a subsidiary of the China National Ma...,77
2,Angola,ENI,Italy,Energy,12000.0,Oil palm,,In process,The project is a joint venture between Sonango...,
3,Angola,AfriAgro,Portugal,"Finance, real estate",5000.0,Oil palm,US$30-35 million,Done,AfriAgro is a subsidiary of the Portugal-based...,30-35
4,Angola,Eurico Ferreira,Portugal,"Energy, telecommunications\n",30000.0,Sugar cane,US$200 million,Done,"In 2008, Portuguese conglomerate Eurico Ferrei...",200
5,Angola,Quifel Natural Resources,Portugal,"Agribusiness, energy",10000.0,Oilseed,,Done,Quifel Natural Resources is part of Portugal's...,
6,Angola,Lonrho,UK,Agribusiness,25000.0,Rice,,Done (50-yr lease),"In 2005, all that remained of Lonrho, once one...",
7,Argentina,Grupo Maggi,Brazil,Agribusiness,7000.0,Soybeans,,Done,"Grupo Maggi, controlled by Blairo Maggi, one o...",
8,Argentina,Beidahuang,China,Agribusiness,320000.0,"Maize, soybeans, wheat","US$1,500 million",Suspended,State-owned Beidahuang is the largest farming ...,1500
9,Argentina,Ingleby Company,Denmark,Finance,12433.0,"Barley, maize, soybeans, sunflower, wheat",,Done,"The Ingleby Company, which is owned by the Rau...",


In [21]:
# Number of deals for each distinct raw 'Projected investment' string.
# NOTE(review): the output lists 'US$100 million' twice, presumably because
# the raw values differ in stray whitespace - confirm.
df.groupby(by="Projected investment").size()

# TODO: insert pivot magic here

Projected investment
US$1,240 million                                                                              1
US$1,500 million                                                                              1
US$1,876 million                                                                              1
US$1.2/ha/yr (after first 7 years) in Gambela and US$8/ha/yr (after first 6 years) in Bako    1
US$1.3 billion                                                                                1
US$1.6 billion                                                                                1
US$10 million                                                                                 1
US$100 million                                                                                3
US$100 million                                                                                2
US$111 million                                                                                1
US$12.5 million    

## Structural problems