In [1]:
import pandas as pd
import numpy as np

# Load the two input tables from the local data/ directory.
# country_land is the frame that is later merged on Entity / Code / Year.
country_land = pd.read_csv(filepath_or_buffer='data/country_land.csv')
# land_use_df supplies the 'Year', 'Entity' and 'area_aggregated_categories'
# columns used by the pivot step.
land_use_df = pd.read_csv(filepath_or_buffer='data/global-land-use-since-10000bc.csv')




In [2]:
# Reshape the land-use table from long to wide: one row per 'Year', one column
# per distinct 'Entity' value, cells taken from 'area_aggregated_categories'.
#
# pivot() labels the resulting column axis 'Entity'. That label is cleared with
# rename_axis(columns=None) so it does not appear as a stray 'Entity' header in
# displays, where it is easily mistaken for a real 'Entity' column.
#
# reset_index() then turns 'Year' back into an ordinary column; it is chained
# (returning a new frame) instead of mutating the pivot result in place.
pivoted_df = (
    land_use_df
    .pivot(index='Year', columns='Entity', values='area_aggregated_categories')
    .rename_axis(columns=None)
    .reset_index()
)



In [3]:
# Keep only the rows whose 'Year' is -2000 or later; earlier years are dropped.
# .copy() yields an independent frame, so adding columns to filtered_df later
# does not touch pivoted_df.
filtered_df = pivoted_df.loc[pivoted_df['Year'].ge(-2000)].copy()



In [4]:

# Tag every row with two constant identifier columns:
# Entity = 'World' and Code = 'OWID_WRL'.
filtered_df = filtered_df.assign(Entity='World', Code='OWID_WRL')



In [5]:
# Put the identifier columns first, followed by the land-use categories.
# Only the columns listed here are kept.
filtered_df = filtered_df.loc[:, [
    'Entity', 'Code', 'Year',
    'Cropland', 'Pasture', 'Permanent ice', 'Semi-natural land',
    'Urban', 'Villages', 'Wild barren land', 'Wild woodlands',
]]


In [6]:
# Preview the top five rows of the reordered land-use frame.
filtered_df.head(n=5)

Entity,Entity.1,Code,Year,Cropland,Pasture,Permanent ice,Semi-natural land,Urban,Villages,Wild barren land,Wild woodlands
8,World,OWID_WRL,-2000,405925.8,1093.226,2725568.373,86305813.23,1360.1656,1742.49687,24758798.71,17603737.13
9,World,OWID_WRL,-1000,844290.8,1001585.0,2720972.057,85642679.14,4843.91682,4829.56554,24358634.78,17226203.67
10,World,OWID_WRL,0,1741559.0,1639259.0,2704668.543,84629744.86,31626.34486,31944.69626,24134239.42,16890996.75
11,World,OWID_WRL,100,1767095.0,1759832.0,2706814.884,84818676.48,42782.48874,34466.69391,24023481.67,16650889.34
12,World,OWID_WRL,200,1697587.0,1827159.0,2709404.687,84991954.06,53495.00687,41164.1032,23961458.93,16521815.84


In [7]:
# Combine country_land with the land-use rows, matching on Entity, Code and
# Year. The outer join keeps rows that appear in only one of the two frames;
# columns missing on one side are filled with NaN.
clean_combined = country_land.merge(
    filtered_df,
    on=['Entity', 'Code', 'Year'],
    how='outer',
)

In [8]:
# Preview the top five rows of the merged frame.
clean_combined.head(n=5)

Unnamed: 0,Entity,Code,Year,Cereals allocated to other uses,Cereals allocated to animal feed,Cereals allocated to human food,Needed habitable land area,Cropland,Pasture,Permanent ice,Semi-natural land,Urban,Villages,Wild barren land,Wild woodlands
0,Afghanistan,AFG,2014,7000.0,704000.0,6072000.0,,,,,,,,,
1,Afghanistan,AFG,2015,5000.0,748000.0,6265000.0,,,,,,,,,
2,Afghanistan,AFG,2016,7000.0,472000.0,6426000.0,,,,,,,,,
3,Afghanistan,AFG,2017,7000.0,403000.0,6746000.0,,,,,,,,,
4,Afghanistan,AFG,2018,8000.0,410000.0,6761000.0,,,,,,,,,


In [10]:
# Save the merged table to disk. index=False leaves out the row index, which
# after the merge is only an auto-generated row counter; writing it would add
# an unnamed first column that comes back as 'Unnamed: 0' when the CSV is
# read again.
clean_combined.to_csv('data/cleancombined.csv', index=False)