In [1]:
import numpy as np 
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split

In [2]:
# Load the conservation projects table; the CSV is read with the mac_roman codec.
data = pd.read_csv(
    'master_conservation_projects_database.csv',
    encoding='mac_roman',
)

In [12]:
# Fill missing project sizes with the placeholder '10,000'.
# The result is assigned back instead of calling fillna(inplace=True) on the
# selected column: the chained in-place form operates on an intermediate
# object, raises a FutureWarning on recent pandas, and leaves `data`
# unchanged once Copy-on-Write is enabled.
# NOTE(review): the fill value is a string, so this column presumably holds
# text such as '10,000' rather than numbers — confirm before aggregating it.
data['Project Size (hectares)'] = data['Project Size (hectares)'].fillna('10,000')

In [13]:
# Lookup table: full US state / territory name -> two-letter abbreviation.
# Entries keep their original (alphabetical) insertion order.
us_state_abbrev = {
    'Alabama': 'AL', 'Alaska': 'AK', 'American Samoa': 'AS', 'Arizona': 'AZ',
    'Arkansas': 'AR', 'California': 'CA', 'Colorado': 'CO', 'Connecticut': 'CT',
    'Delaware': 'DE', 'District of Columbia': 'DC', 'Florida': 'FL', 'Georgia': 'GA',
    'Guam': 'GU', 'Hawaii': 'HI', 'Idaho': 'ID', 'Illinois': 'IL',
    'Indiana': 'IN', 'Iowa': 'IA', 'Kansas': 'KS', 'Kentucky': 'KY',
    'Louisiana': 'LA', 'Maine': 'ME', 'Maryland': 'MD', 'Massachusetts': 'MA',
    'Michigan': 'MI', 'Minnesota': 'MN', 'Mississippi': 'MS', 'Missouri': 'MO',
    'Montana': 'MT', 'Nebraska': 'NE', 'Nevada': 'NV', 'New Hampshire': 'NH',
    'New Jersey': 'NJ', 'New Mexico': 'NM', 'New York': 'NY', 'North Carolina': 'NC',
    'North Dakota': 'ND', 'Northern Mariana Islands': 'MP', 'Ohio': 'OH', 'Oklahoma': 'OK',
    'Oregon': 'OR', 'Pennsylvania': 'PA', 'Puerto Rico': 'PR', 'Rhode Island': 'RI',
    'South Carolina': 'SC', 'South Dakota': 'SD', 'Tennessee': 'TN', 'Texas': 'TX',
    'Utah': 'UT', 'Vermont': 'VT', 'Virgin Islands': 'VI', 'Virginia': 'VA',
    'Washington': 'WA', 'West Virginia': 'WV', 'Wisconsin': 'WI', 'Wyoming': 'WY',
}

In [14]:
# Reverse-geocoding smoke test using geopy's Nominatim client.
from geopy.geocoders import Nominatim

# Geocoder instance; set_state / set_city reuse this module-level object.
geolocator = Nominatim(user_agent="geoapiExercises")

# Sample coordinates, kept as strings so they can be joined into one query.
Latitude = "2.269494556"
Longitude = "-72.75852614"

location = geolocator.reverse(",".join((Latitude, Longitude)))
address = location.raw['address']

# Pull out the address components; city/state/country fall back to ''.
city, state, country = (
    address.get(field, '') for field in ('city', 'state', 'country')
)
code = address.get('country_code')
zipcode = address.get('postcode')

print('City : ', city)
print('State : ', state)
print('Country : ', country)
print('Zip Code : ', zipcode)

City :  
State :  Guaviare
Country :  Colombia
Zip Code :  None


In [15]:
def set_state(x):
    """Return the row's state, reverse-geocoding it when the row has none.

    Parameters
    ----------
    x : mapping-like row (e.g. a Series passed by ``DataFrame.apply(axis=1)``)
        Must provide ``'Project State'``, ``'Latitude'`` and ``'Longitude'``.

    Returns
    -------
    The existing ``'Project State'`` value when one is present. Otherwise the
    first available of the ``state`` / ``region`` / ``territory`` fields of
    the reverse-geocoded address, replaced by its ``us_state_abbrev`` entry
    when the name is listed there. ``np.nan`` when the coordinates are
    missing, the geocoder returns no result, or the address carries none of
    those fields.

    Notes
    -----
    Relies on the module-level ``geolocator`` and ``us_state_abbrev``. A row
    that needs a lookup triggers one ``geolocator.reverse`` call.
    """
    current = x['Project State']
    # Missing means a real NaN/None or the literal text "nan" in any case,
    # which is the same test set_city applies.
    if not (pd.isna(current) or str(current).lower() == 'nan'):
        return current

    latitude, longitude = x['Latitude'], x['Longitude']
    if pd.isna(latitude) or pd.isna(longitude):
        # No usable coordinates: skip the request instead of sending "nan,nan".
        return np.nan

    location = geolocator.reverse(str(latitude) + "," + str(longitude))
    if location is None:
        # geopy's reverse() may return None when nothing is found.
        return np.nan

    address = location.raw.get('address', {})
    for key in ("state", "region", "territory"):
        if key in address:
            state = address[key]
            break
    else:
        return np.nan

    # Only names listed in us_state_abbrev are shortened; others pass through.
    return us_state_abbrev.get(state, state)

In [16]:
def set_city(x):
    """Return the row's city, reverse-geocoding it when the row has none.

    Parameters
    ----------
    x : mapping-like row (e.g. a Series passed by ``DataFrame.apply(axis=1)``)
        Must provide ``'Project City'``, ``'Latitude'`` and ``'Longitude'``.

    Returns
    -------
    The existing ``'Project City'`` value when one is present. Otherwise the
    first available of the ``town``, ``city``, ``county``, ``village``,
    ``suburb`` or ``state_district`` fields of the reverse-geocoded address.
    ``np.nan`` when the coordinates are missing, the geocoder returns no
    result, or the address carries none of those fields.

    Notes
    -----
    Relies on the module-level ``geolocator``. A row that needs a lookup
    triggers one ``geolocator.reverse`` call.
    """
    current = x['Project City']
    # Missing means a real NaN/None or the literal text "nan" in any case.
    if not (pd.isna(current) or str(current).lower() == 'nan'):
        return current

    latitude, longitude = x['Latitude'], x['Longitude']
    if pd.isna(latitude) or pd.isna(longitude):
        # No usable coordinates: skip the request instead of sending "nan,nan".
        return np.nan

    location = geolocator.reverse(str(latitude) + "," + str(longitude))
    if location is None:
        # geopy's reverse() may return None when nothing is found.
        return np.nan

    address = location.raw.get('address', {})
    # The locality can appear under several keys; the first match in this
    # order wins.
    for key in ("town", "city", "county", "village", "suburb", "state_district"):
        if key in address:
            return address[key]
    return np.nan

In [17]:
# Work on a copy so the loaded frame keeps its original location columns.
df_cleaned = data.copy()

# Resolve the state and city row by row (axis=1 hands each row to the function).
df_cleaned['Project State'] = data.apply(set_state, axis=1)
df_cleaned['Project City'] = data.apply(set_city, axis=1)

In [18]:
# Hand-entered city names for specific row labels.
# NOTE(review): presumably the rows the geocoder could not resolve — confirm.
manual_cities = {21: "Samraong", 34: "Bengalon", 83: "Luwanda"}
for row_label, city_name in manual_cities.items():
    df_cleaned.at[row_label, 'Project City'] = city_name

In [19]:
# Count the remaining nulls in the two location columns.
df_cleaned[['Project City', 'Project State']].isnull().sum()

Project City     0
Project State    0
dtype: int64

In [20]:
# Print the per-column non-null counts and dtypes of the cleaned frame.
df_cleaned.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 84 entries, 0 to 83
Data columns (total 22 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Organization                  84 non-null     object 
 1   HQ                            84 non-null     object 
 2   Year Established              84 non-null     int64  
 3   Type of Organization          84 non-null     object 
 4   Size (AUM)                    84 non-null     object 
 5   AUM                           84 non-null     float64
 6   Project Name                  84 non-null     object 
 7   Financial Asset Class         84 non-null     object 
 8   Natural Asset Class           84 non-null     object 
 9   Returns Profile (Target IRR)  84 non-null     object 
 10  Sustainability Profile        84 non-null     object 
 11  Project Size (hectares)       84 non-null     object 
 12  Tree Species                  65 non-null     object 
 13  Project

In [30]:
!pip install plotly



In [31]:
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly
import plotly.graph_objs as go

# Set up plotly's offline notebook mode before any figure is rendered.
# NOTE(review): connected=True presumably loads plotly.js from a CDN instead
# of embedding it in the notebook — confirm against the plotly.offline docs.
init_notebook_mode(connected=True)

In [35]:
import plotly.express as px

# Relationships Between Other Columns and 'Project Size (hectares)'

In [46]:
# Histogram: Organization on x, project size on y, colored by HQ, rug marginal.
fig = px.histogram(
    data,
    x="Organization",
    y="Project Size (hectares)",
    color="HQ",
    marginal="rug",
)
fig.show()

In [52]:
# Pie chart: one slice per Organization, sized by project size.
fig = px.pie(
    data,
    names='Organization',
    values='Project Size (hectares)',
    title='Organization vs Project Size (hectares)',
)
fig.show()

In [63]:
# Histogram: Type of Organization on x, project size on y, colored by HQ.
fig = px.histogram(
    data,
    x="Type of Organization",
    y="Project Size (hectares)",
    color="HQ",
    marginal="rug",
)
fig.show()

In [51]:
# Histogram: Type of Organization on x, project size on y, colored by
# Project Country.
fig = px.histogram(
    data,
    x="Type of Organization",
    y="Project Size (hectares)",
    color="Project Country",
    marginal="rug",
)
fig.show()

In [55]:
# Pie chart: one slice per Financial Asset Class, sized by project size.
fig = px.pie(
    data,
    names='Financial Asset Class',
    values='Project Size (hectares)',
    title='Project Size (hectares) vs Financial Asset Class',
)
fig.show()

In [57]:
# Pie chart: one slice per Natural Asset Class, sized by project size.
fig = px.pie(
    data,
    names='Natural Asset Class',
    values='Project Size (hectares)',
    title='Project Size (hectares) vs Natural Asset Class',
)
fig.show()

In [65]:
# Pie chart: one slice per Project State, sized by project size.
# 'Project State' is taken from df_cleaned, where the missing states were
# filled by reverse geocoding; the same column in the raw `data` frame still
# has those gaps. All other columns continue to come from `data`.
fig = px.pie(
    data.assign(**{'Project State': df_cleaned['Project State']}),
    names='Project State',
    values='Project Size (hectares)',
    title='Project Size (hectares) vs Project State',
)
fig.show()

In [68]:
# Histogram: target IRR on x, project size on y, colored by Organization.
fig = px.histogram(
    data,
    x="Returns Profile (Target IRR)",
    y="Project Size (hectares)",
    color="Organization",
    marginal="rug",
)
fig.show()

In [71]:
# Histogram: target IRR on x, project size on y, colored by Type of
# Organization.
fig = px.histogram(
    data,
    x="Returns Profile (Target IRR)",
    y="Project Size (hectares)",
    color="Type of Organization",
    marginal="rug",
)
fig.show()

In [72]:
# Histogram: target IRR on x, project size on y, colored by Project Country.
fig = px.histogram(
    data,
    x="Returns Profile (Target IRR)",
    y="Project Size (hectares)",
    color="Project Country",
    marginal="rug",
)
fig.show()

In [75]:
# Density heatmap: target IRR (x) against project size (y), with histogram
# marginals on both axes.
fig = px.density_heatmap(
    data,
    x="Returns Profile (Target IRR)",
    y="Project Size (hectares)",
    marginal_x="histogram",
    marginal_y="histogram",
)
fig.show()

In [81]:
# Density heatmap: Financial Asset Class (x) against project size (y), with
# histogram marginals on both axes.
fig = px.density_heatmap(
    data,
    x="Financial Asset Class",
    y="Project Size (hectares)",
    marginal_x="histogram",
    marginal_y="histogram",
)
fig.show()

In [82]:
# Density heatmap: Natural Asset Class (x) against project size (y), with
# histogram marginals on both axes.
fig = px.density_heatmap(
    data,
    x="Natural Asset Class",
    y="Project Size (hectares)",
    marginal_x="histogram",
    marginal_y="histogram",
)
fig.show()

In [118]:
# Pie chart: one slice per Year Established, sized by project size.
fig = px.pie(
    data,
    names='Year Established',
    values='Project Size (hectares)',
    title='Project Size (hectares) vs Year Established',
)
fig.show()

# Relationships Between Other Columns and 'Returns Profile (Target IRR)'

In [119]:
# Pie chart: one slice per target-IRR profile.
# NOTE(review): slice sizes come from the 'Year Established' values
# themselves — confirm that is intended rather than a count of rows.
fig = px.pie(
    data,
    names='Returns Profile (Target IRR)',
    values='Year Established',
    title='Year Established vs Returns Profile (Target IRR)',
)
fig.show()

In [86]:
# Density heatmap: Organization (x) against target IRR (y), with histogram
# marginals on both axes.
fig = px.density_heatmap(
    data,
    x="Organization",
    y="Returns Profile (Target IRR)",
    marginal_x="histogram",
    marginal_y="histogram",
)
fig.show()

In [94]:
# Density heatmap: Type of Organization (x) against target IRR (y), with
# histogram marginals on both axes.
fig = px.density_heatmap(
    data,
    x="Type of Organization",
    y="Returns Profile (Target IRR)",
    marginal_x="histogram",
    marginal_y="histogram",
)
fig.show()

In [95]:
# Density heatmap: Financial Asset Class (x) against target IRR (y), with
# histogram marginals on both axes.
fig = px.density_heatmap(
    data,
    x="Financial Asset Class",
    y="Returns Profile (Target IRR)",
    marginal_x="histogram",
    marginal_y="histogram",
)
fig.show()

In [98]:
# Density heatmap: Natural Asset Class (x) against target IRR (y), with
# histogram marginals on both axes.
fig = px.density_heatmap(
    data,
    x="Natural Asset Class",
    y="Returns Profile (Target IRR)",
    marginal_x="histogram",
    marginal_y="histogram",
)
fig.show()

In [99]:
# Density heatmap: Project Country (x) against target IRR (y), with histogram
# marginals on both axes.
fig = px.density_heatmap(
    data,
    x="Project Country",
    y="Returns Profile (Target IRR)",
    marginal_x="histogram",
    marginal_y="histogram",
)
fig.show()

In [105]:
# Density heatmap: Project State (x) against target IRR (y), with histogram
# marginals on both axes.
# 'Project State' is taken from df_cleaned, where the missing states were
# filled by reverse geocoding; the same column in the raw `data` frame still
# has those gaps. All other columns continue to come from `data`.
fig = px.density_heatmap(
    data.assign(**{'Project State': df_cleaned['Project State']}),
    x="Project State",
    y="Returns Profile (Target IRR)",
    marginal_x="histogram",
    marginal_y="histogram",
)
fig.show()

In [120]:
# Pie chart: one slice per target-IRR profile, sized by project size.
fig = px.pie(
    data,
    names='Returns Profile (Target IRR)',
    values='Project Size (hectares)',
    title='Project Size (hectares) vs Returns Profile (Target IRR)',
)
fig.show()