In [2]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup as bs
import requests
import urllib.request
import time
from urllib.request import urlopen

In [76]:
url="https://en.wikipedia.org/wiki/List_of_countries_by_life_expectancy"
# Read the whole page inside a context manager so the HTTP connection is
# closed as soon as the body has been fetched; `html` holds the raw bytes.
with urlopen(url) as response:
    html = response.read()
soup=bs(html, "html.parser")

In [77]:
# Every <table> element whose id attribute is "CIA2017".
tables = soup.find_all(name="table", attrs={"id": "CIA2017"})

In [35]:
import re
def change_type(text):
    """Convert the text of a scraped table cell to ``float``.

    Bracketed footnote markers such as ``[1]`` or ``[a]`` are removed first so
    their digits cannot leak into the number. The Unicode minus sign (U+2212)
    is mapped to ``-`` and hyphens are kept, so negative values keep their
    sign. Every other character that is not a word character, whitespace or a
    dot (thousands separators, ``%``, ...) is stripped.

    Args:
        text: Cell text, e.g. ``"1,208"`` or ``"83.1[2]"``.

    Returns:
        The parsed value as a ``float``.

    Raises:
        ValueError: If what remains after cleaning is not a valid number
            (for example an empty cell or a dash placeholder).
    """
    # Drop footnote references before anything else: "83.1[2]" must not become 83.12.
    without_notes = re.sub(r'\[[^\]]*\]', '', text)
    normalized = without_notes.replace('\u2212', '-')
    return float(re.sub(r'[^\w\s.\-]', '', normalized))

In [78]:
countries=[]
males=[]
females=[]
averages=[]

# Value stored when a row's female figure cannot be parsed.
# NOTE(review): 69.4 is hard-coded; which row it stands in for is not shown
# here - confirm it is still the right substitute for the current page.
FEMALE_FALLBACK = 69.4

for table in tables:
    for row in table.find_all('tr'):
        cells = row.find_all('td')

        # Cells 0-3 (country, male, female, average) are read below, so a row
        # with fewer than four <td> cells (e.g. a header row) is skipped.
        if len(cells) < 4:
            continue

        country_text, male_text, female_text, average_text = (
            cell.text.strip() for cell in cells[:4]
        )

        # Parse everything first and append afterwards, so a parsing error
        # cannot leave the four lists with different lengths.
        male_value = change_type(male_text)
        try:
            female_value = change_type(female_text)
        except ValueError:
            female_value = FEMALE_FALLBACK
        average_value = change_type(average_text)

        countries.append(country_text)
        males.append(male_value)
        females.append(female_value)
        averages.append(average_value)


In [79]:
# The four scraped lists become DataFrame columns, so they must line up
# row-for-row; fail here rather than later with a less obvious error.
assert len(countries) == len(males) == len(females) == len(averages)
len(averages)

224

In [39]:
# Rank is the 1-based row position in the scraped table; derive its length
# from the data instead of hard-coding the row count.
ranks = list(range(1, len(countries) + 1))

In [80]:
# Column label -> scraped values, in the order the columns should appear.
data = dict(zip(
    ("Countries", "Rank", "Males", "Females", "Both sex"),
    (countries, ranks, males, females, averages),
))

In [81]:
# One column per key of `data`.
df = pd.DataFrame.from_dict(data)

In [82]:
# Preview the first ten rows.
df.iloc[:10]

Unnamed: 0,Countries,Rank,Males,Females,Both sex
0,Monaco,1,85.6,93.5,89.4
1,Japan,2,81.9,88.8,85.3
2,Singapore,3,82.6,88.1,85.2
3,Macau,4,81.6,87.7,84.6
4,San Marino,5,80.8,86.1,83.3
5,Iceland,6,80.9,85.4,83.1
6,Hong Kong,7,80.4,85.9,83.0
7,Andorra,8,80.7,85.2,82.9
8,Guernsey,9,79.9,85.4,82.6
9,Switzerland,10,80.3,85.1,82.6


In [44]:
url1="https://en.m.wikipedia.org/wiki/List_of_countries_by_forest_area"
html1=urlopen(url)
soup1=bs(html, "html.parser")

In [45]:
tables1=soup.find_all("table", class_="wikitable")[1]

In [61]:
countries1=[]
xx90s=[]
xx00s=[]
xx10s=[]
xx20s=[]

for table in tables:
    rows=table.find_all('tr')
    
    for row in rows:
        cells = row.find_all('td')
        
        if len(cells) > 1:
            
            country =cells[0]
            countries1.append(country.text.strip())

            xx90 = cells[1]
            xx90s.append(change_type(xx90.text.strip()))
            
            xx00 = cells[2]
            xx00s.append(change_type(xx00.text.strip()))

            xx10 = cells[3]
            xx10s.append(change_type(xx10.text.strip()))

            xx20 = cells[4]
            xx20s.append(change_type(xx20.text.strip()))


In [62]:
# Column label -> scraped values, in the order the columns should appear.
data1 = dict(zip(
    ("Countries", "1990", "2000", "2010", "2020"),
    (countries1, xx90s, xx00s, xx10s, xx20s),
))

In [63]:
# One column per key of `data1`.
df1 = pd.DataFrame.from_dict(data1)

In [64]:
# Preview the first five rows.
df1.iloc[:5]

Unnamed: 0,Countries,1990,2000,2010,2020
0,Afghanistan,1208.0,1208.0,1208.0,1208.0
1,Albania,789.0,769.0,782.0,789.0
2,Algeria,1667.0,1579.0,1918.0,1949.0
3,American Samoa,18.0,18.0,17.0,17.0
4,Andorra,16.0,16.0,16.0,16.0


In [91]:
#dropping unnecessary columns
# Rebind instead of dropping in place and ignore labels that are already
# gone, so re-running this cell does not raise KeyError.
df1 = df1.drop(columns=["1990", "2000", "2010"], errors="ignore")

In [92]:
# Print the column summary (non-null counts and dtypes) of the trimmed frame.
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 194 entries, 0 to 193
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Countries  194 non-null    object 
 1   2020       194 non-null    float64
dtypes: float64(1), object(1)
memory usage: 3.2+ KB


In [95]:
# Join life expectancy with forest area on the country name, then give the
# remaining forest column a label that carries its unit.
final_data = (
    df.merge(df1, on=["Countries"])
      .rename(columns={"2020": "Forest area (1000 ha)"})
)

In [96]:
# Preview the first five rows.
final_data.iloc[:5]

Unnamed: 0,Countries,Rank,Males,Females,Both sex,Forest area (1000 ha)
0,Monaco,1,85.6,93.5,89.4,0.0
1,Japan,2,81.9,88.8,85.3,24935.0
2,Singapore,3,82.6,88.1,85.2,16.0
3,San Marino,5,80.8,86.1,83.3,1.0
4,Iceland,6,80.9,85.4,83.1,51.0


In [97]:
# Print the column summary (non-null counts and dtypes) of the merged frame.
final_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 179 entries, 0 to 178
Data columns (total 6 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Countries              179 non-null    object 
 1   Rank                   179 non-null    int64  
 2   Males                  179 non-null    float64
 3   Females                179 non-null    float64
 4   Both sex               179 non-null    float64
 5   Forest area (1000 ha)  179 non-null    float64
dtypes: float64(4), int64(1), object(1)
memory usage: 9.8+ KB


In [99]:
# Persist the merged frame without the index column.
final_data.to_csv(
    path_or_buf="Life expectancy and forest area data.csv",
    index=False,
)

In [19]:
# Reload the merged scrape from disk and read the population table.
final_data, pop = (
    pd.read_csv(csv_name)
    for csv_name in (
        "Life expectancy and forest area data.csv",
        "Population data.csv",
    )
)
pop.iloc[:5]

Unnamed: 0,Country Name,Country Code,Population
0,Aruba,ABW,106314.0
1,Afghanistan,AFG,38041754.0
2,Angola,AGO,31825295.0
3,Albania,ALB,2854191.0
4,Andorra,AND,77142.0


In [20]:
# `pop` names its key column "Country Name" while the scraped frame uses
# "Countries"; align the key before joining. rename() leaves the frame
# untouched when "Countries" is absent, so an already-renamed CSV also works.
df3 = (
    final_data
    .rename(columns={"Countries": "Country Name"})
    .merge(pop, on="Country Name")
)
df3.head()

Unnamed: 0,Country Name,Rank,Males,Females,Both sex,Forest area (1000 ha),Country Code,Population
0,Monaco,1,85.6,93.5,89.4,0.0,MCO,38964.0
1,Japan,2,81.9,88.8,85.3,24935.0,JPN,126264931.0
2,Singapore,3,82.6,88.1,85.2,16.0,SGP,5703569.0
3,San Marino,5,80.8,86.1,83.3,1.0,SMR,33860.0
4,Iceland,6,80.9,85.4,83.1,51.0,ISL,361313.0


In [8]:
# Persist the combined frame without the index column.
df3.to_csv(
    path_or_buf="Countries tree and life data.csv",
    index=False,
)

In [25]:
# Store population in thousands under "Population (1000)", the column the
# plotting cells read. The guard makes the cell safe to re-run: once
# "Population" has been replaced, nothing is divided a second time.
if "Population" in df3.columns:
    df3 = (
        df3
        .assign(**{"Population (1000)": df3["Population"] / 1000})
        .drop(columns="Population")
    )

In [42]:
# Relabel the combined life-expectancy column, then preview the result.
df3 = df3.rename(columns={"Both sex": "Average"})
df3.iloc[:5]

Unnamed: 0,Country Name,Rank,Males,Females,Average,Forest area (1000 ha),Country Code,Population (1000)
0,Monaco,1,85.6,93.5,89.4,0.0,MCO,38.964
1,Japan,2,81.9,88.8,85.3,24935.0,JPN,126264.931
2,Singapore,3,82.6,88.1,85.2,16.0,SGP,5703.569
3,San Marino,5,80.8,86.1,83.3,1.0,SMR,33.86
4,Iceland,6,80.9,85.4,83.1,51.0,ISL,361.313


In [9]:
import plotly.express as px

In [30]:
# Male vs. female life expectancy, one point per row of df3.
fig = px.scatter(
    data_frame=df3,
    x="Males",
    y="Females",
    hover_data=["Country Name", "Rank"],
)
fig.show()

In [43]:
# Average life expectancy vs. forest area, one point per row of df3.
fig = px.scatter(
    data_frame=df3,
    x="Average",
    y="Forest area (1000 ha)",
    hover_data=["Country Name", "Rank"],
)
fig.show()

In [44]:
# Population (log-scaled x axis) vs. forest area, one point per row of df3.
fig = px.scatter(
    data_frame=df3,
    x="Population (1000)",
    y="Forest area (1000 ha)",
    hover_data=["Country Name", "Average"],
    log_x=True,
)
fig.show()

In [51]:
# World map coloured by population, located via the "Country Code" column.
fig = px.choropleth(
    data_frame=df3,
    locations="Country Code",
    color="Population (1000)",
    hover_name="Country Name",
    color_continuous_scale="Plasma",
).update_layout(title="A Map showing population")
fig.show()

In [56]:
# World map coloured by forest area, located via the "Country Code" column.
fig = px.choropleth(
    data_frame=df3,
    locations="Country Code",
    color="Forest area (1000 ha)",
    hover_data=["Country Name", "Average"],
    color_continuous_scale="Plasma",
).update_layout(title="A Map showing Forest area (1000 ha)")
fig.show()

In [55]:
# World map coloured by average life expectancy, located via the
# "Country Code" column.
fig = px.choropleth(
    data_frame=df3,
    locations="Country Code",
    color="Average",
    hover_data=["Country Name", "Males", "Females"],
    color_continuous_scale="Plasma",
).update_layout(title="A Map showing Life expectancy")
fig.show()