**-- Importing Libraries**

In [105]:
# Install the 'country_list' library
!pip install country_list

# Import necessary libraries
from country_list import countries_for_language  # Library for getting country names
import pandas as pd  # Data manipulation
from sklearn.impute import SimpleImputer  # Data imputation
import matplotlib.pyplot as plt # Plotting
import seaborn as sns # Data visualization
import numpy as np #Numerical operations
from sklearn.preprocessing import StandardScaler #Data scaling
from sklearn.cluster import KMeans #K-Means clustering


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m23.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


## Tasks:

-- *1. Data Collection:*

    - Access the United Nations data repository or API to retrieve relevant datasets on population and economic indicators for various countries.
    - Download or fetch the necessary datasets in a format suitable for analysis (e.g., CSV, Excel, JSON).

-- **Importing Datasets**

In [106]:
## Economic Indicators

# DATA 1: Population growth rates in urban areas and capital cities.
# Loaded with pandas' default parsing options.
df_PGR = pd.read_csv(filepath_or_buffer='Predata/SYB61_253_Population.csv')

    - Inspect the data for missing values, duplicates, and outliers. Address missing values and outliers using appropriate techniques.

-- Cleaning Datasets

In [107]:
# Show the frame exactly as loaded from the CSV, before any reshaping or cleaning.
df_PGR

Unnamed: 0,Country,Year,Series,Capital City,Value
0,Argentina,2005,Capital city population (thousands),Buenos Aires,13330
1,Argentina,2010,Capital city population (thousands),Buenos Aires,14246
2,Argentina,2015,Capital city population (thousands),Buenos Aires,14706
3,Argentina,2018,Capital city population (thousands),Buenos Aires,14967
4,Brazil,2005,Capital city population (thousands),Brasilia,3301
...,...,...,...,...,...
255,United Kingdom,2010,Urban population (percent growth rate per annum),,1.3
256,United Kingdom,2015,Urban population (percent growth rate per annum),,1
257,United States of America,2005,Urban population (percent growth rate per annum),,1.1
258,United States of America,2010,Urban population (percent growth rate per annum),,1.1


In [108]:
# Reshape from long to wide: one row per (Country, Year) and one column per
# distinct 'Series' label, filled from 'Value'.
# rename_axis(columns=None) clears the leftover 'Series' name on the columns axis;
# reset_index() then turns Country / Year back into ordinary columns.
# NOTE: this overwrites df_PGR, so the cell cannot be re-run without reloading the CSV.
df_PGR = (
    df_PGR
    .pivot(index=['Country', 'Year'], columns='Series', values='Value')
    .rename_axis(columns=None)
    .reset_index()
)


In [109]:
# Inspect the reshaped frame: one row per (Country, Year), one column per former 'Series' value.
df_PGR

Unnamed: 0,Country,Year,Capital city population (thousands),Population aged 0 to 14 years old (percentage),Population aged 60+ years old (percentage),Population density,Population mid-year estimates (millions),Rural population (percent growth rate per annum),Urban population (percent growth rate per annum)
0,Argentina,2005,13330.0,,,,,-0.6,1.3
1,Argentina,2010,14246.0,25.6,14.6,14.7,41.1,-0.7,1.2
2,Argentina,2015,14706.0,24.7,15.3,15.5,43.26,-0.4,1.2
3,Argentina,2018,14967.0,,,,,,
4,Argentina,2020,,23.7,16.0,16.1,45.04,,
5,Argentina,2022,,23.1,16.2,16.3,45.51,,
6,Brazil,2005,3301.0,,,,,-0.5,1.7
7,Brazil,2010,3710.0,24.8,10.3,23.5,196.35,-0.8,1.4
8,Brazil,2015,4168.0,22.5,11.9,24.6,205.19,-1.0,1.2
9,Brazil,2018,4470.0,,,,,,


In [110]:
# Preview only: show the rows that contain no NaN values.
# The result is displayed but NOT assigned, so df_PGR itself is left unchanged here.
df_PGR.dropna()

Unnamed: 0,Country,Year,Capital city population (thousands),Population aged 0 to 14 years old (percentage),Population aged 60+ years old (percentage),Population density,Population mid-year estimates (millions),Rural population (percent growth rate per annum),Urban population (percent growth rate per annum)
1,Argentina,2010,14246,25.6,14.6,14.7,41.1,-0.7,1.2
2,Argentina,2015,14706,24.7,15.3,15.5,43.26,-0.4,1.2
7,Brazil,2010,3710,24.8,10.3,23.5,196.35,-0.8,1.4
8,Brazil,2015,4168,22.5,11.9,24.6,205.19,-1.0,1.2
13,Canada,2010,1218,16.6,19.9,3.7,33.96,0.3,1.3
14,Canada,2015,1308,16.2,22.3,3.9,35.73,0.7,1.1
19,Germany,2010,3450,13.6,25.9,233.3,81.33,-1.0,0.1
20,Germany,2015,3514,13.2,27.3,235.5,82.07,0.0,0.3
25,India,2010,21988,31.0,7.8,417.3,1240.61,1.0,2.6
26,India,2015,25866,28.6,8.9,444.9,1322.87,0.7,2.4


-- Exporting all the DataFrames to CSV

In [111]:
# Capture the dtype of every column and print it, to see which columns are
# still stored as generic `object` rather than as a numeric dtype.
column_data_types_1 = df_PGR.dtypes
print(column_data_types_1)

Country                                             object
Year                                                 int64
Capital city population (thousands)                 object
Population aged 0 to 14 years old (percentage)      object
Population aged 60+ years old (percentage)          object
Population density                                  object
Population mid-year estimates (millions)            object
Rural population (percent growth rate per annum)    object
Urban population (percent growth rate per annum)    object
dtype: object


In [112]:
# Convert comma-formatted text columns to float.
# Columns are selected by position in the pivoted frame; per the dtypes listing,
#   2 -> 'Capital city population (thousands)'
#   6 -> 'Population mid-year estimates (millions)'
columns_to_convert_1 = [2, 6]  # Put the column position in a list
for column_pos in columns_to_convert_1:  # Iterate through the list
    # Resolve the position to its label and assign by label. Writing through
    # `df.iloc[:, pos] = values` sets the values *in place* on pandas >= 2.0,
    # which leaves the column with dtype `object` instead of float64.
    column_label = df_PGR.columns[column_pos]
    # Strip thousands separators from string cells only. NaN and already-numeric
    # cells pass through untouched, so re-running this cell does not wipe data
    # (the `.str` accessor would turn non-string cells into NaN).
    without_commas = df_PGR[column_label].map(
        lambda cell: cell.replace(',', '') if isinstance(cell, str) else cell
    )
    # astype(float) still raises on any value that is not a valid number.
    df_PGR[column_label] = without_commas.astype(float)

In [113]:
# Display the frame again to check the values after the float-conversion step.
df_PGR

Unnamed: 0,Country,Year,Capital city population (thousands),Population aged 0 to 14 years old (percentage),Population aged 60+ years old (percentage),Population density,Population mid-year estimates (millions),Rural population (percent growth rate per annum),Urban population (percent growth rate per annum)
0,Argentina,2005,13330.0,,,,,-0.6,1.3
1,Argentina,2010,14246.0,25.6,14.6,14.7,41.1,-0.7,1.2
2,Argentina,2015,14706.0,24.7,15.3,15.5,43.26,-0.4,1.2
3,Argentina,2018,14967.0,,,,,,
4,Argentina,2020,,23.7,16.0,16.1,45.04,,
5,Argentina,2022,,23.1,16.2,16.3,45.51,,
6,Brazil,2005,3301.0,,,,,-0.5,1.7
7,Brazil,2010,3710.0,24.8,10.3,23.5,196.35,-0.8,1.4
8,Brazil,2015,4168.0,22.5,11.9,24.6,205.19,-1.0,1.2
9,Brazil,2018,4470.0,,,,,,


In [114]:
# Keep only the rows in which every column has a value (drop a row if ANY cell is NaN).
df_PGR = df_PGR.dropna(axis='index', how='any')


In [115]:
# Write the cleaned table to disk. The row index is deliberately left as-is
# (not reset) after the NaN rows were dropped; index=False keeps it out of the
# CSV, so the gaps in the index never reach the file.
df_PGR.to_csv(path_or_buf='data/Population.csv', index=False)