# How To Manipulate and Analyze Data in Python Without Using Pandas

### Import the data

In [None]:
import csv 

# read the data with csv into list 'pop' (one list of strings per CSV line;
# row 0 is whatever the file's first line holds)
# newline='' is what the csv module asks for on files handed to csv.reader, so
# that newlines embedded in quoted fields are interpreted correctly
with open('pop_subset.csv', 'r', newline='') as file:
    csv_reader = csv.reader(file, delimiter=',')
    pop = list(csv_reader)

# print the first 5 rows of the table (5 first elements of the list);
# slicing instead of indexing avoids an IndexError when the file has fewer rows
for row in pop[:5]:
    print(row)

### Create a summarized table

In [None]:
# change the population numbers to int values
# (done in place on 'pop'; row 0 is the header and column 0 the country name,
# so both are left untouched)
for record in pop[1:]:
    record[1:] = [int(value) for value in record[1:]]

# column names of the summarized table
# NOTE(review): the last label says 2020, but the change below is measured from
# column 1 of 'pop', i.e. the first year column -- confirm which year that is
col_names = ['country', 'lowest_pop', 'lowest_pop_year', 'highest_pop', 'highest_pop_year', 'rel_change_2020-2100']

# create table to store new data, with the column names as row 0
pop_exp_dev = [col_names]

# header row of 'pop', used to translate a column position into its label
years = pop[0] if pop else []

for record in pop[1:]:
    values = record[1:]

    # highest/lowest projected population; on ties the earliest column wins
    lowest = min(values)
    highest = max(values)

    # + 1 because 'values' starts at column 1 of the full row
    lowest_year = years[values.index(lowest) + 1]
    highest_year = years[values.index(highest) + 1]

    # relative change in percent from the first to the last year column
    dev = round((record[-1] - record[1]) / record[1] * 100, 2)

    pop_exp_dev.append([record[0], lowest, lowest_year, highest, highest_year, dev])

# export to csv file
# newline='' stops the csv writer's own line endings from being translated a
# second time, which otherwise puts a blank line after every row on Windows
with open('summarized.csv', 'w', newline='') as file:
    csv_writer = csv.writer(file, delimiter=';')
    csv_writer.writerows(pop_exp_dev)

# print first 5 rows of table (fewer if the table is shorter)
for row in pop_exp_dev[:5]:
    print(row)

### Subset list for visualization
Subset the countries with the largest population growth and the largest population decline.

In [None]:
# subset: 10 countries with highest population growth and 10 countries with largest population decline

# sort by relative change (column 5), highest to lowest; the header row is left out
pop_exp_dev_srt = sorted(pop_exp_dev[1:], reverse=True, key=lambda x: x[5])

# subset: the 10 first rows plus the 10 last rows of the sorted list
# the last rows are taken from what remains after the first 10, so no country
# appears twice when the table has fewer than 20 countries
growth_decline = pop_exp_dev_srt[:10] + pop_exp_dev_srt[10:][-10:]

# make a horizontal bar plot
import matplotlib.pyplot as plt

# save values for x and y
# growth_decline has no header row, so every row is used -- starting at index 1
# would drop the country with the highest growth from the plot
countries = [row[0] for row in growth_decline]
rel_change = [row[-1] for row in growth_decline]

plt.grid()
plt.barh(countries, rel_change)
plt.title('World Population Projection 2019 - 2100.\nCountries with Largest Growth and Decline Respectively')
plt.xlabel('Change factor in percent')
plt.show()

Subset European countries.

In [None]:
# subset europe

# names of the European countries to look for in column 0 of pop_exp_dev
europe = [
    'Russia', 'Germany', 'United Kingdom', 'France', 'Italy',
    'Spain', 'Ukraine', 'Poland', 'Romania', 'Netherlands',
    'Belgium', 'Czech Republic', 'Greece', 'Portugal', 'Sweden',
    'Hungary', 'Belarus', 'Austria', 'Serbia', 'Switzerland',
    'Bulgaria', 'Denmark', 'Finland', 'Slovak Republic', 'Norway',
    'Ireland', 'Croatia', 'Moldova', 'Bosnia and Herzegovina', 'Albania',
    'Lithuania', 'Macedonia, FYR', 'Slovenia', 'Latvia', 'Kosovo',
    'Estonia', 'Montenegro', 'Luxembourg', 'Malta', 'Iceland',
    'Andorra', 'Monaco', 'Liechtenstein', 'San Marino', 'Holy See',
]

# Europe subset: every row after the header whose country is in the list above
europe_list = [entry for entry in pop_exp_dev[1:] if entry[0] in europe]

# order the subset by column 5, largest value first
europe_list_srt = sorted(europe_list, key=lambda entry: entry[5], reverse=True)

### Normalize the table to compare population development between countries

In [None]:
# standardize the pop list: every value becomes a percentage of the value in the
# row's first year column (column 1), so that column is 100.0 for every country
# NOTE(review): the index year was described as 2020, but the first year label
# in 'cols' below is '2019' -- confirm which year column 1 of 'pop' holds
# the division needs numeric values, i.e. 'pop' already converted from the raw
# csv strings

# column names
cols = ['COUNTRIES', '2019', '2020', '2025', '2030', '2035', '2040', '2045', '2050', '2055', '2060', '2065', '2070', '2075', '2080', '2085', '2090', '2095', '2100']

# new list for the normalized values, with the column names as row 0
# the names are the first row rather than being inserted in front of a
# placeholder header row: that left a stray one-element row at index 1 and
# shifted every country one row down relative to 'pop'
pop_norm = [cols]

# one row per country: the name followed by the normalized value for each year
for record in pop[1:]:
    base = record[1]
    pop_norm.append([record[0]] + [round(value / base * 100, 2) for value in record[1:]])

# print first 5 rows
for row in pop_norm[:5]:
    print(row)