In [247]:
import numpy as np

url = 'https://raw.githubusercontent.com/edlich/eternalrepo/master/DS-WAHLFACH/countries.csv'
countries = np.genfromtxt(url, encoding="UTF-8", delimiter=',', dtype=object)
# table without header
countries_num = countries[1:, :]

In [233]:
# Number of rows
print("rows: " + str(np.size(countries,0)))
# Number of columns
print("columns:" + str(np.size(countries,1)))
# Number of countires (skip first row)
print("Number of countires:" + str(np.size(countries_num,0)))
# Average of people between all countires
people = countries_num[:,[1]].astype(int)
print("Average people: " +  str(people.std().astype(int)))

rows: 6
columns:5
Number of countires:5
Average people: 60463010


In [234]:
# Print last 4 rows
print(countries[-4:])

[[b'Japan' b'126045000' b'377835' b'4938' b'YEN']
 [b'Canada' b'36503097' b'9984670' b'1529' b'CAD']
 [b'Italy' b'60501718' b'301338' b'1850' b'EUR']
 [b'Brazilia' b'208360000' b'8515770' b'1798' b'REAL']]


In [235]:
# Show all the row of countries who have the EURO
print(countries[countries[:, 4].astype('str') == 'EUR', :])

[[b'Germany' b'82521653' b'357385' b'3466' b'EUR']
 [b'Italy' b'60501718' b'301338' b'1850' b'EUR']]


In [236]:
# Show only name and Currency in a new data frame
names_currencies = countries[:, [0, 4]]
print(names_currencies)

[[b'Name' b'Currency']
 [b'Germany' b'EUR']
 [b'Japan' b'YEN']
 [b'Canada' b'CAD']
 [b'Italy' b'EUR']
 [b'Brazilia' b'REAL']]


In [237]:
# Show only the rows/countries that have more than 2000 BIP (it is in Milliarden USD Bruttoinlandsprodukt)
print(countries_num[countries_num[:, 3].astype('int') > 2000, :])

[[b'Germany' b'82521653' b'357385' b'3466' b'EUR']
 [b'Japan' b'126045000' b'377835' b'4938' b'YEN']]


In [238]:
# Select all countries where with inhabitants between 50 and 150 Mio
condition = (countries_num[:, 1].astype('int') > 50000000) & (countries_num[:, 1].astype('int') < 150000000)
print(countries_num[condition])

[[b'Germany' b'82521653' b'357385' b'3466' b'EUR']
 [b'Japan' b'126045000' b'377835' b'4938' b'YEN']
 [b'Italy' b'60501718' b'301338' b'1850' b'EUR']]


In [239]:
# Change BIP to Bip
countries[np.argwhere(countries == b'BIP')[0,0], np.argwhere(countries == b'BIP')[0,1]] = b'Bip'
print(countries)

[[b'Name' b'People' b'Area' b'Bip' b'Currency']
 [b'Germany' b'82521653' b'357385' b'3466' b'EUR']
 [b'Japan' b'126045000' b'377835' b'4938' b'YEN']
 [b'Canada' b'36503097' b'9984670' b'1529' b'CAD']
 [b'Italy' b'60501718' b'301338' b'1850' b'EUR']
 [b'Brazilia' b'208360000' b'8515770' b'1798' b'REAL']]


In [240]:
# Calculate the Bip sum
print(countries_num[:,[3]].astype(int).sum())

13581


In [241]:
# Calculate the average people of all countries
print(countries_num[:,[1]].astype(int).std().astype(int))

60463010


In [242]:
# Sort by name alphabetically
print(countries_num[countries_num[:,0].argsort()])

[[b'Brazilia' b'208360000' b'8515770' b'1798' b'REAL']
 [b'Canada' b'36503097' b'9984670' b'1529' b'CAD']
 [b'Germany' b'82521653' b'357385' b'3466' b'EUR']
 [b'Italy' b'60501718' b'301338' b'1850' b'EUR']
 [b'Japan' b'126045000' b'377835' b'4938' b'YEN']]


In [248]:
# Create a new data frame from the original where the area is changed as follows: 
# all countries with > 1000000 get BIG and <= 1000000 get SMALL in the cell replaced!
#countries_num[countries_num.astype(int) > 1000000 == 1, 1] = b'BIG'
#countries_num
np.place(countries_num[:,2], countries_num[:,2].astype(int) > 1000000, 'BIG')
np.place(countries_num[:,2], countries_num[:,2] != 'BIG', 'SMALL')
print(countries_num)

[[b'Germany' b'82521653' 'SMALL' b'3466' b'EUR']
 [b'Japan' b'126045000' 'SMALL' b'4938' b'YEN']
 [b'Canada' b'36503097' 'BIG' b'1529' b'CAD']
 [b'Italy' b'60501718' 'SMALL' b'1850' b'EUR']
 [b'Brazilia' b'208360000' 'BIG' b'1798' b'REAL']]
