### Load the CSV from URL

In [58]:
import numpy as np
import pandas as pd
# Location of the countries CSV used throughout this notebook.
url = 'https://raw.githubusercontent.com/edlich/eternalrepo/master/DS-WAHLFACH/countries.csv'
# `error_bad_lines` was deprecated in pandas 1.3 and removed in 2.0, so the old
# call fails on current pandas. `on_bad_lines='warn'` still skips malformed rows
# but emits a warning for each one instead of dropping it silently.
data = pd.read_csv(url, on_bad_lines='warn')
print(data)

       Name     People     Area   BIP Currency
0   Germany   82521653   357385  3466      EUR
1     Japan  126045000   377835  4938      YEN
2    Canada   36503097  9984670  1529      CAD
3     Italy   60501718   301338  1850      EUR
4  Brazilia  208360000  8515770  1798     REAL


### Basic information

In [59]:
# Wrap the loaded table and report its dimensions (rows, columns).
df = pd.DataFrame(data)
total_rows, total_cols = df.shape
print(f"Number of Rows: {total_rows}")
print(f"Number of Columns: {total_cols}")

# One flag per column: True when no value in that column occurs twice.
uniqueName = df["Name"].is_unique
uniquePeople = df["People"].is_unique
uniqueArea = df["Area"].is_unique
uniqueBIP = df["BIP"].is_unique
uniqueCurrency = df["Currency"].is_unique

# Emit the flags one per line, in column order.
print(uniqueName, uniquePeople, uniqueArea, uniqueBIP, uniqueCurrency, sep="\n")

Number of Rows: 5
Number of Columns: 5
True
True
True
True
False


### Show the last 4 rows of the data frame

In [60]:
# Display the final four rows of the table.
data.tail(4)

Unnamed: 0,Name,People,Area,BIP,Currency
1,Japan,126045000,377835,4938,YEN
2,Canada,36503097,9984670,1529,CAD
3,Italy,60501718,301338,1850,EUR
4,Brazilia,208360000,8515770,1798,REAL


### Show all rows of countries that use the euro (EUR)

In [61]:
# Build the boolean mask from the same frame that is being indexed. The mask was
# previously taken from `df`, a separate variable that is reassigned elsewhere
# in the notebook, so the filter only worked while both frames shared an index.
print(data.loc[data['Currency'] == 'EUR'])

      Name    People    Area   BIP Currency
0  Germany  82521653  357385  3466      EUR
3    Italy  60501718  301338  1850      EUR


### Show only name and Currency in a new data frame

In [62]:
# Project the table onto the two requested columns, keeping every row.
df1 = data.loc[:, ['Name', 'Currency']]
df1

Unnamed: 0,Name,Currency
0,Germany,EUR
1,Japan,YEN
2,Canada,CAD
3,Italy,EUR
4,Brazilia,REAL


### Show only the rows/countries that have a BIP of more than 2000 (BIP = Bruttoinlandsprodukt, i.e. GDP, given in Milliarden/billions of USD)

In [63]:
# The task asks for strictly more than 2000, so use `>`; the previous `>=`
# would also have returned a country whose BIP is exactly 2000.
data.loc[data['BIP'] > 2000]

Unnamed: 0,Name,People,Area,BIP,Currency
0,Germany,82521653,357385,3466,EUR
1,Japan,126045000,377835,4938,YEN


### Select all countries with between 50 and 150 million inhabitants

In [64]:
# Names of countries whose population lies in [50 million, 150 million].
# `between` is inclusive on both ends by default, matching the earlier >= / <=
# pair. Both bounds are now evaluated on `data` itself; the upper bound used to
# be read from `df`, a separate variable that is reassigned elsewhere.
data.loc[data['People'].between(50_000_000, 150_000_000), 'Name']

0    Germany
1      Japan
3      Italy
Name: Name, dtype: object

### Change BIP to Bip

In [65]:
# Produce a renamed copy of the table: the column 'BIP' becomes 'Bip'.
# `data` itself keeps its original column names.
df = data.rename({'BIP': 'Bip'}, axis='columns')
df

Unnamed: 0,Name,People,Area,Bip,Currency
0,Germany,82521653,357385,3466,EUR
1,Japan,126045000,377835,4938,YEN
2,Canada,36503097,9984670,1529,CAD
3,Italy,60501718,301338,1850,EUR
4,Brazilia,208360000,8515770,1798,REAL


### Calculate the Bip sum

In [66]:
# Add up the 'Bip' column of the renamed frame and show the result.
Total = df.loc[:, 'Bip'].sum()
print(Total)

13581


### Calculate the average people of all countries

In [67]:
# Arithmetic mean of the 'People' column across all rows.
Avg = data.loc[:, "People"].mean()
print(Avg)

102786293.6


### Sort by name alphabetically

In [68]:
# Return a copy of the table ordered A-Z by country name; `data` is not modified.
data.sort_values(by='Name', ascending=True)

Unnamed: 0,Name,People,Area,BIP,Currency
4,Brazilia,208360000,8515770,1798,REAL
2,Canada,36503097,9984670,1529,CAD
0,Germany,82521653,357385,3466,EUR
3,Italy,60501718,301338,1850,EUR
1,Japan,126045000,377835,4938,YEN


### Create a new data frame from the original in which the Area values are replaced as follows: every country with an area > 1000000 gets BIG, and every country with an area <= 1000000 gets SMALL.

In [73]:
# Re-read the source so `Area` is numeric here no matter what earlier cells did
# to `data`.
url = 'https://raw.githubusercontent.com/edlich/eternalrepo/master/DS-WAHLFACH/countries.csv'
# `on_bad_lines` replaces `error_bad_lines` (deprecated in pandas 1.3, removed
# in 2.0); 'warn' skips malformed rows and reports each one.
data = pd.read_csv(url, on_bad_lines='warn')

# Build the labelled frame with `assign`, which returns a new DataFrame.
# A plain `newDF = data` only aliases the original, so writing the labels into
# `newDF['Area']` would also overwrite the numeric areas in `data` and make the
# cell fail when run a second time.
newDF = data.assign(Area=np.where(data['Area'] > 1000000, 'BIG', 'SMALL'))
newDF

Unnamed: 0,Name,People,Area,BIP,Currency
0,Germany,82521653,SMALL,3466,EUR
1,Japan,126045000,SMALL,4938,YEN
2,Canada,36503097,BIG,1529,CAD
3,Italy,60501718,SMALL,1850,EUR
4,Brazilia,208360000,BIG,1798,REAL
