# Introduction into Pandas

### Data Structures

In [3]:
import pandas as pd
S =  pd.Series([10, 20, 30, 40 ,50, 60, 70])
S
print(S)
print(S.values)

0    10
1    20
2    30
3    40
4    50
5    60
6    70
dtype: int64
[10 20 30 40 50 60 70]


In [4]:
fruits= ['Apples', 'Mangoes', 'Oranges', 'Pineapples', 'Ovacado']

S = pd.Series([45, 23, 12, 32, 18], index=fruits)
S2 = pd.Series([14, 25, 36, 62, 22], index=fruits)
print( S + S2)
print("sum of S: ", sum(S))


Apples        59
Mangoes       48
Oranges       48
Pineapples    94
Ovacado       40
dtype: int64
sum of S:  130


In [5]:
print(S2['Apples'])

14


### PANDAS.SERIES.APPLY

### Creating Series objects from dictionaries

In [6]:
cities = {'London': 22,
         'Berlin': 33,
         'Rome': 52,
         'Nairobi': 34,
         'Nakuru': 56,
         'Mombasa': 44}
cities_series = pd.Series(cities)
print(cities_series)

London     22
Berlin     33
Rome       52
Nairobi    34
Nakuru     56
Mombasa    44
dtype: int64


### NAN - MISSING DATA

In [7]:
my_cities = ['Eldoret', 'Paris', 'London', 'Kisumu', 'Mombasa', 'Nairobi', 'London', 'Berlin']

my_cities_series = pd.Series(cities, index= my_cities)
my_cities_series

Eldoret     NaN
Paris       NaN
London     22.0
Kisumu      NaN
Mombasa    44.0
Nairobi    34.0
London     22.0
Berlin     33.0
dtype: float64

### THE METHODS ISNULL() AND NOTNULL()

In [8]:
print(my_cities_series.isnull())

Eldoret     True
Paris       True
London     False
Kisumu      True
Mombasa    False
Nairobi    False
London     False
Berlin     False
dtype: bool


In [30]:
print(my_cities_series.notnull())

Eldoret    False
Paris      False
London      True
Kisumu     False
Mombasa     True
Nairobi     True
London      True
Berlin      True
dtype: bool


### FILTERING OUT MISSING DATA 

In [31]:
print(my_cities_series.dropna())

London     22.0
Mombasa    44.0
Nairobi    34.0
London     22.0
Berlin     33.0
dtype: float64


#### FILLING IN MISSING DATA

In [34]:
missing_cities= {'Eldoret': 43, 'Paris': 90, 'Kisumu': 88}
#.astype converts the data type to integer
my_cities_series.fillna(missing_cities).astype(int)

Eldoret    43
Paris      90
London     22
Kisumu     88
Mombasa    44
Nairobi    34
London     22
Berlin     33
dtype: int32

# DataFrame

In [11]:
 import pandas as pd
years = range(2014, 2018)
shop1 = pd.Series([2409.14, 2941.01, 3496.83, 3119.55], index=years)
shop2 = pd.Series([1203.45, 3441.62, 3007.83, 3619.53], index=years)
shop3 = pd.Series([3412.12, 3491.16, 3457.19, 1963.10], index=years)
pd.concat([shop1, shop2, shop3])

2014    2409.14
2015    2941.01
2016    3496.83
2017    3119.55
2014    1203.45
2015    3441.62
2016    3007.83
2017    3619.53
2014    3412.12
2015    3491.16
2016    3457.19
2017    1963.10
dtype: float64

In [12]:
shops_df = pd.concat([shop1, shop2, shop3], axis=1)
shops_df

Unnamed: 0,0,1,2
2014,2409.14,1203.45,3412.12
2015,2941.01,3441.62,3491.16
2016,3496.83,3007.83,3457.19
2017,3119.55,3619.53,1963.1


In [13]:
cities =["Nairobi", "Kisumu", "Mombasa"]
shops_df.columns = cities
print(shops_df)

      Nairobi   Kisumu  Mombasa
2014  2409.14  1203.45  3412.12
2015  2941.01  3441.62  3491.16
2016  3496.83  3007.83  3457.19
2017  3119.55  3619.53  1963.10


In [50]:
# Rename columns
shops_df.rename(columns={'Mombasa':'Nyeri'}, inplace= True)
print(shops_df)

      Nairobi   Kisumu    Nyeri
2014  2409.14  1203.45  3412.12
2015  2941.01  3441.62  3491.16
2016  3496.83  3007.83  3457.19
2017  3119.55  3619.53  1963.10


#### ACCESSING ROWS VIA INDEX VALUES 

In [15]:
print(shops_df.loc[2017])
print(shops_df.loc[[2014,2015]])

Nairobi    3119.55
Kisumu     3619.53
Mombasa    1963.10
Name: 2017, dtype: float64
      Nairobi   Kisumu  Mombasa
2014  2409.14  1203.45  3412.12
2015  2941.01  3441.62  3491.16


#### ACCESSING ROWS BY POSITION

In [17]:

df = shops_df.iloc[0]
df

Nairobi    2409.14
Kisumu     1203.45
Mombasa    3412.12
Name: 2014, dtype: float64

### continuation data A

In [14]:
import pandas as pd
cities = {
"name": ["London", "Berlin", "Madrid", "Romania",
                "Paris", "Vienna", "Bucharest", "Hamburg",
                "Budapest", "Warsaw", "Barcelona",
                "Munich", "Milan"],
"population": [8615246, 3562166, 3165235, 2874038,
                2273305, 1805681, 1803425, 1760433,
                1754000, 1740119, 1602386, 1493900,
                1350680],
"country": ["England", "Germany", "Spain", "Italy",
            "France", "Austria", "Romania",
            "Germany", "Hungary", "Poland", "Spain",
            "Germany", "Italy"]}
city_frame = pd.DataFrame(cities)
city_frame

Unnamed: 0,name,population,country
0,London,8615246,England
1,Berlin,3562166,Germany
2,Madrid,3165235,Spain
3,Romania,2874038,Italy
4,Paris,2273305,France
5,Vienna,1805681,Austria
6,Bucharest,1803425,Romania
7,Hamburg,1760433,Germany
8,Budapest,1754000,Hungary
9,Warsaw,1740119,Poland


In [15]:
# Rename columns
import pandas as pd
# Convert dictionary to DataFrame
cities = pd.DataFrame(cities)
cities.rename(columns={'name': 'City', 'population': 'Population', 'country': 'Country'}, inplace= True)
cities

Unnamed: 0,City,Population,Country
0,London,8615246,England
1,Berlin,3562166,Germany
2,Madrid,3165235,Spain
3,Romania,2874038,Italy
4,Paris,2273305,France
5,Vienna,1805681,Austria
6,Bucharest,1803425,Romania
7,Hamburg,1760433,Germany
8,Budapest,1754000,Hungary
9,Warsaw,1740119,Poland


In [16]:
cities = pd.DataFrame(cities, columns= ['Country', 'City', 'Population'])
cities


Unnamed: 0,Country,City,Population
0,England,London,8615246
1,Germany,Berlin,3562166
2,Spain,Madrid,3165235
3,Italy,Romania,2874038
4,France,Paris,2273305
5,Austria,Vienna,1805681
6,Romania,Bucharest,1803425
7,Germany,Hamburg,1760433
8,Hungary,Budapest,1754000
9,Poland,Warsaw,1740119


In [24]:
#creating a new column
cities ['Area']= [2712, 3344, 7847, 9848, 9667,4445, 9170, 9788, 1234, 9543, 7086, 6078, 8799]
cities

Unnamed: 0,Country,City,Population,Area
0,England,London,8615246,2712
1,Germany,Berlin,3562166,3344
2,Spain,Madrid,3165235,7847
3,Italy,Romania,2874038,9848
4,France,Paris,2273305,9667
5,Austria,Vienna,1805681,4445
6,Romania,Bucharest,1803425,9170
7,Germany,Hamburg,1760433,9788
8,Hungary,Budapest,1754000,1234
9,Poland,Warsaw,1740119,9543
