In [2]:
#importing pandas
import pandas as pd
# Control how pandas prints: cap the number of rows/columns shown, limit the
# line width, and use the plain-text repr rather than HTML tables.
pd.set_option(
    'display.max_columns', 10,
    'display.max_rows', 10,
    'display.width', 60,
    'display.notebook_repr_html', False,
)

In [5]:
# Create a Series from a plain Python list; since no index is supplied, the
# values are labelled 0 through 4.
s = pd.Series(data=[1, 2, 3, 4, 5])
s

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [6]:
# Access a single element of the Series. The index here is integer-valued,
# so s[1] looks up the label 1 and returns the scalar stored under it.
s[1]

2

In [7]:
# Access several values at once: passing a list of labels returns a new
# Series that contains only those entries.
s[[2,4]]

2    3
4    5
dtype: int64

In [8]:
# A Series can carry user-defined labels instead of the default integers by
# passing them through the `index` parameter.
a = pd.Series(data=[3, 2, 1, 4, 5], index=list('abcde'))
a

a    3
b    2
c    1
d    4
e    5
dtype: int64

In [9]:
# With string labels in place, values can be selected by passing a list of
# those labels.
a[['b','e']]

b    2
e    5
dtype: int64

In [10]:
# Select values by 0-based position on a Series whose index is not integer-valued.
# Use .iloc for this: relying on plain a[[1, 2]] to fall back to positional
# lookup is deprecated in pandas and emits a FutureWarning.
a.iloc[[1, 2]]

  a[[1,2]]


b    2
c    1
dtype: int64

In [12]:
# The labels of a Series can be inspected through its `index` property.
a.index


Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

In [14]:
# Build an index whose entries are the consecutive dates between the given
# start and end dates (both included).
dates = pd.date_range(start='2023-01-01', end='2023-01-06')
dates

DatetimeIndex(['2023-01-01', '2023-01-02', '2023-01-03',
               '2023-01-04', '2023-01-05', '2023-01-06'],
              dtype='datetime64[ns]', freq='D')

In [15]:
# date_range returned a specialised index type, DatetimeIndex.
# Use it to label a Series holding one temperature reading per date.
temps = pd.Series(data=[80, 82, 85, 90, 83, 87], index=dates)

In [16]:
# Display the temperature Series; each value is labelled by its date.
temps

2023-01-01    80
2023-01-02    82
2023-01-03    85
2023-01-04    90
2023-01-05    83
2023-01-06    87
Freq: D, dtype: int64

In [17]:
# Look up the temperature for a specific date by passing the date string
# as the label.
temps['2023-01-05']

83

In [18]:
# A second set of temperature readings labelled with the same dates.
temps1 = pd.Series(data=[70, 75, 69, 83, 79, 77], index=dates)
# Subtracting two Series pairs values up by matching labels, giving the
# difference for each date.
temp_diff = temps - temps1
temp_diff

2023-01-01    10
2023-01-02     7
2023-01-03    16
2023-01-04     7
2023-01-05     4
2023-01-06    10
Freq: D, dtype: int64

In [19]:
# The index holds dates rather than integers, so use .iloc to fetch a value by
# its 0-based position. Plain temp_diff[0] relies on a deprecated positional
# fallback and emits a FutureWarning.
temp_diff.iloc[0]

  temp_diff[0]


10

In [20]:
# Compute the mean of the per-date differences.
temp_diff.mean()

9.0

In [21]:
#Pandas DataFrame.
#In a Series only one value can be associated with each index label. If we want multiple values to be associated with a label, we can use a DataFrame.
#Each Series becomes a column in the DataFrame.


In [22]:
# Combine the two Series into one DataFrame; each dictionary key becomes the
# name of the column holding that Series.
temps_df = pd.DataFrame(data={"Nairobi": temps, "Kiambu": temps1})
temps_df

            Nairobi  Kiambu
2023-01-01       80      70
2023-01-02       82      75
2023-01-03       85      69
2023-01-04       90      83
2023-01-05       83      79
2023-01-06       87      77

In [25]:
# Columns of a DataFrame can be accessed with the [] indexer, using either a
# single column name or a list of column names. A single name returns the
# column as a Series.
temps_df["Nairobi"]

2023-01-01    80
2023-01-02    82
2023-01-03    85
2023-01-04    90
2023-01-05    83
2023-01-06    87
Freq: D, Name: Nairobi, dtype: int64

In [26]:
# The same single-name lookup for the other city's column.
temps_df["Kiambu"]

2023-01-01    70
2023-01-02    75
2023-01-03    69
2023-01-04    83
2023-01-05    79
2023-01-06    77
Freq: D, Name: Kiambu, dtype: int64

In [27]:
# Passing a list of column names returns a DataFrame with just those columns.
temps_df[["Nairobi","Kiambu"]]

            Nairobi  Kiambu
2023-01-01       80      70
2023-01-02       82      75
2023-01-03       85      69
2023-01-04       90      83
2023-01-05       83      79
2023-01-06       87      77

In [28]:
# A column can also be read as an attribute of the DataFrame; this returns
# the same Series as temps_df["Nairobi"].
temps_df.Nairobi

2023-01-01    80
2023-01-02    82
2023-01-03    85
2023-01-04    90
2023-01-05    83
2023-01-06    87
Freq: D, Name: Nairobi, dtype: int64

In [29]:
# Attribute-style access to the Kiambu column.
temps_df.Kiambu

2023-01-01    70
2023-01-02    75
2023-01-03    69
2023-01-04    83
2023-01-05    79
2023-01-06    77
Freq: D, Name: Kiambu, dtype: int64

In [30]:
# Columns are Series, so they support the same arithmetic: this gives the
# per-date difference between the two cities.
temps_df.Nairobi - temps_df.Kiambu

2023-01-01    10
2023-01-02     7
2023-01-03    16
2023-01-04     7
2023-01-05     4
2023-01-06    10
Freq: D, dtype: int64

In [31]:
# Assigning to a column name that does not exist yet adds it to the
# DataFrame; here it stores the Nairobi-minus-Kiambu difference per date.
temps_df["Differences"] = temps_df["Nairobi"] - temps_df["Kiambu"]

In [32]:
# Display the DataFrame, which now includes the Differences column.
temps_df

            Nairobi  Kiambu  Differences
2023-01-01       80      70           10
2023-01-02       82      75            7
2023-01-03       85      69           16
2023-01-04       90      83            7
2023-01-05       83      79            4
2023-01-06       87      77           10

In [33]:
# The column names of a DataFrame are available through its `columns`
# property.
temps_df.columns

Index(['Nairobi', 'Kiambu', 'Differences'], dtype='object')

In [34]:
# The values in a column can be sliced like any Series; a bare [:] slice
# selects every entry.
temps_df.Differences[:]

2023-01-01    10
2023-01-02     7
2023-01-03    16
2023-01-04     7
2023-01-05     4
2023-01-06    10
Freq: D, Name: Differences, dtype: int64

In [35]:
# An integer slice selects by position: entries at positions 1 and 2
# (the end of the slice is excluded).
temps_df.Differences[1:3]

2023-01-02     7
2023-01-03    16
Freq: D, Name: Differences, dtype: int64

In [36]:
# To retrieve entire rows from the DataFrame, use the .loc and .iloc properties.
# .loc looks rows up by index label.
# .iloc looks rows up by 0-based position.
# The row comes back as a Series whose labels are the column names.
temps_df.iloc[1]

Nairobi        82
Kiambu         75
Differences     7
Name: 2023-01-02 00:00:00, dtype: int64

In [39]:
# Retrieve a row using its index label (here, a date string).
temps_df.loc['2023-01-03']

Nairobi        85
Kiambu         69
Differences    16
Name: 2023-01-03 00:00:00, dtype: int64

In [42]:
# Select the rows at positions 1, 3 and 4, then take the Differences column
# of that subset.
temps_df.iloc[[1,3,4]].Differences

2023-01-02    7
2023-01-04    7
2023-01-05    4
Name: Differences, dtype: int64

In [None]:
# Rows of 