### Groups of Functions in Pandas for Data Analysis

#### A. Creating Series and DataFrames


In [None]:
# Let's create a pandas Series using a Python list

# Step 1: Import the pandas package (aliased as pd by convention)
import pandas as pd
import warnings
# Hide every warning so the lesson output stays uncluttered.
# NOTE(review): this blanket filter also hides deprecation warnings; consider narrowing it.
warnings.filterwarnings('ignore')

# Set up views: show every column and allow up to 1000 characters per output line
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)


# Step 2: Build the input list (the whole numbers 1 through 10)
data = list(range(1, 11))

# Step 3: Wrap the list in a pandas Series; pandas assigns the default
# integer index 0..9 because we did not pass one
series = pd.Series(data)

# Let's view the series we created (10 rows covers every element)
series.head(10)


0     1
1     2
2     3
3     4
4     5
5     6
6     7
7     8
8     9
9    10
dtype: int64

In [13]:
# Let's confirm that the object we created really is a pandas Series
type(series)

pandas.core.series.Series

In [None]:
# Same list as before, but this time we supply our own index labels
# (the letters "a" through "j") instead of the default 0..9 numbering.
letter_labels = list("abcdefghij")
series2 = pd.Series(data, index=letter_labels)
series2.head(10)

a     1
b     2
c     3
d     4
e     5
f     6
g     7
h     8
i     9
j    10
dtype: int64

In [42]:
# Let's create a Series from a Python dictionary:
# the keys become the index labels and the values become the data.
data2 = dict(a=10, b=20, c=30)

# Build the Series and show it
series3 = pd.Series(data2)
series3.head()


a    10
b    20
c    30
dtype: int64

### Hands on practice:

In [25]:
# Question 1

# Items on the bucket list
bucket_list = ['bag', 'jewellery', 'ring', 'car', 'shoes', 'phone']

# Convert the list to a Series labelled with the letters "a" through "f"
alphabet_labels = list("abcdef")
bucket_series = pd.Series(bucket_list, index=alphabet_labels)

# Display all six entries
bucket_series.head(6)




a          bag
b    jewellery
c         ring
d          car
e        shoes
f        phone
dtype: object

In [27]:

## Question 2
# Biodata stored as a dictionary: each key names a field, each value holds its entry
bio_data = {
    'Name': 'Olawale',
    'Age': 16,
    'Level': '500L',
    'Track': 'AI Engineer',
    'State of Origin': 'Osun',
}

# Convert to a pandas Series - the dictionary keys become the index labels.
# The values mix text and a number, so the Series is stored with dtype object.
bio_data_series = pd.Series(bio_data)

bio_data_series.head()

Name                   Olawale
Age                         16
Level                     500L
Track              AI Engineer
State of Origin           Osun
dtype: object

## Creating a DataFrame

In [67]:
# Let's create a DataFrame

# Step 1: import pandas
import pandas as pd

# Step 2: describe the table column by column - every key is a column name
# and every value is the list of entries for that column
data = dict(
    Name=['Chris', 'Ayo', 'Chisom'],
    Age=[26, 24, 22],
    Home_Town=['Benin', 'Ibadan', 'Enugu'],
)

# Step 3: build the DataFrame ("df" is the usual short name for a DataFrame)
df = pd.DataFrame(data)
df.head()

Unnamed: 0,Name,Age,Home_Town
0,Chris,26,Benin
1,Ayo,24,Ibadan
2,Chisom,22,Enugu


In [30]:
# Let's build the same table from a list of dictionaries:
# each dictionary is one row, keyed by column name.
data2 = [
    dict(Name='Chris', Age=26, Home_Town='Benin'),
    dict(Name='Ayo', Age=24, Home_Town='Ibadan'),
    dict(Name='Chisom', Age=22, Home_Town='Enugu'),
]

# Turn the rows into a DataFrame and preview it
df2 = pd.DataFrame(data2)
df2.head()

Unnamed: 0,Name,Age,Home_Town
0,Chris,26,Benin
1,Ayo,24,Ibadan
2,Chisom,22,Enugu


In [None]:
# Third approach: a list of lists. Each inner list is one row, and because
# the rows carry no column names we supply them through `columns`.
column_names = ['Name', 'Age', 'Home_Town']
data3 = [
    ['Chris', 26, 'Benin'],
    ['Ayo', 24, 'Ibadan'],
    ['Chisom', 22, 'Enugu'],
]
df3 = pd.DataFrame(data=data3, columns=column_names)

df3.head()

Unnamed: 0,Name,Age,Home_Town
0,Chris,26,Benin
1,Ayo,24,Ibadan
2,Chisom,22,Enugu


In [None]:
# Print the type of each object to confirm that all three are DataFrames
for frame in (df, df2, df3):
    print(type(frame))

<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>


## B. Data Input and Output

In [7]:
# Let's get to work: load the CSV file into a DataFrame.
import pandas as pd
# The file name has no directory part, so it is looked up in the current working directory.
biodata = pd.read_csv('Untitled_form.csv')
# Preview the first rows; the comment that follows does not stop this from being the cell's displayed result.
biodata.head()


# Ensure to code along...

Unnamed: 0,Timestamp,First Name,Last Name,Course Track,City,Gender,Seat Number,PC-Make,PC - OS,Feedback
0,2025/09/11 12:55:34 PM GMT+1,Peter,Okonmah,AI,Ogun,Male,28,MACBOOK,Mac OS,non
1,2025/09/11 12:56:11 PM GMT+1,Toyeebat,Nababa,AI,Abeokuta,Female,24,HP,Windows,Excellent
2,2025/09/11 12:57:08 PM GMT+1,Perpetual,Meninwa,AI,Lagos,Female,22,HP,Windows,Thank you so much for the opportunity.
3,2025/09/11 12:57:56 PM GMT+1,Mahfuz,Abdulhameed,AI,Abeokuta,Male,44,HP,Windows,Amazing Shit
4,2025/09/11 12:58:41 PM GMT+1,Divine,Gbadamosi,AI,Abeokuta,Male,35,DELL,Windows,Brain Racking


## C.  Data Inspection and Exploration

In [None]:
# Quick reference: helpers for inspecting a DataFrame.
# Call each one on a DataFrame, for example biodata.head()
#
#   .head()      # To view the first 5 rows
#   .tail()      # To view the last 5 rows
#   .info()      # To check the information about the data
#   .describe()  # Statistical summary
#   .shape       # Check the dimensions of the dataset
#   .columns     # For checking the column names

## D. Data Cleaning

In [73]:
# Do we have any missing values? Check first, then handle them.

# isna() builds a True/False mask that marks every missing cell.
# NOTE(review): the result is neither stored nor the cell's last expression, so nothing from this line is displayed.
df.isna()
# dropna() drops (rather than fills) the rows that contain missing values and returns a new frame;
# the result is only displayed here, not assigned, so df itself is left unchanged.
df.dropna()

Unnamed: 0,Name,Age,Home_Town
0,Chris,26,Benin
1,Ayo,24,Ibadan
2,Chisom,22,Enugu


## E. Data Selection and Filtering

In [None]:
# bio_data_column = [First_Name, Last_Name, City, Course_Track, PC_make, PC_Os, Feedback]

In [9]:
# List the column labels of biodata so we know the exact names to use when selecting
biodata.columns

Index(['Timestamp', 'First Name', 'Last Name', 'Course Track', 'City',
       'Gender', 'Seat Number', 'PC-Make', 'PC - OS', 'Feedback'],
      dtype='object')

##### Column Selection

In [None]:
# Let's look through a single column: square brackets with one column name return a Series
biodata['Gender']

# Alternatively, we can use dot notation (works because "Gender" is a valid Python identifier)
# biodata.Gender

0       Male
1     Female
2     Female
3       Male
4       Male
5       Male
6     Female
7     Female
8     Female
9       Male
10    Female
11    Female
12      Male
13      Male
14      Male
15      Male
16      Male
17      Male
18      Male
19      Male
20      Male
21      Male
22      Male
23      Male
24      Male
25      Male
26      Male
27    Female
28      Male
29      Male
30      Male
31      Male
32      Male
33      Male
Name: Gender, dtype: object

In [19]:
# Let's select multiple columns: pass a list of column names (note the double brackets)
biodata[['Gender', 'City']]

Unnamed: 0,Gender,City
0,Male,Ogun
1,Female,Abeokuta
2,Female,Lagos
3,Male,Abeokuta
4,Male,Abeokuta
5,Male,Abeokuta
6,Female,Abeokuta
7,Female,Lagos
8,Female,Nairobi
9,Male,Abeokuta


#### Cell Selection

In [26]:
# Let's select a single cell

biodata['City'][0] # This will return the first value of the "City" column

# Let's try other methods for selecting cells
biodata.at[0, "City"] # This will also return the first value of the "City" column (row label 0)


# There is still another method using .iat[], which takes integer positions instead of labels
biodata.iat[1, 3] # Row position 1, column position 3: the second row's "Course Track" value

# Only the last expression in a cell is displayed, so the result shown comes from .iat[]

'AI'

#### Row Selection

In [28]:
# Let's select some rows by position
biodata.iloc[0:5] # positions 0 up to (but not including) 5, i.e. the first five rows

Unnamed: 0,Timestamp,First Name,Last Name,Course Track,City,Gender,Seat Number,PC-Make,PC - OS,Feedback
0,2025/09/11 12:55:34 PM GMT+1,Peter,Okonmah,AI,Ogun,Male,28,MACBOOK,Mac OS,non
1,2025/09/11 12:56:11 PM GMT+1,Toyeebat,Nababa,AI,Abeokuta,Female,24,HP,Windows,Excellent
2,2025/09/11 12:57:08 PM GMT+1,Perpetual,Meninwa,AI,Lagos,Female,22,HP,Windows,Thank you so much for the opportunity.
3,2025/09/11 12:57:56 PM GMT+1,Mahfuz,Abdulhameed,AI,Abeokuta,Male,44,HP,Windows,Amazing Shit
4,2025/09/11 12:58:41 PM GMT+1,Divine,Gbadamosi,AI,Abeokuta,Male,35,DELL,Windows,Brain Racking


In [29]:
# Combination of row and column selection with .iloc[rows, columns]
biodata.iloc[0:5, 0:3] # the first slice picks the rows (positions 0-4) and the second slice picks the columns (positions 0-2)

Unnamed: 0,Timestamp,First Name,Last Name
0,2025/09/11 12:55:34 PM GMT+1,Peter,Okonmah
1,2025/09/11 12:56:11 PM GMT+1,Toyeebat,Nababa
2,2025/09/11 12:57:08 PM GMT+1,Perpetual,Meninwa
3,2025/09/11 12:57:56 PM GMT+1,Mahfuz,Abdulhameed
4,2025/09/11 12:58:41 PM GMT+1,Divine,Gbadamosi


### Hands on Practice

In [39]:
# .loc selects by label and its slices include the end label, so 0:1 returns rows 0 and 1
biodata.loc[0:1, 'City']
# Alternative to try (no column label given):
# biodata.loc[0:1]


0        Ogun
1    Abeokuta
Name: City, dtype: object

#### Conditional Filtering

In [None]:
# Filter rows where Gender is 'Male'. Indexing with the True/False mask returns a DataFrame
filtered_male = biodata[biodata['Gender'] == 'Male']
print("Rows where Gender is 'Male':")
filtered_male


Rows where Gender is 'Male':


Unnamed: 0,Timestamp,First Name,Last Name,Course Track,City,Gender,Seat Number,PC-Make,PC - OS,Feedback
0,2025/09/11 12:55:34 PM GMT+1,Peter,Okonmah,AI,Ogun,Male,28,MACBOOK,Mac OS,non
3,2025/09/11 12:57:56 PM GMT+1,Mahfuz,Abdulhameed,AI,Abeokuta,Male,44,HP,Windows,Amazing Shit
4,2025/09/11 12:58:41 PM GMT+1,Divine,Gbadamosi,AI,Abeokuta,Male,35,DELL,Windows,Brain Racking
5,2025/09/11 12:58:55 PM GMT+1,Abdulmalik,Adedotun,AI,Abeokuta,Male,200,HP,Windows,Enjoying the course so far
9,2025/09/11 12:59:28 PM GMT+1,Hannah,Tanimola,AI,Abeokuta,Male,30,HP,Windows,On God
12,2025/09/11 1:00:03 PM GMT+1,Opeyemi,Odejimi,Cloud Computing,Abeokuta,Male,38,HP,Linux,Na wa
13,2025/09/11 1:00:13 PM GMT+1,Olasunkanmi,Rasak,AI,Kobape,Male,3,HP,Windows,My gratitude to the sponsor of this program an...
14,2025/09/11 1:00:27 PM GMT+1,Saheed,Olayinka,AI;Data Science;Web Dev,Abeokuta,Male,29,HP,Windows,None for now
15,2025/09/11 1:00:31 PM GMT+1,Kehinde,Akindele,Cloud Computing,Abeokuta,Male,54,Gateway,Windows,Great
16,2025/09/11 1:00:43 PM GMT+1,Oluwole,Oludayo,AI,Abeokuta,Male,9,HP,Windows,Good training to attend


In [41]:
# Filter rows where City is 'Abeokuta' and Course Track is 'AI'.
# Each condition sits in its own parentheses and the two masks are combined element-wise with &.
filtered_city = biodata[(biodata['City'] == 'Abeokuta') & (biodata['Course Track'] == 'AI')]
# The label must describe the filter actually applied above
print("Rows where City is 'Abeokuta' and Course Track is 'AI':")
filtered_city


Rows where City is 'Lagos' and Course_Track is 'Data Science':


Unnamed: 0,Timestamp,First Name,Last Name,Course Track,City,Gender,Seat Number,PC-Make,PC - OS,Feedback
1,2025/09/11 12:56:11 PM GMT+1,Toyeebat,Nababa,AI,Abeokuta,Female,24,HP,Windows,Excellent
3,2025/09/11 12:57:56 PM GMT+1,Mahfuz,Abdulhameed,AI,Abeokuta,Male,44,HP,Windows,Amazing Shit
4,2025/09/11 12:58:41 PM GMT+1,Divine,Gbadamosi,AI,Abeokuta,Male,35,DELL,Windows,Brain Racking
5,2025/09/11 12:58:55 PM GMT+1,Abdulmalik,Adedotun,AI,Abeokuta,Male,200,HP,Windows,Enjoying the course so far
6,2025/09/11 12:58:55 PM GMT+1,Naheemot,Adebiyi,AI,Abeokuta,Female,32,DELL,Windows,Grateful for the opportunity to be here.
9,2025/09/11 12:59:28 PM GMT+1,Hannah,Tanimola,AI,Abeokuta,Male,30,HP,Windows,On God
11,2025/09/11 12:59:43 PM GMT+1,Esther,Kudoro,AI,Abeokuta,Female,1,HP,Windows,Chill
16,2025/09/11 1:00:43 PM GMT+1,Oluwole,Oludayo,AI,Abeokuta,Male,9,HP,Windows,Good training to attend
18,2025/09/11 1:00:54 PM GMT+1,Ademola,Akinrinde,AI,Abeokuta,Male,100,MACBOOK,Mac OS,Awesome shit\n\n
21,2025/09/11 1:01:46 PM GMT+1,Ayuba,Raji,AI,Abeokuta,Male,26,HP,Windows,None for now


In [42]:
# Keep only the rows whose City is one of the listed values ('Lagos' or 'Abeokuta').
# isin() marks each row True when its City appears in the list.
cities = ['Abeokuta', 'Lagos']
in_selected_cities = biodata['City'].isin(cities)
city_filthered = biodata[in_selected_cities]
print("Rows where City is either 'Lagos' or 'Abeokuta':")
city_filthered

Rows where City is either 'Lagos' or 'Abeokuta':


Unnamed: 0,Timestamp,First Name,Last Name,Course Track,City,Gender,Seat Number,PC-Make,PC - OS,Feedback
1,2025/09/11 12:56:11 PM GMT+1,Toyeebat,Nababa,AI,Abeokuta,Female,24,HP,Windows,Excellent
2,2025/09/11 12:57:08 PM GMT+1,Perpetual,Meninwa,AI,Lagos,Female,22,HP,Windows,Thank you so much for the opportunity.
3,2025/09/11 12:57:56 PM GMT+1,Mahfuz,Abdulhameed,AI,Abeokuta,Male,44,HP,Windows,Amazing Shit
4,2025/09/11 12:58:41 PM GMT+1,Divine,Gbadamosi,AI,Abeokuta,Male,35,DELL,Windows,Brain Racking
5,2025/09/11 12:58:55 PM GMT+1,Abdulmalik,Adedotun,AI,Abeokuta,Male,200,HP,Windows,Enjoying the course so far
6,2025/09/11 12:58:55 PM GMT+1,Naheemot,Adebiyi,AI,Abeokuta,Female,32,DELL,Windows,Grateful for the opportunity to be here.
7,2025/09/11 12:59:00 PM GMT+1,Kanyisola,Fagbayi,AI;Data Science,Lagos,Female,82,HP,Windows,One chin chin for you for this form
9,2025/09/11 12:59:28 PM GMT+1,Hannah,Tanimola,AI,Abeokuta,Male,30,HP,Windows,On God
10,2025/09/11 12:59:41 PM GMT+1,Deborah,Adelegan,AI;Data Science,Abeokuta,Female,1,HP,Windows,None for now
11,2025/09/11 12:59:43 PM GMT+1,Esther,Kudoro,AI,Abeokuta,Female,1,HP,Windows,Chill
