# Creating Series and DataFrame

### Creating a Series

In [2]:

import pandas as pd

import warnings
warnings.filterwarnings('ignore')
#this command allows  you to view the entire content of the csv without using head()
pd.set_option("display.max_columns", None)
pd.set_option('display.width', 1000)


In [3]:
#creating a pandas Series using a python list

data = [1,2,3,4,5,6,7,8,9,10]
series = pd.Series(data)
#.head(n) prints out the first n rows
series.head(10)
#type() checks if the inputted data is a Series or a DataFrame
# type(series)



0     1
1     2
2     3
3     4
4     5
5     6
6     7
7     8
8     9
9    10
dtype: int64

In [4]:
#Creating a series using our serial number
series2 = pd.Series(data, index=["a", "b", "c", "d","e","f","g","h","i","j"])
series2.head(10)

a     1
b     2
c     3
d     4
e     5
f     6
g     7
h     8
i     9
j    10
dtype: int64

In [5]:
#Creating a series using python dictionary
data2 = {"a": 10, "b": 20, "c": 30}

series3 = pd.Series(data2)
series3.head()

a    10
b    20
c    30
dtype: int64

#### Hands on practice

In [6]:
# Question 1
#A list of 6 items with alphabet as the index
bucket = ["fan", "water", "bottle", "purse", "earbuds", "phone"]
series_bucket = pd.Series(bucket, index=['a', 'b', 'c', 'd', 'e', 'f'])
series_bucket.head()

a        fan
b      water
c     bottle
d      purse
e    earbuds
dtype: object

In [7]:
#Question 2
biodata = {"Name": "Esther", "Age":23, "Track": "AI_Eng", "State": "Ogun", "Height": 155}
series_biodata = pd.Series(biodata)
series_biodata.head()

Name      Esther
Age           23
Track     AI_Eng
State       Ogun
Height       155
dtype: object

### Creating a  DataFrame

In [8]:
#Creating a DataFrame
data = {
    "Name": ["Chris", "Ayo", "Chisom"],
    "Age": [26,24, 22],
    "Home_Town": ["Benin", "Ibadan", "Enugu"]
}
df = pd.DataFrame(data)
df.head()

Unnamed: 0,Name,Age,Home_Town
0,Chris,26,Benin
1,Ayo,24,Ibadan
2,Chisom,22,Enugu


In [9]:
#Using a list of dictionaries
data2 = [
    {"Name": "Chris", "Age": 26, "Home_Town":"Benin"},
    {"Name": "Ayo", "Age": 24, "Home_Town": "Ibadan"}, 
    {"Name": "Chisom", "Age": 22, "Home_Town": "Enugu"}
]
df2 = pd.DataFrame(data2)
df2.head()

Unnamed: 0,Name,Age,Home_Town
0,Chris,26,Benin
1,Ayo,24,Ibadan
2,Chisom,22,Enugu


In [10]:
#Using a list of lists
data3 = [
    ["Chris", 26, "Benin"],
    ["Ayo", 24, "Ibadan"],
    ["Chisom", 22, "Enugu"]
]
df3 = pd.DataFrame(data3, columns=["Name", "Age", "Home_Town"])
df3.head()

Unnamed: 0,Name,Age,Home_Town
0,Chris,26,Benin
1,Ayo,24,Ibadan
2,Chisom,22,Enugu


In [11]:
print(type(df))
print(type(df2))
print(type(df3))

<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>


#### Hands on practice

In [12]:
#Reading the csv file
df = pd.read_csv("biodata.csv")
df.head()
df.tail() #gives the last five rows of the dataset
df.info() #gives basic info about the data
df.describe() #give the statistical summary of the data
df.shape
df.columns #checks the columns names



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 34 entries, 0 to 33
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Timestamp     34 non-null     object
 1   First Name    34 non-null     object
 2   Last Name     34 non-null     object
 3   Course Track  34 non-null     object
 4   City          34 non-null     object
 5   Gender        34 non-null     object
 6   Seat Number   34 non-null     int64 
 7   PC-Make       34 non-null     object
 8   PC - OS       34 non-null     object
 9   Feedback      34 non-null     object
dtypes: int64(1), object(9)
memory usage: 2.8+ KB


Index(['Timestamp', 'First Name', 'Last Name', 'Course Track', 'City', 'Gender', 'Seat Number', 'PC-Make', 'PC - OS', 'Feedback'], dtype='object')

#### Data Cleaning

In [13]:
#DATA CLEANING
#Handling missing values
df.isna().sum()
df.isnull().sum()
# df.fillna(0, inplace= False) #nothing happens since there are no null cells

Timestamp       0
First Name      0
Last Name       0
Course Track    0
City            0
Gender          0
Seat Number     0
PC-Make         0
PC - OS         0
Feedback        0
dtype: int64

In [14]:
#Finding and Handling duplicates
df.duplicated() #checks for duplicated instances
# df.drop_duplicates() #drops duplicates

0     False
1     False
2     False
3     False
4     False
5     False
6     False
7     False
8     False
9     False
10    False
11    False
12    False
13    False
14    False
15    False
16    False
17    False
18    False
19    False
20    False
21    False
22    False
23    False
24    False
25    False
26    False
27    False
28    False
29    False
30    False
31    False
32    False
33    False
dtype: bool

In [15]:
#Correcting Data Types
#this checks for the data type in the dataset
df.dtypes

#this changes the data type of the specified column to another data type
df["Seat Number"].astype(int)

#converts the datatype of the specified column to date/time format
pd.to_datetime(df["Timestamp"])

0    2025-09-11 12:55:34-01:00
1    2025-09-11 12:56:11-01:00
2    2025-09-11 12:57:08-01:00
3    2025-09-11 12:57:56-01:00
4    2025-09-11 12:58:41-01:00
5    2025-09-11 12:58:55-01:00
6    2025-09-11 12:58:55-01:00
7    2025-09-11 12:59:00-01:00
8    2025-09-11 12:59:16-01:00
9    2025-09-11 12:59:28-01:00
10   2025-09-11 12:59:41-01:00
11   2025-09-11 12:59:43-01:00
12   2025-09-11 13:00:03-01:00
13   2025-09-11 13:00:13-01:00
14   2025-09-11 13:00:27-01:00
15   2025-09-11 13:00:31-01:00
16   2025-09-11 13:00:43-01:00
17   2025-09-11 13:00:49-01:00
18   2025-09-11 13:00:54-01:00
19   2025-09-11 13:01:31-01:00
20   2025-09-11 13:01:44-01:00
21   2025-09-11 13:01:46-01:00
22   2025-09-11 13:01:54-01:00
23   2025-09-11 13:02:28-01:00
24   2025-09-11 13:02:49-01:00
25   2025-09-11 13:02:56-01:00
26   2025-09-11 13:02:59-01:00
27   2025-09-11 13:03:12-01:00
28   2025-09-11 13:03:36-01:00
29   2025-09-11 13:06:03-01:00
30   2025-09-11 13:06:48-01:00
31   2025-09-11 13:10:16-01:00
32   202

#### Data Selection and Filtering

In [16]:
#Column Viewing
#This prints out the column names
df.columns

Index(['Timestamp', 'First Name', 'Last Name', 'Course Track', 'City', 'Gender', 'Seat Number', 'PC-Make', 'PC - OS', 'Feedback'], dtype='object')

In [17]:
#Column Selection
#Looking through a single column

df["First Name"]

#Alternatively, dot can be used
df.Gender

0       Male
1     Female
2     Female
3       Male
4       Male
5       Male
6     Female
7     Female
8     Female
9       Male
10    Female
11    Female
12      Male
13      Male
14      Male
15      Male
16      Male
17      Male
18      Male
19      Male
20      Male
21      Male
22      Male
23      Male
24      Male
25      Male
26      Male
27    Female
28      Male
29      Male
30      Male
31      Male
32      Male
33      Male
Name: Gender, dtype: object

In [18]:
#Selecting multiple column
df[["First Name", "Last Name", "Feedback"]]

Unnamed: 0,First Name,Last Name,Feedback
0,Peter,Okonmah,non
1,Toyeebat,Nababa,Excellent
2,Perpetual,Meninwa,Thank you so much for the opportunity.
3,Mahfuz,Abdulhameed,Amazing Shit
4,Divine,Gbadamosi,Brain Racking
5,Abdulmalik,Adedotun,Enjoying the course so far
6,Naheemot,Adebiyi,Grateful for the opportunity to be here.
7,Kanyisola,Fagbayi,One chin chin for you for this form
8,Blessing,James,Thanks for creating the form.
9,Hannah,Tanimola,On God


In [19]:
#Selecting a single cell
#this prints out the first name with index number 2
df["First Name"][2]

#other methods
#print out the name with index number 0
df.at[0, "First Name"]

#prints out the first element in the first row and second column
df.iat[0,1]

'Peter'

### Row selection

In [20]:
df.iloc[0:5] #selects rows from index 0 to 5

Unnamed: 0,Timestamp,First Name,Last Name,Course Track,City,Gender,Seat Number,PC-Make,PC - OS,Feedback
0,2025/09/11 12:55:34 PM GMT+1,Peter,Okonmah,AI,Ogun,Male,28,MACBOOK,Mac OS,non
1,2025/09/11 12:56:11 PM GMT+1,Toyeebat,Nababa,AI,Abeokuta,Female,24,HP,Windows,Excellent
2,2025/09/11 12:57:08 PM GMT+1,Perpetual,Meninwa,AI,Lagos,Female,22,HP,Windows,Thank you so much for the opportunity.
3,2025/09/11 12:57:56 PM GMT+1,Mahfuz,Abdulhameed,AI,Abeokuta,Male,44,HP,Windows,Amazing Shit
4,2025/09/11 12:58:41 PM GMT+1,Divine,Gbadamosi,AI,Abeokuta,Male,35,DELL,Windows,Brain Racking


In [21]:
#combination of row and column selection
df.iloc[0:5, 0:3] #prints out the first 5 rows and first three columns

Unnamed: 0,Timestamp,First Name,Last Name
0,2025/09/11 12:55:34 PM GMT+1,Peter,Okonmah
1,2025/09/11 12:56:11 PM GMT+1,Toyeebat,Nababa
2,2025/09/11 12:57:08 PM GMT+1,Perpetual,Meninwa
3,2025/09/11 12:57:56 PM GMT+1,Mahfuz,Abdulhameed
4,2025/09/11 12:58:41 PM GMT+1,Divine,Gbadamosi


### Conditional Filtering

In [22]:
# Filtering rows where Gender is male
filtered_male = df[df['Gender'] == 'Male']
print("Rows where gender is male: ")
filtered_male

Rows where gender is male: 


Unnamed: 0,Timestamp,First Name,Last Name,Course Track,City,Gender,Seat Number,PC-Make,PC - OS,Feedback
0,2025/09/11 12:55:34 PM GMT+1,Peter,Okonmah,AI,Ogun,Male,28,MACBOOK,Mac OS,non
3,2025/09/11 12:57:56 PM GMT+1,Mahfuz,Abdulhameed,AI,Abeokuta,Male,44,HP,Windows,Amazing Shit
4,2025/09/11 12:58:41 PM GMT+1,Divine,Gbadamosi,AI,Abeokuta,Male,35,DELL,Windows,Brain Racking
5,2025/09/11 12:58:55 PM GMT+1,Abdulmalik,Adedotun,AI,Abeokuta,Male,200,HP,Windows,Enjoying the course so far
9,2025/09/11 12:59:28 PM GMT+1,Hannah,Tanimola,AI,Abeokuta,Male,30,HP,Windows,On God
12,2025/09/11 1:00:03 PM GMT+1,Opeyemi,Odejimi,Cloud Computing,Abeokuta,Male,38,HP,Linux,Na wa
13,2025/09/11 1:00:13 PM GMT+1,Olasunkanmi,Rasak,AI,Kobape,Male,3,HP,Windows,My gratitude to the sponsor of this program an...
14,2025/09/11 1:00:27 PM GMT+1,Saheed,Olayinka,AI;Data Science;Web Dev,Abeokuta,Male,29,HP,Windows,None for now
15,2025/09/11 1:00:31 PM GMT+1,Kehinde,Akindele,Cloud Computing,Abeokuta,Male,54,Gateway,Windows,Great
16,2025/09/11 1:00:43 PM GMT+1,Oluwole,Oludayo,AI,Abeokuta,Male,9,HP,Windows,Good training to attend


In [23]:
# Filterings rows where city is Abeokuta and course track is AI
filtered_city = df[(df["City"]== "Abeokuta") & (df["Course Track"]== "AI")]
print(f"Rows where city is Abeokuta and course track is AI: \n{filtered_city}")

Rows where city is Abeokuta and course track is AI: 
                       Timestamp  First Name    Last Name Course Track      City  Gender  Seat Number     PC-Make  PC - OS                                           Feedback
1   2025/09/11 12:56:11 PM GMT+1    Toyeebat       Nababa           AI  Abeokuta  Female           24          HP  Windows                                          Excellent
3   2025/09/11 12:57:56 PM GMT+1      Mahfuz  Abdulhameed           AI  Abeokuta    Male           44          HP  Windows                                       Amazing Shit
4   2025/09/11 12:58:41 PM GMT+1      Divine    Gbadamosi           AI  Abeokuta    Male           35        DELL  Windows                                      Brain Racking
5   2025/09/11 12:58:55 PM GMT+1  Abdulmalik     Adedotun           AI  Abeokuta    Male          200          HP  Windows                         Enjoying the course so far
6   2025/09/11 12:58:55 PM GMT+1    Naheemot      Adebiyi           AI  Abeok

In [24]:
# Filtering rows where city is either abeokuta or lagos
cities = ["Lagos", "Abeokuta"]
city_filtered = df[df["City"].isin(cities)]
print("Rows where city is in Lagos or Abeokuta:")
city_filtered

Rows where city is in Lagos or Abeokuta:


Unnamed: 0,Timestamp,First Name,Last Name,Course Track,City,Gender,Seat Number,PC-Make,PC - OS,Feedback
1,2025/09/11 12:56:11 PM GMT+1,Toyeebat,Nababa,AI,Abeokuta,Female,24,HP,Windows,Excellent
2,2025/09/11 12:57:08 PM GMT+1,Perpetual,Meninwa,AI,Lagos,Female,22,HP,Windows,Thank you so much for the opportunity.
3,2025/09/11 12:57:56 PM GMT+1,Mahfuz,Abdulhameed,AI,Abeokuta,Male,44,HP,Windows,Amazing Shit
4,2025/09/11 12:58:41 PM GMT+1,Divine,Gbadamosi,AI,Abeokuta,Male,35,DELL,Windows,Brain Racking
5,2025/09/11 12:58:55 PM GMT+1,Abdulmalik,Adedotun,AI,Abeokuta,Male,200,HP,Windows,Enjoying the course so far
6,2025/09/11 12:58:55 PM GMT+1,Naheemot,Adebiyi,AI,Abeokuta,Female,32,DELL,Windows,Grateful for the opportunity to be here.
7,2025/09/11 12:59:00 PM GMT+1,Kanyisola,Fagbayi,AI;Data Science,Lagos,Female,82,HP,Windows,One chin chin for you for this form
9,2025/09/11 12:59:28 PM GMT+1,Hannah,Tanimola,AI,Abeokuta,Male,30,HP,Windows,On God
10,2025/09/11 12:59:41 PM GMT+1,Deborah,Adelegan,AI;Data Science,Abeokuta,Female,1,HP,Windows,None for now
11,2025/09/11 12:59:43 PM GMT+1,Esther,Kudoro,AI,Abeokuta,Female,1,HP,Windows,Chill


Using the .query method

In [25]:
# Using the query() to filter rows where Course_Track is AI
query_filterd = df.query("City == 'Abeokuta' and Feedback == 'Excellent'")
print("Rows filtered using query() method")
query_filterd

Rows filtered using query() method


Unnamed: 0,Timestamp,First Name,Last Name,Course Track,City,Gender,Seat Number,PC-Make,PC - OS,Feedback
1,2025/09/11 12:56:11 PM GMT+1,Toyeebat,Nababa,AI,Abeokuta,Female,24,HP,Windows,Excellent


In [26]:
df.columns

Index(['Timestamp', 'First Name', 'Last Name', 'Course Track', 'City', 'Gender', 'Seat Number', 'PC-Make', 'PC - OS', 'Feedback'], dtype='object')

In [27]:
df.columns = df.columns.str.replace("-", " ")

In [28]:
df.columns = df.columns.str.replace(" ", "_")


In [29]:
df.columns= df.columns.str.replace("PC___OS","PC_OS")
df.columns = df.columns.str.strip()

In [30]:
df.columns

Index(['Timestamp', 'First_Name', 'Last_Name', 'Course_Track', 'City', 'Gender', 'Seat_Number', 'PC_Make', 'PC_OS', 'Feedback'], dtype='object')

In [31]:
df.head(1)

Unnamed: 0,Timestamp,First_Name,Last_Name,Course_Track,City,Gender,Seat_Number,PC_Make,PC_OS,Feedback
0,2025/09/11 12:55:34 PM GMT+1,Peter,Okonmah,AI,Ogun,Male,28,MACBOOK,Mac OS,non


In [32]:
seat_no = df.query("Seat_Number < 10 and Course_Track == 'AI' ")
print("AI student with seat numbers less than 10")
seat_no

AI student with seat numbers less than 10


Unnamed: 0,Timestamp,First_Name,Last_Name,Course_Track,City,Gender,Seat_Number,PC_Make,PC_OS,Feedback
11,2025/09/11 12:59:43 PM GMT+1,Esther,Kudoro,AI,Abeokuta,Female,1,HP,Windows,Chill
13,2025/09/11 1:00:13 PM GMT+1,Olasunkanmi,Rasak,AI,Kobape,Male,3,HP,Windows,My gratitude to the sponsor of this program an...
16,2025/09/11 1:00:43 PM GMT+1,Oluwole,Oludayo,AI,Abeokuta,Male,9,HP,Windows,Good training to attend
19,2025/09/11 1:01:31 PM GMT+1,Victor,Ademuyiwa,AI,kobape,Male,2,ASUS,Windows,Feels good
23,2025/09/11 1:02:28 PM GMT+1,Olajide,Abioye,AI,Abeokuta,Male,5,HP,Windows,"So far, i getting to familarize myself with nu..."
30,2025/09/11 1:06:48 PM GMT+1,Gabriel,Bamgbose,AI,Abeokuta,Male,2,HP,Windows,Good


In [33]:
hp_dell = df.query("PC_Make in ['HP','DELL']")
print("Rows where pc make is either HP or DELL")
hp_dell

Rows where pc make is either HP or DELL


Unnamed: 0,Timestamp,First_Name,Last_Name,Course_Track,City,Gender,Seat_Number,PC_Make,PC_OS,Feedback
1,2025/09/11 12:56:11 PM GMT+1,Toyeebat,Nababa,AI,Abeokuta,Female,24,HP,Windows,Excellent
2,2025/09/11 12:57:08 PM GMT+1,Perpetual,Meninwa,AI,Lagos,Female,22,HP,Windows,Thank you so much for the opportunity.
3,2025/09/11 12:57:56 PM GMT+1,Mahfuz,Abdulhameed,AI,Abeokuta,Male,44,HP,Windows,Amazing Shit
4,2025/09/11 12:58:41 PM GMT+1,Divine,Gbadamosi,AI,Abeokuta,Male,35,DELL,Windows,Brain Racking
5,2025/09/11 12:58:55 PM GMT+1,Abdulmalik,Adedotun,AI,Abeokuta,Male,200,HP,Windows,Enjoying the course so far
6,2025/09/11 12:58:55 PM GMT+1,Naheemot,Adebiyi,AI,Abeokuta,Female,32,DELL,Windows,Grateful for the opportunity to be here.
7,2025/09/11 12:59:00 PM GMT+1,Kanyisola,Fagbayi,AI;Data Science,Lagos,Female,82,HP,Windows,One chin chin for you for this form
8,2025/09/11 12:59:16 PM GMT+1,Blessing,James,Cyber Security,Nairobi,Female,45678,HP,Windows,Thanks for creating the form.
9,2025/09/11 12:59:28 PM GMT+1,Hannah,Tanimola,AI,Abeokuta,Male,30,HP,Windows,On God
10,2025/09/11 12:59:41 PM GMT+1,Deborah,Adelegan,AI;Data Science,Abeokuta,Female,1,HP,Windows,None for now


In [34]:
# Using a python variable inside query() with and @ symbol
desired_track = "Web Dev"
web_dev_students = df.query("Course_Track == @desired_track")
print("Students in the web dev track:")
web_dev_students

Students in the web dev track:


Unnamed: 0,Timestamp,First_Name,Last_Name,Course_Track,City,Gender,Seat_Number,PC_Make,PC_OS,Feedback
17,2025/09/11 1:00:49 PM GMT+1,Samuel,Oyewusi,Web Dev,Lagos,Male,15,HP,Windows,Satisfactory
32,2025/09/11 1:11:39 PM GMT+1,Oluwapelumi,Adenuga,Web Dev,Abeokuta,Male,36,HP,Windows,live yours


In [35]:
# A complex query filter for Course_Track, Feedback and Seat_Number
complex_query = df.query("Seat_Number <5 or (PC_Make == 'HP' and Gender == 'Female')")
complex_query

Unnamed: 0,Timestamp,First_Name,Last_Name,Course_Track,City,Gender,Seat_Number,PC_Make,PC_OS,Feedback
1,2025/09/11 12:56:11 PM GMT+1,Toyeebat,Nababa,AI,Abeokuta,Female,24,HP,Windows,Excellent
2,2025/09/11 12:57:08 PM GMT+1,Perpetual,Meninwa,AI,Lagos,Female,22,HP,Windows,Thank you so much for the opportunity.
7,2025/09/11 12:59:00 PM GMT+1,Kanyisola,Fagbayi,AI;Data Science,Lagos,Female,82,HP,Windows,One chin chin for you for this form
8,2025/09/11 12:59:16 PM GMT+1,Blessing,James,Cyber Security,Nairobi,Female,45678,HP,Windows,Thanks for creating the form.
10,2025/09/11 12:59:41 PM GMT+1,Deborah,Adelegan,AI;Data Science,Abeokuta,Female,1,HP,Windows,None for now
11,2025/09/11 12:59:43 PM GMT+1,Esther,Kudoro,AI,Abeokuta,Female,1,HP,Windows,Chill
13,2025/09/11 1:00:13 PM GMT+1,Olasunkanmi,Rasak,AI,Kobape,Male,3,HP,Windows,My gratitude to the sponsor of this program an...
19,2025/09/11 1:01:31 PM GMT+1,Victor,Ademuyiwa,AI,kobape,Male,2,ASUS,Windows,Feels good
26,2025/09/11 1:02:59 PM GMT+1,Oluwaseyi,Egunjobi,Cloud Computing,Cape Town,Male,2,MACBOOK,Mac OS,Nice to meet you all
30,2025/09/11 1:06:48 PM GMT+1,Gabriel,Bamgbose,AI,Abeokuta,Male,2,HP,Windows,Good


### Data Transformation

#### Renaming Column Name

In [36]:
df.rename(columns={'First_Name': 'FirstName', 'Last_Name': 'LastName'})
df.head(1)

Unnamed: 0,Timestamp,First_Name,Last_Name,Course_Track,City,Gender,Seat_Number,PC_Make,PC_OS,Feedback
0,2025/09/11 12:55:34 PM GMT+1,Peter,Okonmah,AI,Ogun,Male,28,MACBOOK,Mac OS,non


In [37]:
df['Feedback'] = df['Feedback'].str.lower() #converts the contents of feedback to lower case

In [38]:
df.head(2)

Unnamed: 0,Timestamp,First_Name,Last_Name,Course_Track,City,Gender,Seat_Number,PC_Make,PC_OS,Feedback
0,2025/09/11 12:55:34 PM GMT+1,Peter,Okonmah,AI,Ogun,Male,28,MACBOOK,Mac OS,non
1,2025/09/11 12:56:11 PM GMT+1,Toyeebat,Nababa,AI,Abeokuta,Female,24,HP,Windows,excellent


In [39]:
df.columns

Index(['Timestamp', 'First_Name', 'Last_Name', 'Course_Track', 'City', 'Gender', 'Seat_Number', 'PC_Make', 'PC_OS', 'Feedback'], dtype='object')

In [40]:
df['PC_OS'] = df['PC_OS'].str.upper()

In [41]:
df['First_Name'] = df['First_Name'].str.title()

In [42]:
df['City'] = df['City'].str.title()

In [43]:
df.head(2)

Unnamed: 0,Timestamp,First_Name,Last_Name,Course_Track,City,Gender,Seat_Number,PC_Make,PC_OS,Feedback
0,2025/09/11 12:55:34 PM GMT+1,Peter,Okonmah,AI,Ogun,Male,28,MACBOOK,MAC OS,non
1,2025/09/11 12:56:11 PM GMT+1,Toyeebat,Nababa,AI,Abeokuta,Female,24,HP,WINDOWS,excellent


In [44]:
# Defining a lambda function
lambda x:x.str.title()

# the .apply() method will help to apply the function to the specified columns
#df[col_name] df[col_name].apply(lambda x: x.str.title())

<function __main__.<lambda>(x)>

In [45]:
# Sorting columns
# Rearranges the instances in ascending order using 'City'
df.sort_values(by='City', ascending=True)

Unnamed: 0,Timestamp,First_Name,Last_Name,Course_Track,City,Gender,Seat_Number,PC_Make,PC_OS,Feedback
16,2025/09/11 1:00:43 PM GMT+1,Oluwole,Oludayo,AI,Abeokuta,Male,9,HP,WINDOWS,good training to attend
31,2025/09/11 1:10:16 PM GMT+1,Ridwanullah,Osho,AI;Cyber Security;Data Science,Abeokuta,Male,45,DELL,WINDOWS,it is what it is !!!
30,2025/09/11 1:06:48 PM GMT+1,Gabriel,Bamgbose,AI,Abeokuta,Male,2,HP,WINDOWS,good
29,2025/09/11 1:06:03 PM GMT+1,Samuel,Dasaolu,AI,Abeokuta,Male,100,HP;MACBOOK,LINUX,"good so far, i guess"
28,2025/09/11 1:03:36 PM GMT+1,Babatunde,Rahmon,AI,Abeokuta,Male,20,none,WINDOWS,i actually have a pc that has a very low stora...
27,2025/09/11 1:03:12 PM GMT+1,Adeoye,Mary,AI,Abeokuta,Female,15,LENOVO,WINDOWS,still processing
24,2025/09/11 1:02:49 PM GMT+1,Solomon,Olaiya,AI,Abeokuta,Male,16,MACBOOK,MAC OS,delighted learning here
23,2025/09/11 1:02:28 PM GMT+1,Olajide,Abioye,AI,Abeokuta,Male,5,HP,WINDOWS,"so far, i getting to familarize myself with nu..."
21,2025/09/11 1:01:46 PM GMT+1,Ayuba,Raji,AI,Abeokuta,Male,26,HP,WINDOWS,none for now
18,2025/09/11 1:00:54 PM GMT+1,Ademola,Akinrinde,AI,Abeokuta,Male,100,MACBOOK,MAC OS,awesome shit\n\n


In [46]:
# Sorting multiple columns
df.sort_values(by=['City', 'PC_Make'], ascending=[True, False])

Unnamed: 0,Timestamp,First_Name,Last_Name,Course_Track,City,Gender,Seat_Number,PC_Make,PC_OS,Feedback
28,2025/09/11 1:03:36 PM GMT+1,Babatunde,Rahmon,AI,Abeokuta,Male,20,none,WINDOWS,i actually have a pc that has a very low stora...
18,2025/09/11 1:00:54 PM GMT+1,Ademola,Akinrinde,AI,Abeokuta,Male,100,MACBOOK,MAC OS,awesome shit\n\n
24,2025/09/11 1:02:49 PM GMT+1,Solomon,Olaiya,AI,Abeokuta,Male,16,MACBOOK,MAC OS,delighted learning here
27,2025/09/11 1:03:12 PM GMT+1,Adeoye,Mary,AI,Abeokuta,Female,15,LENOVO,WINDOWS,still processing
29,2025/09/11 1:06:03 PM GMT+1,Samuel,Dasaolu,AI,Abeokuta,Male,100,HP;MACBOOK,LINUX,"good so far, i guess"
1,2025/09/11 12:56:11 PM GMT+1,Toyeebat,Nababa,AI,Abeokuta,Female,24,HP,WINDOWS,excellent
3,2025/09/11 12:57:56 PM GMT+1,Mahfuz,Abdulhameed,AI,Abeokuta,Male,44,HP,WINDOWS,amazing shit
5,2025/09/11 12:58:55 PM GMT+1,Abdulmalik,Adedotun,AI,Abeokuta,Male,200,HP,WINDOWS,enjoying the course so far
9,2025/09/11 12:59:28 PM GMT+1,Hannah,Tanimola,AI,Abeokuta,Male,30,HP,WINDOWS,on god
10,2025/09/11 12:59:41 PM GMT+1,Deborah,Adelegan,AI;Data Science,Abeokuta,Female,1,HP,WINDOWS,none for now


Sorting by row labels or index<br><br>
.sort_index() method is used to sort the dataframe by its index or row label 

### Grouping and Aggregation

The `.agg({})` takes in a key-value pair  of column name and an aggregation function as an argument which could be one or more depending on what you are working on. When using the aggregating functions with the agg({}) method dictionary we dont usually add the round brackets.

In [47]:
# Manually creating a bio_data sample data
bio = {
    'First_Name': ['Emeka', 'Aisha', 'Ayo', 'Chinedu', 'Fatima', 'Ibrahim', 'Ngozi', 'Tolu', 'Olamide', 'Yusuf',
                   'Ada', 'Kunle', 'Mercy', 'Segun', 'Zainab', 'Donald', 'Kemi', 'Usman', 'Funmi', 'Chika'],
    'Last_Name': ['Julius', 'Bello', 'Adewale', 'Godswill', 'Abubakar', 'David', 'Collins', 'Ogunleye', 'Adepoju', 'Garba',
                  'Umeh', 'Ojo', 'Musa', 'Balogun', 'Mohammed', 'Obi', 'Adebayo', 'Suleiman', 'Williams', 'Micheal'],
    'Gender': ['Male', 'Female', 'Male', 'Male', 'Female', 'Male', 'Female', 'Male', 'Male', 'Male',
               'Female', 'Male', 'Female', 'Male', 'Female', 'Male', 'Female', 'Male', 'Female', 'Male'],
    'Seat_No': range(101, 121),
    'City': ['Lagos', 'Abuja', 'Ibadan', 'Enugu', 'Kano', 'Benin', 'Port Harcourt', 'Abeokuta', 'Benin', 'Abeokuta',
             'Lagos', 'Abeokuta', 'Lagos', 'Ibadan', 'Abuja', 'Port Harcourt', 'Benin', 'Jos', 'Calabar', 'Onitsha'],
    'Course_Track': ['Data Science', 'Cloud Computing', 'Cybersecurity', 'AI', 'Data Science', 'Cloud Computing',
                     'Web Dev', 'AI', 'Cybersecurity', 'AI', 'Data Science', 'Web Dev',
                     'Cybersecurity', 'AI', 'Cloud Computing', 'Data Science', 'Web Dev', 'Data Science',
                     'Data Science', 'Cloud Computing'],
    'PC_make': ['HP', 'Dell', 'HP', 'Asus', 'Apple', 'HP', 'Dell', 'Lenovo', 'Asus', 'Apple',
                'HP', 'Dell', 'Lenovo', 'Asus', 'Dell', 'HP', 'Dell', 'Lenovo', 'Asus', 'Apple'],
    'PC_Os': ['Windows', 'Linux', 'Windows', 'Windows', 'Linux', 'MacOS', 'Windows', 'Linux', 'MacOS', 'Windows',
              'Linux', 'MacOS', 'Windows', 'Linux', 'MacOS', 'Windows', 'Linux', 'MacOS', 'Windows', 'Linux'],
    'Feedback': ['Good', 'Excellent', 'Excellent', 'Good', 'Poor', 'Excellent', 'Good', 'Average', 'Good', 'Excellent',
                 'Good', 'Poor', 'Average', 'Excellent', 'Good', 'Average', 'Excellent', 'Good', 'Good', 'Excellent']
}

In [48]:
df2 = pd.DataFrame(bio)

In [49]:
df2.head(3)

Unnamed: 0,First_Name,Last_Name,Gender,Seat_No,City,Course_Track,PC_make,PC_Os,Feedback
0,Emeka,Julius,Male,101,Lagos,Data Science,HP,Windows,Good
1,Aisha,Bello,Female,102,Abuja,Cloud Computing,Dell,Linux,Excellent
2,Ayo,Adewale,Male,103,Ibadan,Cybersecurity,HP,Windows,Excellent


In [50]:
#Saving as a csv file
df2.to_csv("biodata2.csv", index=False)

In [52]:
df2["Course_Track"].unique()

array(['Data Science', 'Cloud Computing', 'Cybersecurity', 'AI',
       'Web Dev'], dtype=object)

In [53]:
df2['First_Name'].nunique()

20

In [54]:
#What is the total number of students taking each track?
track_count = df2.groupby("Course_Track").agg({"First_Name": "count"})
track_count

Unnamed: 0_level_0,First_Name
Course_Track,Unnamed: 1_level_1
AI,4
Cloud Computing,4
Cybersecurity,3
Data Science,6
Web Dev,3


In [58]:
#What is the total number of students having the same numbers of PC_make
df2.groupby("PC_make")["PC_make"].count()


PC_make
Apple     3
Asus      4
Dell      5
HP        5
Lenovo    3
Name: PC_make, dtype: int64